Skip to content

Commit

Permalink
implement get_jsonline_chunk_lazy and get_jsonline_chunk
Browse files Browse the repository at this point in the history
  • Loading branch information
supercoderhawk committed Aug 4, 2020
1 parent cb63d4a commit d85ccc6
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
28 changes: 28 additions & 0 deletions pysenal/io/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections import Iterable
import configparser
from ..utils.logger import get_logger
from ..utils.utils import get_chunk

_ENCODING_UTF8 = 'utf-8'

Expand Down Expand Up @@ -198,6 +199,33 @@ def read_jsonline_lazy(filename, encoding=_ENCODING_UTF8, default=None):
file.close()


def get_jsonline_chunk_lazy(filename, chunk_size, encoding=_ENCODING_UTF8, default=None):
    """
    lazily yield the items of a jsonline file grouped into chunks
    :param filename: path of the jsonline file to read
    :param chunk_size: chunk size handed to get_chunk
        (presumably the maximum number of items per chunk -- confirm against get_chunk)
    :param encoding: file encoding, forwarded to read_jsonline_lazy
    :param default: forwarded to read_jsonline_lazy
        (presumably the fallback used when the file does not exist -- confirm there)
    :return: generator yielding whatever get_chunk produces for each chunk
    """
    # Both the item reader and the chunker are consumed lazily, so nothing is
    # read here until the caller starts iterating.
    items = read_jsonline_lazy(filename, encoding, default)
    yield from get_chunk(items, chunk_size)


def get_jsonline_chunk(filename, chunk_size, encoding=_ENCODING_UTF8, default=None):
    """
    read a jsonline file and return all of its chunks at once
    :param filename: path of the jsonline file to read
    :param chunk_size: chunk size handed to get_chunk
        (presumably the maximum number of items per chunk -- confirm against get_chunk)
    :param encoding: file encoding, forwarded to read_jsonline_lazy
    :param default: forwarded to read_jsonline_lazy
        (presumably the fallback used when the file does not exist -- confirm there)
    :return: list holding every chunk produced by get_chunk
    """
    items = read_jsonline_lazy(filename, encoding, default)
    chunks = get_chunk(items, chunk_size)
    # Materialise eagerly: unlike get_jsonline_chunk_lazy, the caller gets a list.
    return list(chunks)


def write_jsonline(filename, items, encoding=_ENCODING_UTF8, serialize_method=None):
"""
write items to file with json line format
Expand Down
8 changes: 8 additions & 0 deletions tests/io/test_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: UTF-8 -*-
import tempfile
import pytest
import types
from decimal import Decimal
from pysenal.io.file import *
from pysenal.utils import json_serialize
Expand Down Expand Up @@ -71,6 +72,13 @@ def test_read_jsonline(example_json, fake_filename):
assert read_jsonline(TEST_DATA_DIR + 'a.jsonl') == example_json


def test_read_jsonline_chunk(example_json):
    """
    check that the eager and lazy chunk readers return the same chunks for a.jsonl
    """
    source_path = TEST_DATA_DIR + 'a.jsonl'
    expected_chunks = [example_json]

    # Eager variant returns the list of chunks directly.
    assert get_jsonline_chunk(source_path, 2) == expected_chunks

    # Lazy variant must hand back a real generator that yields the same chunks.
    lazy_chunks = get_jsonline_chunk_lazy(source_path, 2)
    assert isinstance(lazy_chunks, types.GeneratorType)
    assert list(lazy_chunks) == expected_chunks


def test_write_lines(example_lines):
dirname = tempfile.gettempdir() + '/'
filename = dirname + 'a.txt'
Expand Down

0 comments on commit d85ccc6

Please sign in to comment.