jf package

Submodules

jf.jfio module

class jf.jfio.MinimalAdapter

Bases: object

>>> a = MinimalAdapter()
>>> a(iter([b"abcde", b"fghij"])).read(2)
'ab'
>>> a.read(2)
'cd'
>>> a.read(2)
'ef'
read(size)
class jf.jfio.StructEncoder(*, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)

Bases: json.encoder.JSONEncoder

Try to convert everything to json

>>> from datetime import datetime
>>> import json
>>> len(json.dumps(datetime.now(), cls=StructEncoder)) > 10
True
default(obj)

Implement this method in a subclass such that it returns a serializable object for o, or calls the base implementation (to raise a TypeError).

For example, to support arbitrary iterators, you could implement default like this:

def default(self, o):
    try:
        iterable = iter(o)
    except TypeError:
        pass
    else:
        return list(iterable)
    # Let the base class default method raise the TypeError
    return JSONEncoder.default(self, o)
jf.jfio.data_input(files=None, additionals={}, inputfmt=None, listen=None)

Data input function

>>> import tempfile
>>> with tempfile.NamedTemporaryFile() as tmpfile:
...     tmpfile.write(b'[{"myconfig": "myvalue"}]') and True
...     tmpfile.flush()
...     len(list(data_input([tmpfile.name])))
True
1
>>> import tempfile
>>> with tempfile.NamedTemporaryFile(suffix=".yaml") as tmpfile:
...     tmpfile.write(b'[{"myconfig": "myvalue"}]') and True
...     tmpfile.flush()
...     len(list(data_input([tmpfile.name])))
True
1
>>> with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile:
...     tmpfile.write(b"hello,world\nno,yes\nno,no\n") and True
...     tmpfile.flush()
...     len(list(data_input([tmpfile.name])))
True
2
>>> list(data_input(["nots3://bucket/key.json"], {}))
Traceback (most recent call last):
...
NotImplementedError: ...
jf.jfio.fetch_file(fn, f, additionals)

Fetch file with custom handler

>>> from io import StringIO
>>> s = StringIO()
>>> class fetch_mod:
...     def jf_fetch_s3(m):
...          return '{"hello": "world"}'
>>> fetch_file("s3://bucket/key.json", s, {"mod": fetch_mod})
>>> s.getvalue()
'{"hello": "world"}'
jf.jfio.fetch_http(url)
jf.jfio.fetch_https(url)
jf.jfio.get_handler(method, fntype, additionals)
jf.jfio.get_supported_formats()
>>> len(get_supported_formats()) > 2
True
jf.jfio.not_dotaccessible(it)
jf.jfio.print_results(ret, output, compact=False, raw=False, additionals={})

Print array with various formats

>>> data = [{"a": 1}]
>>> print_results(data, 'help')
- clipboard
- csv
...
>>> print_results(data, 'py', True)
{'a': 1}
>>> print_results(data, 'json', True)
{"a": 1}
>>> print_results(["hello"], 'json', True, raw=True)
hello
>>> print_results(data, 'json', False)
{
  "a": 1
}
>>> print_results(data, 'yaml')
a: 1
<BLANKLINE>
>>> print_results(data, 'csv')
,a
0,1
<BLANKLINE>
>>> print_results(data, 'pickle')
<bytes>
>>> class serialize_mod:
...     def jf_serialize_msg(m):
...          return repr(m)
>>> print_results(data, 'msg', additionals={"mod": serialize_mod})
<bytes>
>>> print_results(data, 'not supported')
Traceback (most recent call last):
...
NotImplementedError: Cannot output not supported yet. Please consider making a PR!
jf.jfio.save_pandas(alldata, output, _highligh=None)
jf.jfio.write_bytes(barr)
jf.jfio.yield_json_and_json_lines(inp)

Yield json and json lines

Split potentially huge json strings into lines or components for low memory data processing.

Notice: Results are still json strings, so you most likely want to json.loads them.

jf.extra_functions module

class jf.extra_functions.Chain(*args, **kwargs)

Bases: jf.meta.JFTransformation

Chain grouped sub-iterables from the input into a single flat sequence

>>> list(Chain()(Firstnlast(lambda x: 1)([{"a": 1}, {"a": 1}, {"a": 2}])))
[{'a': 1}, {'a': 2}]

class jf.extra_functions.First(*args, **kwargs)

Bases: jf.meta.JFTransformation

Show only the first (N) value(s)

>>> list(First(lambda x: 1)([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': 1}]
>>> list(First("1")([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': 1}]

class jf.extra_functions.Firstnlast(*args, **kwargs)

Bases: jf.meta.JFTransformation

Show first and last (N) items

>>> list(Firstnlast()([{"a": 1}, {"a": 1}, {"a": 2}]))
[[{'a': 1}], [{'a': 2}]]
>>> list(Firstnlast("1")([{"a": 1}, {"a": 1}, {"a": 2}]))
[[{'a': 1}], [{'a': 2}]]

class jf.extra_functions.Flatten(*args, **kwargs)

Bases: jf.meta.JFTransformation

Yield all subitems of all items

>>> list(Flatten(lambda x: x["a"])([{"a": [1,2,3], "b": [{"c": 1}], "c": {"d": 1}}]))
[{'a.0': 1, 'a.1': 2, 'a.2': 3, 'b.0.c': 1, 'c.d': 1}]
class jf.extra_functions.GroupBy(*args, **kwargs)

Bases: jf.meta.JFTransformation

Group items by value

>>> list(GroupBy(lambda x: x["a"])([{"a": 1}, {"a": 1}, {"a": 2}]))
[{1: [{'a': 1}, {'a': 1}], 2: [{'a': 2}]}]
class jf.extra_functions.JfDel(*args, **kwargs)

Bases: jf.meta.JFTransformation

Yield all subitems of all items

>>> list(YieldFrom(lambda x: x["a"])([{"a": [1,2,3]}]))
[1, 2, 3]
class jf.extra_functions.Last(*args, **kwargs)

Bases: jf.meta.JFTransformation

Show only the last (N) value(s)

>>> list(Last(lambda x: 1)([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': 2}]
>>> list(Last("1")([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': 2}]

class jf.extra_functions.Print(*args, **kwargs)

Bases: jf.meta.JFTransformation

Print (n) values

This prints n values to the stderr, but passes the data through without changes.

>>> list(Print(2)([{"a": 3}, {"a": 1}, {"a": 2}]))
[{'a': 3}, {'a': 1}, {'a': 2}]
>>> list(Print(lambda x: 2)([{"a": 3}, {"a": 1}, {"a": 2}]))
[{'a': 3}, {'a': 1}, {'a': 2}]
class jf.extra_functions.Sorted(*args, **kwargs)

Bases: jf.meta.JFTransformation

Sort items based on the column value

>>> list(Sorted(lambda x: x["a"])([{"a": 3}, {"a": 1}, {"a": 2}]))
[{'a': 1}, {'a': 2}, {'a': 3}]

class jf.extra_functions.Transpose(*args, **kwargs)

Bases: jf.meta.JFTransformation

Transpose input

>>> list(Transpose(lambda x: x["a"])([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': [1, 1, 2]}]

class jf.extra_functions.Unique(*args, **kwargs)

Bases: jf.meta.JFTransformation

Calculate unique according to function

>>> list(Unique(lambda x: x["a"])([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': 1}, {'a': 2}]
>>> list(Unique()([{"a": 1}, {"a": 1}, {"a": 2}]))
[{'a': 1}, {'a': 2}]
class jf.extra_functions.YieldFrom(*args, **kwargs)

Bases: jf.meta.JFTransformation

Yield all subitems of all items

>>> list(YieldFrom(lambda x: x["a"])([{"a": [1,2,3]}]))
[1, 2, 3]
jf.extra_functions.age(datestr)

Age of a datetime string

>>> age("1 weeks ago").days
6

jf.main module

jf.main.filepath(x)
jf.main.is_datafile(x)
jf.main.jf(processes, query_and_files, imports, import_path, from_file, compact, listen, inputfmt, output, debug, raw, init)

Main of the machine

>>> import tempfile
>>> import json
>>> def run_with_data(data, jffn):
...     with tempfile.NamedTemporaryFile(suffix='.json') as tmpfile:
...         tmpfile.write(json.dumps(data).encode()) and True
...         tmpfile.flush()
...         jffn(tmpfile.name)
>>> def jffn(query, *args):
...     def _fn(fname):
...         ret1 = jf(1, [query, fname], *args)
...         ret2 = jf(2, [query, fname], *args)
...         assert ret1 == ret2
...     return _fn
>>> run_with_data([{"a": "myvalue"}], jffn(".a", [], [], False, True, None, None, 'json', False, False, []))
"myvalue"
"myvalue"
>>> run_with_data([{"a": "myvalue", "b": 1}], jffn("{a}", [], [], False, True, None, None, 'json', False, False, []))
{"a": "myvalue"}
{"a": "myvalue"}
>>> run_with_data([{"a": "myvalue", "b": 1}], jffn("{a, good: .b > 0}", [], [], False, True, None, None, 'json', False, False, []))
{"a": "myvalue", "good": true}
{"a": "myvalue", "good": true}
>>> run_with_data([{"a": "myvalue"}], jffn("{b: .a}", [], [], False, True, None, None, 'json', False, False, []))
{"b": "myvalue"}
{"b": "myvalue"}
>>> run_with_data([{"a": 1}, {"a": 2}], jffn("(.a > 1)", [], [], False, True, None, None, 'json', False, False, []))
{"a": 2}
{"a": 2}
>>> run_with_data([{"a": "myvalue"}], jffn("{hash: hashlib.md5(.a.encode()).hexdigest(), ...}", ["hashlib"], [], False, True, None, None, 'json', False, False, []))
{"a": "myvalue", "hash": "d724a7135ce7d2593c25fc5212d4125a"}
{"a": "myvalue", "hash": "d724a7135ce7d2593c25fc5212d4125a"}

# Init is broken currently
# >>> run_with_data([{"a": "myvalue"}], jffn("{hash: hashlib.md5(.a.encode()).hexdigest(), c: C, ...}", ["hashlib"], [], False, True, None, None, 'json', False, False, ["C=5"]))
# {"a": "myvalue", "hash": "d724a7135ce7d2593c25fc5212d4125a", "c": 5}
# {"a": "myvalue", "hash": "d724a7135ce7d2593c25fc5212d4125a", "c": 5}

jf.main.name(x)

jf.process module

class jf.process.DotAccessible(*args, **kwargs)

Bases: dict

Dot accessible version of a dict. For syntactic sugar.

>>> it = DotAccessible({"a": 5})
>>> it.a
5
>>> it.b = 6
>>> it
{'a': 5, 'b': 6}
>>> del it.b
>>> isinstance(it.b, DotAccessibleNone)
True
>>> it
{'a': 5}
>>> DotAccessible({"a": 5}, b=1)
{'a': 5, 'b': 1}
class jf.process.DotAccessibleNone

Bases: object

jf.process.HttpServe(fs, listen, processes)
class jf.process.JFREMOVED

Bases: object

jf.process.camel_to_snake(name)
jf.process.dict_updater(_f)
jf.process.dotaccessible(it)
jf.process.mymap(fs, arr, processes=1)

My mapping function

Apply functions in fs to items in arr. Also supports multiprocessing.

jf.process.run_query(query, data, additionals={}, from_file=False, processes=1, listen=False)

Run query. This function will utilize global imports if used as a library:

>>> import hashlib
>>> list(run_query('.a', [{"a": "521"}, {"a": "643"}]))
['521', '643']
jf.process.undotaccessible(it)
jf.process.worker(x)

worker for multiprocessing

>>> worker_init([["map", lambda x: x],
...     ["update", lambda x: x],
...     ["function", lambda x: lambda y: y],
...     ["filter", lambda x: x]])
>>> worker({"a": 1})
{'a': 1}

jf.process.worker_init(funcs)

initializer for the worker in multiprocessing

jf.query_parser module

jf.query_parser.parse_query(query, from_file=None, imports=[], import_path=None, debug=False, dosplit=True, inputfmt=None, init=[])

Parse user query

>>> parse_query("{A: .b}, {c: .A, ...}, (.c>1),unique(), yield from .a")
('[["map", lambda x: {"A": x.b}], ["update", lambda x: {"c": x.A}], ["filter", lambda x: (x.c>1)], ["function", lambda x: unique(lambda x: ())], ["function", lambda x: yield_from(lambda x: x.a)]]', [], None, None, [])
>>> parse_query('{timestamps: t.get(f"train/{.audio}"), ...}')
('[["update", lambda x: {"timestamps": t.get(f"train/{x.audio}")}]]', [], None, None, [])
>>> parse_query('{timestamps: t.get(f"train/{.audio}"), ...}', debug=True)
query_parse:
...
('[["update", lambda x: {"timestamps": t.get(f"train/{x.audio}")}]]', [], None, None, [])
>>> import tempfile
>>> with tempfile.NamedTemporaryFile(suffix='.jf') as tmpfile:
...     tmpfile.write(b'#!/usr/bin/env jf\n#import hashlib\n{hash: hashlib.md5(.a).hexdigest(), ...}') and True
...     tmpfile.flush()
...     print(parse_query(tmpfile.name))
True
('[["update", lambda x: {"hash": hashlib.md5(x.a).hexdigest()}]]', ['hashlib'], None, None, [])
jf.query_parser.query_convert(query, debug=False)
jf.query_parser.split_query(q)

Split input query into components

jf.query_parser.withquerytype(query, is_function=False)

Parse query type from query component

>>> withquerytype('{a: 5, ...}')
('update', '{a: 5}')
>>> withquerytype('{a: 5}')
('map', '{a: 5}')
>>> withquerytype('(.a > 5)')
('filter', '(.a > 5)')
>>> withquerytype('count()', True)
('function', 'count()')
>>> withquerytype('sorted(.a)', True)
('function', 'sorted(lambda x: (.a))')

Module contents