From 0dea194992c7e0697dd34433c0a76f8b4fff979b Mon Sep 17 00:00:00 2001
From: Danielle McLean
Date: Thu, 27 Oct 2016 11:25:29 +1100
Subject: [PATCH] j2y and y2j filter programs, which convert streams of values
 between JSON and YAML - good for use with jq :)

---
 local/bin/j2y | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++
 local/bin/y2j |  45 +++++++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100755 local/bin/j2y
 create mode 100755 local/bin/y2j

diff --git a/local/bin/j2y b/local/bin/j2y
new file mode 100755
--- /dev/null
+++ b/local/bin/j2y
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""Convert a stream of JSON values into a stream of YAML documents.
+
+Usage: j2y [FILE...]
+
+Each FILE is read in order; '-' (or no arguments at all) means standard
+input.  Every JSON value found is written to standard output as its own
+YAML document.
+"""
+
+from yaml import dump
+try:
+    from yaml import CDumper as Dumper
+except ImportError:
+    from yaml import Dumper
+import json, re, sys
+
+
+def dict_representer(dumper, d):
+    """Represent a dict, turning flow style off when it has over 5 keys."""
+    node = dumper.represent_dict(d)
+    # Don't use YAML flow style for large dicts, because the flow style output
+    # only really looks good with a very small number of keys.
+    if node.flow_style and len(d) > 5:
+        node.flow_style = False
+    return node
+
+
+Dumper.add_representer(dict, dict_representer)
+
+
+def cat(files):
+    """Yield the lines of each named file in turn, like cat(1).
+
+    The name '-' means standard input; it is read the first time it appears
+    and skipped on any later appearance.
+    """
+    used_stdin = False
+    for name in files:
+        if name == '-':
+            if used_stdin:
+                continue
+            used_stdin = True
+            # Iterate stdin directly: wrapping it in `with` would close it.
+            yield from sys.stdin
+        else:
+            with open(name) as f:
+                yield from f
+
+
+# Bracket balancing, after http://stackoverflow.com/a/7795029/1208816
+# One token of interest: a whole double-quoted string literal, or a single
+# bracket/brace.  Consuming strings as a unit keeps brackets that appear
+# inside string values from being counted as structure.
+token_pat = re.compile(r'"(?:[^"\\]|\\.)*"|[{}\[\]]')
+
+
+def simpleorcompoundobjects(stream):
+    """Split an iterable of text lines into chunks of JSON text.
+
+    Yields (kind, text) pairs.  Kind 1 is the text of one top-level object
+    or array, which may have spanned several lines.  Kind 0 is text found
+    outside any object or array: whitespace and/or scalar values.
+    """
+    parts = []  # pieces of the object/array being collected across lines
+    depth = 0   # number of brackets/braces currently open
+    for line in stream:
+        start = 0  # index in line of the first character not yet yielded
+        for m in token_pat.finditer(line):
+            tok = m.group()
+            if tok[0] == '"':
+                continue
+            if tok in '{[':
+                if depth == 0:
+                    # An object/array begins; emit the scalars before it.
+                    yield (0, line[start:m.start()])
+                    start = m.start()
+                depth += 1
+            elif depth > 0:
+                depth -= 1
+                if depth == 0:
+                    parts.append(line[start:m.end()])
+                    yield (1, ''.join(parts))
+                    parts = []
+                    start = m.end()
+            # A closer with nothing open stays in the scalar text, so that
+            # the decoder reports it as a syntax error.
+        if depth == 0:
+            yield (0, line[start:])
+        else:
+            parts.append(line[start:])
+    if parts:
+        # Input ended inside an object/array.  Hand over what we have so the
+        # decoder raises an error instead of the data silently vanishing.
+        yield (1, ''.join(parts))
+
+
+def streamingiterload(stream):
+    """Yield every JSON value decoded from an iterable of text lines."""
+    for _, text in simpleorcompoundobjects(stream):
+        yield from iterload(text)
+
+
+# http://stackoverflow.com/a/6886743/1208816
+def iterload(string_or_fp, cls=json.JSONDecoder, **kwargs):
+    """Yield each of the consecutive JSON values found in the input.
+
+    string_or_fp may be a string or an object with a read() method; extra
+    keyword arguments are passed to the decoder class cls.
+    """
+    try:
+        string = string_or_fp.read()
+    except AttributeError:
+        string = str(string_or_fp)
+
+    decoder = cls(**kwargs)
+    idx = json.decoder.WHITESPACE.match(string, 0).end()
+    while idx < len(string):
+        obj, end = decoder.raw_decode(string, idx)
+        yield obj
+        idx = json.decoder.WHITESPACE.match(string, end).end()
+
+
+def main():
+    """Convert the files named on the command line, or stdin if none."""
+    files = sys.argv[1:] or ('-',)
+    for obj in streamingiterload(cat(files)):
+        # Flush after every document so a downstream reader sees it at once.
+        print(dump(obj, Dumper=Dumper, explicit_start=True), end='', flush=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/local/bin/y2j b/local/bin/y2j
new file mode 100755
--- /dev/null
+++ b/local/bin/y2j
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""Convert a stream of YAML documents into JSON values, one per line.
+
+Usage: y2j [FILE...]
+
+Each FILE is read in order; '-' (or no arguments at all) means standard
+input.
+"""
+try:
+    from yaml import CSafeLoader as SafeLoader
+except ImportError:
+    from yaml import SafeLoader
+import sys, json
+
+
+def convert(stream):
+    """Print every YAML document in stream as one line of JSON."""
+    loader = SafeLoader(stream)
+    try:
+        while loader.check_data():
+            print(json.dumps(loader.get_data()))
+    finally:
+        # Same cleanup that yaml.load_all() performs on its loader.
+        loader.dispose()
+
+
+def main():
+    """Convert the files named on the command line, or stdin if none."""
+    files = sys.argv[1:] or ('-',)
+    stdin_used = False
+    for name in files:
+        if name == '-':
+            # Standard input can only be consumed once; skip repeats.
+            if stdin_used:
+                continue
+            stdin_used = True
+            # Use stdin directly: wrapping it in `with` would close it.
+            convert(sys.stdin)
+        else:
+            with open(name) as stream:
+                convert(stream)
+
+
+if __name__ == '__main__':
+    main()