j2y and y2j filter programs, which convert streams of values between JSON and YAML - good for use with jq :)
This commit is contained in:
parent dd820bf691
commit 0dea194992
2 changed files with 105 additions and 0 deletions
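
Both scripts are meant to be used as ordinary shell filters, e.g. next to jq as the subject line suggests. A minimal usage sketch (the file name and URL here are made up, and it assumes both scripts are on your PATH):

$ y2j config.yaml | jq '.name'            # query a YAML file with jq
$ curl -s https://example.com/api | j2y   # read a JSON response as YAML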
88  local/bin/j2y  Executable file
@@ -0,0 +1,88 @@
#!/usr/bin/env python3

from yaml import dump
try:
    from yaml import CDumper as Dumper
except ImportError:
    from yaml import Dumper
import json, re, sys

def dict_representer(dumper, d):
    node = dumper.represent_dict(d)
    # Don't use YAML flow style for large dicts, because the flow style output
    # only really looks good with a very small number of keys.
    if node.flow_style and len(d) > 5:
        node.flow_style = False
    return node
Dumper.add_representer(dict, dict_representer)

def cat(files):
    usedStdin = False
    for f in files:
        if f == '-':
            if usedStdin: continue
            usedStdin = True
        with open(f) if f != '-' else sys.stdin as s:
            for line in s: yield line

# http://stackoverflow.com/a/7795029/1208816
braces = '{}[]'
whitespace_esc = ' \t'
braces_esc = '\\' + '\\'.join(braces)
braces_pat = '[' + braces_esc + ']'
no_braces_pat = '[^' + braces_esc + ']*'
until_braces_pat = re.compile(no_braces_pat + braces_pat)
balance_map = dict(zip(braces, [1, -1, 1, -1]))

def streamingfinditer(pat, stream):
    for s in stream:
        while True:
            m = pat.search(s)
            if not m:
                yield (0, s)
                break
            yield (1, m.group())
            s = pat.split(s, 1)[1]

def simpleorcompoundobjects(stream):
    obj = ""
    unbalanced = 0
    for (c, m) in streamingfinditer(until_braces_pat, stream):
        if (c == 0):  # remainder of line returned, nothing interesting
            if (unbalanced == 0):
                yield (0, m)
            else:
                obj += m
        if (c == 1):  # match returned
            if (unbalanced == 0):
                yield (0, m[:-1])
                obj += m[-1]
            else:
                obj += m
            unbalanced += balance_map[m[-1]]
            if (unbalanced == 0):
                yield (1, obj)
                obj = ""

def streamingiterload(stream):
    for c, o in simpleorcompoundobjects(stream):
        for x in iterload(o):
            yield x

# http://stackoverflow.com/a/6886743/1208816
def iterload(string_or_fp, cls=json.JSONDecoder, **kwargs):
    try:
        string = string_or_fp.read()
    except AttributeError:
        string = str(string_or_fp)

    decoder = cls(**kwargs)
    idx = json.decoder.WHITESPACE.match(string, 0).end()
    while idx < len(string):
        obj, end = decoder.raw_decode(string, idx)
        yield obj
        idx = json.decoder.WHITESPACE.match(string, end).end()

files = sys.argv[1:] or ('-',)
for obj in streamingiterload(cat(files)):
    print(dump(obj, Dumper=Dumper, explicit_start=True), end='', flush=True)
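
Because j2y splits its input on balanced braces and brackets before decoding, several concatenated JSON values in one stream each come out as a separate YAML document, and explicit_start=True prefixes each one with "---". A sketch of the expected behaviour (exact scalar and collection styling depends on your PyYAML version's default_flow_style):

$ printf '{"a": 1} {"b": [2, 3]}' | j2y
---
a: 1
---
b:
- 2
- 3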
17  local/bin/y2j  Executable file
@@ -0,0 +1,17 @@
#!/usr/bin/env python3
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader
import sys, json

files = sys.argv[1:] or ('-',)

stdinUsed = False
for f in files:
    if f == '-':
        if stdinUsed: continue
        stdinUsed = True
    with open(f) if f != '-' else sys.stdin as stream:
        l = SafeLoader(stream)
        while l.check_data(): print(json.dumps(l.get_data()))
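
y2j is the inverse direction: every YAML document on the input becomes one json.dumps line on stdout, so multi-document YAML turns into a line-per-document stream that jq can consume directly. A sketch:

$ printf -- '---\na: 1\n---\nb: [2, 3]\n' | y2j
{"a": 1}
{"b": [2, 3]}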