I have some preprocessing to do with some existing .yml files - however, some of them have Jinja template syntax embedded in them:
A:
  B:
  - ip: 1.2.3.4
  - myArray:
    - {{ jinja.variable }}
    - val1
    - val2
One way to do this is to use the jinja2 parser itself to parse the template and output an alternate format.
This code inherits from the Jinja2 Parser, Lexer and Environment classes to parse inside variable blocks (usually {{ }}). Instead of evaluating the variables, this code changes the text to something that yaml can understand. The exact same code can be used to reverse the process with an exchange of the delimiters. By default it translates to the delimiters suggested by snakecharmerb.
import jinja2
import yaml
class MyParser(jinja2.parser.Parser):
    """Parser that replaces each variable expression with its source text.

    Works together with ``MyLexer``, which stores the raw tokens of the
    variable block being lexed in ``environment._jinja_vars``: ``None``
    outside a block, a list while tokens are being collected, and the joined
    text once the closing delimiter has been seen.
    """

    def parse_tuple(self, *args, **kwargs):
        """Parse an expression, swapping a finished variable block for text.

        The stock parser always runs first so the token stream is consumed
        normally.  When the lexer has finished capturing a variable block,
        the parsed node is dropped and a ``Const`` node holding the captured
        text, wrapped in the environment's replacement delimiters, is
        returned in its place.  Otherwise the regular parse result is
        returned unchanged.
        """
        node = super(MyParser, self).parse_tuple(*args, **kwargs)

        captured = self.environment._jinja_vars
        # None: we were not called for a variable block at all (jinja2 also
        # routes ``{% if %}`` / ``{% for %}`` expressions through here).
        # list: a nested call, e.g. a parenthesised sub-expression, made
        # while the enclosing block is still being lexed.
        # In both cases there is no finished text to substitute, so hand
        # back the real node instead of None / a TypeError.
        if captured is None or isinstance(captured, list):
            return node

        # Reset so the lexer starts looking for the next variable block.
        self.environment._jinja_vars = None
        return jinja2.nodes.Const(
            self.environment.new_variable_start_string +
            captured +
            self.environment.new_variable_end_string)
class MyLexer(jinja2.lexer.Lexer):
    """Lexer that records the raw tokens found inside each variable block.

    The recording is written to ``_jinja_vars`` on the attached environment:
    ``None`` while outside a block, a list of raw token tuples while inside
    one, and the concatenated token text once the end delimiter is reached.
    """

    def __init__(self, *args, **kwargs):
        super(MyLexer, self).__init__(*args, **kwargs)
        # Back-reference filled in by MyEnvironment; stays None for a lexer
        # that no MyEnvironment has claimed.
        self.environment = None

    def tokenize(self, source, name=None, filename=None, state=None):
        """Return a token stream for ``source``, recording variable blocks.

        Without an attached environment there is nowhere to record to, so
        the call is delegated to the stock lexer implementation.
        """
        environment = self.environment
        if environment is None:
            return super(MyLexer, self).tokenize(
                source, name, filename, state)

        raw_tokens = self.tokeniter(source, name, filename, state)

        def recording_stream():
            # Raw tokens are tuples; index 1 is the token type and index 2
            # the matched source text.
            for token in raw_tokens:
                kind = token[1]
                if environment._jinja_vars is None:
                    if kind == 'variable_begin':
                        # Start collecting; the delimiter itself is skipped.
                        environment._jinja_vars = []
                elif kind == 'variable_end':
                    # Collapse the collected tokens back into source text.
                    environment._jinja_vars = ''.join(
                        [item[2] for item in environment._jinja_vars])
                else:
                    environment._jinja_vars.append(token)
                yield token

        return jinja2.lexer.TokenStream(
            self.wrap(recording_stream(), name, filename), name, filename)
# Swap jinja2's lexer class for MyLexer.  This is a process-wide monkeypatch
# of the jinja2.lexer module, not something scoped to MyEnvironment.
# NOTE(review): presumably jinja2 looks this name up when it builds a lexer,
# so lexers created after this line are MyLexer instances -- confirm against
# the installed jinja2 version.
jinja2.lexer.Lexer = MyLexer
class MyEnvironment(jinja2.Environment):
    """Environment that rewrites variable delimiters instead of rendering.

    Hooks ``MyParser`` and ``MyLexer`` together so that each variable block
    in a template is emitted as literal text wrapped in a different pair of
    delimiters.

    Args:
        new_variable_start_string: Opening delimiter written to the output
            (or, with ``reverse=True``, the one recognised in the input).
        new_variable_end_string: Matching closing delimiter.
        reverse: When true, the roles are swapped: the ``new_*`` delimiters
            are what the lexer looks for, and the environment's regular
            variable delimiters are what gets written out.
        *args, **kwargs: Passed through to ``jinja2.Environment``.  A
            ``jinja2.BaseLoader`` is supplied when no loader is given.
    """

    def __init__(self,
                 new_variable_start_string='<<',
                 new_variable_end_string='>>',
                 reverse=False,
                 *args,
                 **kwargs):
        if kwargs.get('loader') is None:
            kwargs['loader'] = jinja2.BaseLoader()

        super(MyEnvironment, self).__init__(*args, **kwargs)

        # Capture state shared between MyLexer and MyParser.
        self._jinja_vars = None

        if reverse:
            # Output uses the regular delimiters; input uses the new ones.
            self.new_variable_start_string = self.variable_start_string
            self.new_variable_end_string = self.variable_end_string
            self.variable_start_string = new_variable_start_string
            self.variable_end_string = new_variable_end_string
        else:
            self.new_variable_start_string = new_variable_start_string
            self.new_variable_end_string = new_variable_end_string

        # Must come after the delimiters are final so the lexer that is
        # looked up matches them.
        self.lexer.environment = self

    def _parse(self, source, name, filename):
        """Parse ``source`` with ``MyParser`` instead of the stock parser."""
        # jinja2 2.x encoded the filename through the private ``_compat``
        # module; that module no longer exists in jinja2 3.x, where the
        # filename is passed through unchanged.
        compat = getattr(jinja2, '_compat', None)
        encode_filename = getattr(compat, 'encode_filename', None)
        if encode_filename is not None:
            filename = encode_filename(filename)

        # jinja2 caches lexers per delimiter configuration, so another
        # environment with the same settings may have re-pointed the lexer
        # since __init__ ran.  Claim it again before tokenizing.
        self.lexer.environment = self
        return MyParser(self, source, name, filename).parse()
The jinja2 parser scans the template looking for delimiters. When it finds an opening delimiter, it switches to parsing the material between the delimiters. The changes here hook into the lexer and parser to record the tokens seen inside a variable block during template compilation. When the closing delimiter is reached, the recorded tokens are concatenated into a string, which is inserted as a jinja2.nodes.Const parse node in place of the compiled jinja expression. When the template is then rendered, that string is emitted instead of a variable expansion.
The MyEnvironment class hooks in the custom parser and lexer. It also adds a few parameters for choosing the replacement delimiters and for reversing the translation.
The primary advantage of this approach is that it should be fairly robust to parsing whatever jinja will parse.
def dict_from_yaml_template(template_string):
    """Load a YAML document that contains Jinja variable blocks.

    The template is first rendered through ``MyEnvironment``, which rewrites
    each variable block as literal text in the replacement delimiters, and
    the result is then parsed as YAML.

    Args:
        template_string: YAML text, possibly containing Jinja variables.

    Returns:
        The parsed YAML data, with each variable block kept as a string.
    """
    env = MyEnvironment()
    rendered = env.from_string(template_string).render()
    # yaml.load() without an explicit Loader is rejected by PyYAML >= 6 and
    # can construct arbitrary objects on older versions.  safe_load handles
    # the plain mappings, lists and scalars this function is meant for.
    return yaml.safe_load(rendered)
def yaml_template_from_dict(template_yaml, **kwargs):
    """Serialise data to YAML and restore the Jinja variable delimiters.

    The data is dumped with ``yaml.dump`` (extra keyword arguments are
    forwarded to it) and the resulting text is rendered through a reversed
    ``MyEnvironment``, which swaps the replacement delimiters back to the
    environment's regular variable delimiters.
    """
    reverse_env = MyEnvironment(reverse=True)
    dumped_text = yaml.dump(template_yaml, **kwargs)
    return reverse_env.from_string(dumped_text).render()
# Round-trip demo: load a templated YAML file, extend one of its lists, and
# print the result with the Jinja delimiters restored.
with open('data.yml') as f:
    data = dict_from_yaml_template(f.read())

# The second entry under A -> B holds the list being extended.
my_array = data['A']['B'][1]['myArray']
my_array.extend(['val 3', '<< jinja.variable2 >>'])

new_yaml = yaml_template_from_dict(data, default_flow_style=False)
print(new_yaml)
Contents of data.yml:
A:
  B:
  - ip: 1.2.3.4
  - myArray:
    - {{ x['}}'] }}
    - {{ [(1, 2, (3, 4))] }}
    - {{ jinja.variable }}
    - val1
    - val2
Output:
A:
  B:
  - ip: 1.2.3.4
  - myArray:
    - {{ x['}}'] }}
    - {{ [(1, 2, (3, 4))] }}
    - {{ jinja.variable }}
    - val1
    - val2
    - val 3
    - {{ jinja.variable2 }}