diff --git a/docxtpl/__init__.py b/docxtpl/__init__.py
index c20bdc1..287fcd0 100644
--- a/docxtpl/__init__.py
+++ b/docxtpl/__init__.py
@@ -15,7 +15,14 @@ from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE
 from jinja2 import Template
 from jinja2.exceptions import TemplateError
 try:
-    from html import escape
+    from html import escape, unescape
 except ImportError:
+    # html.unescape needs Python >= 3.4; older interpreters only offer
+    # the equivalent HTMLParser.unescape method.
+    try:
+        from html.parser import HTMLParser
+    except ImportError:
+        from HTMLParser import HTMLParser
+    unescape = HTMLParser().unescape
     # cgi.escape is deprecated in python 3.7
     from cgi import escape
@@ -169,7 +176,45 @@ class DocxTemplate(object):
     def map_headers_footers_xml(self, relKey, xml):
         self.docx._part._rels[relKey]._target._blob = xml
 
-    def render(self,context,jinja_env=None):
+    @staticmethod
+    def escape_values(context):
+        """Escape in place every string reachable from `context`.
+
+        The characters <, >, &, ' and " are replaced by XML entities so
+        plain text cannot break the Word document. Nested dicts and
+        lists are traversed; values of any other type (Listing,
+        InlineImage, RichText, ...) are left untouched.
+
+        :param context: dict of template variables, modified in place
+        """
+        # Track visited containers by id() so shared or cyclic references
+        # are handled once instead of looping forever.
+        seen = {id(context)}
+        # An explicit stack replaces recursion: deep nesting cannot hit the
+        # recursion limit, and no `nonlocal` (a Python 2 syntax error).
+        pending = [context]
+        while pending:
+            container = pending.pop()
+            if isinstance(container, dict):
+                entries = list(container.items())
+            else:
+                entries = list(enumerate(container))
+            for key, value in entries:
+                if isinstance(value, six.string_types):
+                    # Unescape first so text that is already escaped
+                    # does not get escaped a second time.
+                    container[key] = escape(unescape(value))
+                elif isinstance(value, (dict, list)):
+                    if id(value) not in seen:
+                        seen.add(id(value))
+                        pending.append(value)
+
+    def render(self, context, jinja_env=None, autoescape=False):
+        # Escaping is opt-in: it rewrites the caller's context, and a
+        # template already using the "|e" filter would double-escape.
+        if autoescape:
+            self.escape_values(context)
+
         # Body
         xml_src = self.build_xml(context,jinja_env)
 
diff --git a/tests/escape_auto.py b/tests/escape_auto.py
new file mode 100644
index 0000000..f38c3ed
--- /dev/null
+++ b/tests/escape_auto.py
@@ -0,0 +1,13 @@
+from docxtpl import *
+
+tpl = DocxTemplate("test_files/escape_tpl_auto.docx")
+
+context = {'myvar': R('"less than" must be escaped : <, this can be done with RichText() or R()'),
+           'myescvar':'It can be escaped with a "|e" jinja filter in the template too : < ',
+           'nlnp' : R('Here is a multiple\nlines\nstring\aand some\aother\aparagraphs\aNOTE: the current character styling is removed'),
+           'mylisting': Listing('the listing\nwith\nsome\nlines\nand special chars : <>&'),
+           'autoescape': """These string should be auto escaped for an XML Word document which may contain <, >, &, ", and '."""
+           }
+
+tpl.render(context, autoescape=True)
+tpl.save("test_files/escape_auto.docx")
diff --git a/tests/test_files/escape_auto.docx b/tests/test_files/escape_auto.docx
new file mode 100644
index 0000000..634c112
Binary files /dev/null and b/tests/test_files/escape_auto.docx differ
diff --git a/tests/test_files/escape_tpl_auto.docx b/tests/test_files/escape_tpl_auto.docx
new file mode 100644
index 0000000..c6f7a6c
Binary files /dev/null and b/tests/test_files/escape_tpl_auto.docx differ