This commit is contained in:
Eric Lapouyade 2020-05-24 11:52:03 +02:00
parent 5ee1c03410
commit 7363428782
5 changed files with 47 additions and 7 deletions

View File

@ -1,3 +1,7 @@
0.10.0 (2020-05-25)
-------------------
- Fix missing spaces in some cases (#116, #227)
0.9.2 (2020-04-26)
-------------------
- Fix #271

View File

@ -54,8 +54,25 @@ If you want to manage paragraphs, table rows and a whole run with its style, you
**Note:**
a 'run' for Microsoft Word is a sequence of characters with the same style. For example, if you create a paragraph with all characters of the same style, MS Word will create internally only one 'run' in the paragraph. Now, if you put in bold a text in the middle of this paragraph, word will transform the previous 'run' into 3 different 'runs' (normal - bold - normal).
A 'run' for Microsoft Word is a sequence of characters with the same style.
For example, if you create a paragraph with all characters of the same style,
MS Word will internally create only one 'run' in the paragraph. Now,
if you make some text in the middle of this paragraph bold,
Word will split the previous 'run' into 3 different 'runs' (normal - bold - normal).
**Important:**
Always put a space after a jinja2 opening variable/tag delimiter and a space before the closing one:
Avoid::
{{myvariable}}
{%if something%}
Use instead::
{{ myvariable }}
{% if something %}
Extensions
++++++++++

View File

@ -7,7 +7,7 @@ Created : 2015-03-12
import functools
import io
__version__ = '0.9.2'
__version__ = '0.10.0'
from lxml import etree
from docx import Document
@ -67,11 +67,17 @@ class DocxTemplate(object):
fh.write(self.get_xml())
def patch_xml(self, src_xml):
# strip all xml tags inside {% %} and {{ }} that MS word can insert
# into xml source also unescape html entities
""" Do a lot of cleaning to get a raw xml understandable by jinja2 :
strip all unnecessary xml tags, manage table cell background color and colspan,
unescape html entities, etc... """
# replace {<something>{ by {{ ( works with {{ }} {% and %} )
src_xml = re.sub(r'(?<={)(<[^>]*>)+(?=[\{%])|(?<=[%\}])(<[^>]*>)+(?=\})', '',
src_xml, flags=re.DOTALL)
# replace {{<some tags>jinja2 stuff<some other tags>}} by {{jinja2 stuff}}
# same thing with {% ... %}
# "jinja2 stuff" could be a variable, an 'if', etc... anything jinja2 will understand
def striptags(m):
return re.sub('</w:t>.*?(<w:t>|<w:t [^>]*>)', '',
m.group(0), flags=re.DOTALL)
@ -101,9 +107,8 @@ class DocxTemplate(object):
src_xml = re.sub(r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?</w:tc>)',
cellbg, src_xml, flags=re.DOTALL)
# avoid {{r and {%r tags to strip MS xml tags too far
# ensure space preservation when splitting
src_xml = re.sub(r'<w:t>((?:(?!<w:t>).)*)({{r\s.*?}}|{%r\s.*?%})',
# ensure space preservation
src_xml = re.sub(r'<w:t>((?:(?!<w:t>).)*)({{.*?}}|{%.*?%})',
r'<w:t xml:space="preserve">\1\2',
src_xml, flags=re.DOTALL)
src_xml = re.sub(r'({{r\s.*?}}|{%r\s.*?%})',

14
tests/preserve_spaces.py Normal file
View File

@ -0,0 +1,14 @@
from docxtpl import DocxTemplate, InlineImage
# Regression check for lost whitespace around replaced tags.
# Older docxtpl versions rendered "... for spicy ..." as "... forspicy...";
# rendering this template lets you verify that, in such cases, the spaces
# are no longer dropped.

# Values substituted for the tags used by the template.
context = {
    'tag_1': 'looking',
    'tag_2': 'too',
}

tpl = DocxTemplate('templates/preserve_spaces_tpl.docx')
tpl.render(context)
tpl.save('output/preserve_spaces.docx')

Binary file not shown.