Working on better subdocs

This commit is contained in:
Eric Lapouyade 2021-07-31 12:49:47 +02:00
parent 0a1cb24b12
commit 30712bbc67
15 changed files with 1092 additions and 994 deletions

View File

@ -1,3 +1,9 @@
0.12.0 (2021-07-31)
-------------------
- Code has been split into many files for better readability
- Use docxcomposer to attach parts when a docx file is given to create a subdoc
Images, styles etc... must now be taken into account in subdocs
0.11.5 (2021-05-09)
-------------------
- PR #351

11
Pipfile
View File

@ -1,13 +1,12 @@
[[source]]
url = "https://pypi.python.org/simple"
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[dev-packages]
"e1839a8" = {path = ".", editable = true}
[packages]
flake8 = "*"
[dev-packages]
docxtpl = {editable = true, path = "."}
[requires]
python_version = "3.6"
python_version = "3.9"

242
Pipfile.lock generated
View File

@ -1,181 +1,145 @@
{
"_meta": {
"hash": {
"sha256": "23d67e77e436d7d9001f9d16dcfd0a12cc15bc444b55aace59c1d999efbb2680"
"sha256": "386706742fa1989e0f0bd8fd5cfca85a2b70f29277202755af3e02ca7eb456c8"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.6"
"python_version": "3.9"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.python.org/simple",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"flake8": {
"hashes": [
"sha256:749dbbd6bfd0cf1318af27bf97a14e28e5ff548ef8e5b1566ccfb25a11e7c839",
"sha256:aadae8761ec651813c24be05c6f7b4680857ef6afaae4651a4eccaef97ce6c3b"
],
"index": "pypi",
"version": "==3.8.4"
},
"importlib-metadata": {
"hashes": [
"sha256:77a540690e24b0305878c37ffd421785a6f7e53c8b5720d211b211de8d0e95da",
"sha256:cefa1a2f919b866c5beb7c9f7b0ebb4061f30a8a9bf16d609b000e2dfaceb9c3"
],
"markers": "python_version < '3.8'",
"version": "==2.0.0"
},
"mccabe": {
"hashes": [
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
],
"version": "==0.6.1"
},
"pycodestyle": {
"hashes": [
"sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367",
"sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.6.0"
},
"pyflakes": {
"hashes": [
"sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92",
"sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.2.0"
},
"zipp": {
"hashes": [
"sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
"sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
],
"markers": "python_version >= '3.6'",
"version": "==3.4.0"
}
},
"default": {},
"develop": {
"docxtpl": {
"editable": true,
"path": "."
"docxcompose": {
"hashes": [
"sha256:9b99f878469de72482e68a60521591f59a469e747ac7c46bb0ae6d682671d3e6"
],
"version": "==1.3.2"
},
"e1839a8": {
"docxtpl": {
"editable": true,
"path": "."
},
"jinja2": {
"hashes": [
"sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
"sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"
"sha256:1f06f2da51e7b56b8f238affdd6b4e2c61e39598a378cc49345bc1bd42a978a4",
"sha256:703f484b47a6af502e743c9122595cc812b0271f661722403114f71a79d0f5a4"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==2.11.2"
"markers": "python_version >= '3.6'",
"version": "==3.0.1"
},
"lxml": {
"hashes": [
"sha256:098fb713b31050463751dcc694878e1d39f316b86366fb9fe3fbbe5396ac9fab",
"sha256:0e89f5d422988c65e6936e4ec0fe54d6f73f3128c80eb7ecc3b87f595523607b",
"sha256:189ad47203e846a7a4951c17694d845b6ade7917c47c64b29b86526eefc3adf5",
"sha256:1d87936cb5801c557f3e981c9c193861264c01209cb3ad0964a16310ca1b3301",
"sha256:211b3bcf5da70c2d4b84d09232534ad1d78320762e2c59dedc73bf01cb1fc45b",
"sha256:2358809cc64394617f2719147a58ae26dac9e21bae772b45cfb80baa26bfca5d",
"sha256:23c83112b4dada0b75789d73f949dbb4e8f29a0a3511647024a398ebd023347b",
"sha256:24e811118aab6abe3ce23ff0d7d38932329c513f9cef849d3ee88b0f848f2aa9",
"sha256:2d5896ddf5389560257bbe89317ca7bcb4e54a02b53a3e572e1ce4226512b51b",
"sha256:2d6571c48328be4304aee031d2d5046cbc8aed5740c654575613c5a4f5a11311",
"sha256:2e311a10f3e85250910a615fe194839a04a0f6bc4e8e5bb5cac221344e3a7891",
"sha256:302160eb6e9764168e01d8c9ec6becddeb87776e81d3fcb0d97954dd51d48e0a",
"sha256:3a7a380bfecc551cfd67d6e8ad9faa91289173bdf12e9cfafbd2bdec0d7b1ec1",
"sha256:3d9b2b72eb0dbbdb0e276403873ecfae870599c83ba22cadff2db58541e72856",
"sha256:475325e037fdf068e0c2140b818518cf6bc4aa72435c407a798b2db9f8e90810",
"sha256:4b7572145054330c8e324a72d808c8c8fbe12be33368db28c39a255ad5f7fb51",
"sha256:4fff34721b628cce9eb4538cf9a73d02e0f3da4f35a515773cce6f5fe413b360",
"sha256:56eff8c6fb7bc4bcca395fdff494c52712b7a57486e4fbde34c31bb9da4c6cc4",
"sha256:573b2f5496c7e9f4985de70b9bbb4719ffd293d5565513e04ac20e42e6e5583f",
"sha256:7ecaef52fd9b9535ae5f01a1dd2651f6608e4ec9dc136fc4dfe7ebe3c3ddb230",
"sha256:803a80d72d1f693aa448566be46ffd70882d1ad8fc689a2e22afe63035eb998a",
"sha256:8862d1c2c020cb7a03b421a9a7b4fe046a208db30994fc8ff68c627a7915987f",
"sha256:9b06690224258db5cd39a84e993882a6874676f5de582da57f3df3a82ead9174",
"sha256:a71400b90b3599eb7bf241f947932e18a066907bf84617d80817998cee81e4bf",
"sha256:bb252f802f91f59767dcc559744e91efa9df532240a502befd874b54571417bd",
"sha256:be1ebf9cc25ab5399501c9046a7dcdaa9e911802ed0e12b7d620cd4bbf0518b3",
"sha256:be7c65e34d1b50ab7093b90427cbc488260e4b3a38ef2435d65b62e9fa3d798a",
"sha256:c0dac835c1a22621ffa5e5f999d57359c790c52bbd1c687fe514ae6924f65ef5",
"sha256:c152b2e93b639d1f36ec5a8ca24cde4a8eefb2b6b83668fcd8e83a67badcb367",
"sha256:d182eada8ea0de61a45a526aa0ae4bcd222f9673424e65315c35820291ff299c",
"sha256:d18331ea905a41ae71596502bd4c9a2998902328bbabd29e3d0f5f8569fabad1",
"sha256:d20d32cbb31d731def4b1502294ca2ee99f9249b63bc80e03e67e8f8e126dea8",
"sha256:d4ad7fd3269281cb471ad6c7bafca372e69789540d16e3755dd717e9e5c9d82f",
"sha256:d6f8c23f65a4bfe4300b85f1f40f6c32569822d08901db3b6454ab785d9117cc",
"sha256:d84d741c6e35c9f3e7406cb7c4c2e08474c2a6441d59322a00dcae65aac6315d",
"sha256:e65c221b2115a91035b55a593b6eb94aa1206fa3ab374f47c6dc10d364583ff9",
"sha256:f98b6f256be6cec8dd308a8563976ddaff0bdc18b730720f6f4bee927ffe926f"
"sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d",
"sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3",
"sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2",
"sha256:1b38116b6e628118dea5b2186ee6820ab138dbb1e24a13e478490c7db2f326ae",
"sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f",
"sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927",
"sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3",
"sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7",
"sha256:3082c518be8e97324390614dacd041bb1358c882d77108ca1957ba47738d9d59",
"sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f",
"sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade",
"sha256:36108c73739985979bf302006527cf8a20515ce444ba916281d1c43938b8bb96",
"sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468",
"sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b",
"sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4",
"sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354",
"sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83",
"sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04",
"sha256:5c8c163396cc0df3fd151b927e74f6e4acd67160d6c33304e805b84293351d16",
"sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791",
"sha256:6f12e1427285008fd32a6025e38e977d44d6382cf28e7201ed10d6c1698d2a9a",
"sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51",
"sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1",
"sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a",
"sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f",
"sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee",
"sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec",
"sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969",
"sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28",
"sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a",
"sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa",
"sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106",
"sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d",
"sha256:c47ff7e0a36d4efac9fd692cfa33fbd0636674c102e9e8d9b26e1b93a94e7617",
"sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4",
"sha256:cdaf11d2bd275bf391b5308f86731e5194a21af45fbaaaf1d9e8147b9160ea92",
"sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0",
"sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4",
"sha256:d916d31fd85b2f78c76400d625076d9124de3e4bda8b016d25a050cc7d603f24",
"sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2",
"sha256:e1cbd3f19a61e27e011e02f9600837b921ac661f0c40560eefb366e4e4fb275e",
"sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0",
"sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654",
"sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2",
"sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23",
"sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.6.1"
"version": "==4.6.3"
},
"markupsafe": {
"hashes": [
"sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
"sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161",
"sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
"sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
"sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
"sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
"sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
"sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
"sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
"sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
"sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66",
"sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b",
"sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1",
"sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
"sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
"sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
"sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
"sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
"sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
"sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d",
"sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e",
"sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d",
"sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c",
"sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21",
"sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2",
"sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5",
"sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b",
"sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6",
"sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f",
"sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f",
"sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2",
"sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7",
"sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"
"sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298",
"sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64",
"sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b",
"sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567",
"sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff",
"sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74",
"sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35",
"sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26",
"sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7",
"sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75",
"sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f",
"sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135",
"sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8",
"sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a",
"sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914",
"sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18",
"sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8",
"sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2",
"sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d",
"sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b",
"sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f",
"sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb",
"sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833",
"sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415",
"sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902",
"sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9",
"sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d",
"sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066",
"sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f",
"sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5",
"sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94",
"sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509",
"sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51",
"sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.1.1"
"markers": "python_version >= '3.6'",
"version": "==2.0.1"
},
"python-docx": {
"hashes": [
"sha256:bc76ecac6b2d00ce6442a69d03a6f35c71cd72293cd8405a7472dfe317920024"
"sha256:1105d233a0956dd8dd1e710d20b159e2d72ac3c301041b95f4d4ceb3e0ebebc4"
],
"version": "==0.8.10"
"version": "==0.8.11"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'",
"version": "==1.16.0"
}
}
}

View File

@ -4,849 +4,11 @@ Created : 2015-03-12
@author: Eric Lapouyade
"""
__version__ = '0.11.5'
__version__ = '0.12.0.dev0'
import functools
import io
from lxml import etree
from docx import Document
from docx.opc.oxml import parse_xml
from docx.opc.part import XmlPart
import docx.oxml.ns
from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE
from jinja2 import Environment, Template, meta
from jinja2.exceptions import TemplateError
try:
from html import escape
except ImportError:
# cgi.escape is deprecated in python 3.7
from cgi import escape
import re
import six
import binascii
import os
import zipfile
from .inline_image import InlineImage
from .listing import Listing
from .richtext import RichText, R
from .subdoc import Subdoc
from .template import DocxTemplate
class DocxTemplate(object):
""" Class for managing docx files as they were jinja2 templates """
HEADER_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
FOOTER_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
def __init__(self, docx):
    """Open the template and start with empty replacement registries.

    ``docx`` is passed unchanged to ``Document()``.
    """
    self.docx = Document(docx)
    # Registries filled by replace_media(), replace_embedded(),
    # replace_zipname() and replace_pic() respectively.
    self.crc_to_new_media, self.crc_to_new_embedded = {}, {}
    self.zipname_to_replace, self.pics_to_replace = {}, {}
    self.pic_map = {}
    # Set by render_xml_part() to the part it is currently rendering.
    self.current_rendering_part = None
def __getattr__(self, name):
return getattr(self.docx, name)
def xml_to_string(self, xml, encoding='unicode'):
    """Serialize ``xml`` with ``etree.tostring()`` and return the result.

    NOTE: the ``encoding`` argument is accepted but not used; the
    serialization is always done with ``encoding='unicode'``.
    """
    # pretty_print has to stay off: patch_xml() won't work properly on
    # pretty-printed output.
    serialized = etree.tostring(xml, pretty_print=False, encoding='unicode')
    return serialized
def get_docx(self):
    """Return the wrapped document object stored in ``self.docx``."""
    document = self.docx
    return document
def get_xml(self):
    """Return the document body (``self.docx._element.body``) as a string."""
    body = self.docx._element.body
    return self.xml_to_string(body)
def write_xml(self, filename):
    """Write the body xml returned by ``get_xml()`` to ``filename``.

    The file is written as UTF-8 instead of the platform default encoding:
    ``get_xml()`` returns text without an XML declaration, and a default
    such as cp1252 can fail on, or mis-encode, non-ASCII characters.
    """
    # io.open accepts ``encoding`` on every Python version.
    with io.open(filename, 'w', encoding='utf-8') as fh:
        fh.write(self.get_xml())
def patch_xml(self, src_xml):
    """Clean up raw docx xml so that jinja2 can understand it.

    Strips the xml tags that ended up inside template tags, handles the
    table cell helpers (``colspan``, ``cellbg``, ``vm``, ``hm``), forces
    space preservation on text holding template tags, unwraps the
    ``{%tr``/``{%tc``/``{%p``/``{%r`` forms and finally un-escapes
    html entities and typographic quotes inside jinja2 tags.

    Args:
        src_xml: the xml source, as a string.

    Returns:
        The patched xml string.
    """
    # replace {<something>{ by {{ ( works with {{ }} {% and %} )
    src_xml = re.sub(r'(?<={)(<[^>]*>)+(?=[\{%])|(?<=[%\}])(<[^>]*>)+(?=\})', '',
                     src_xml, flags=re.DOTALL)

    # replace {{<some tags>jinja2 stuff<some other tags>}} by {{jinja2 stuff}}
    # same thing with {% ... %}
    # "jinja2 stuff" could be a variable, an 'if' etc... anything jinja2
    # will understand
    def striptags(m):
        return re.sub('</w:t>.*?(<w:t>|<w:t [^>]*>)', '',
                      m.group(0), flags=re.DOTALL)
    src_xml = re.sub(r'{%(?:(?!%}).)*|{{(?:(?!}}).)*', striptags,
                     src_xml, flags=re.DOTALL)

    # manage table cell colspan
    def colspan(m):
        cell_xml = m.group(1) + m.group(3)
        cell_xml = re.sub(r'<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>',
                          '', cell_xml, flags=re.DOTALL)
        cell_xml = re.sub(r'<w:gridSpan[^/]*/>', '', cell_xml, count=1)
        return re.sub(r'(<w:tcPr[^>]*>)', r'\1<w:gridSpan w:val="{{%s}}"/>'
                      % m.group(2), cell_xml)
    src_xml = re.sub(r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?</w:tc>)',
                     colspan, src_xml, flags=re.DOTALL)

    # manage table cell background color
    def cellbg(m):
        cell_xml = m.group(1) + m.group(3)
        cell_xml = re.sub(r'<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>',
                          '', cell_xml, flags=re.DOTALL)
        cell_xml = re.sub(r'<w:shd[^/]*/>', '', cell_xml, count=1)
        return re.sub(r'(<w:tcPr[^>]*>)',
                      r'\1<w:shd w:val="clear" w:color="auto" w:fill="{{%s}}"/>'
                      % m.group(2), cell_xml)
    src_xml = re.sub(r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?</w:tc>)',
                     cellbg, src_xml, flags=re.DOTALL)

    # ensure space preservation
    src_xml = re.sub(r'<w:t>((?:(?!<w:t>).)*)({{.*?}}|{%.*?%})',
                     r'<w:t xml:space="preserve">\1\2',
                     src_xml, flags=re.DOTALL)
    src_xml = re.sub(r'({{r\s.*?}}|{%r\s.*?%})',
                     r'</w:t></w:r><w:r><w:t xml:space="preserve">\1</w:t></w:r><w:r><w:t xml:space="preserve">',
                     src_xml, flags=re.DOTALL)

    # {%- will merge with previous paragraph text
    src_xml = re.sub(r'</w:t>(?:(?!</w:t>).)*?{%-', '{%', src_xml, flags=re.DOTALL)
    # -%} will merge with next paragraph text
    src_xml = re.sub(r'-%}(?:(?!<w:t[ >]).)*?<w:t[^>]*?>', '%}', src_xml, flags=re.DOTALL)

    for y in ['tr', 'tc', 'p', 'r']:
        # replace into xml code the row/paragraph/run containing
        # {%y xxx %} or {{y xxx}} template tag
        # by {% xxx %} or {{ xx }} without any surrounding <w:y> tags :
        # This is mandatory to have jinja2 generating correct xml code
        pat = r'<w:%(y)s[ >](?:(?!<w:%(y)s[ >]).)*({%%|{{)%(y)s ([^}%%]*(?:%%}|}})).*?</w:%(y)s>' % {'y': y}
        src_xml = re.sub(pat, r'\1 \2', src_xml, flags=re.DOTALL)

    # add vMerge
    # use {% vm %} to make this table cell and its copies be vertically
    # merged within a {% for %}
    def v_merge_tc(m):
        def v_merge(m1):
            return (
                '<w:vMerge w:val="{% if loop.first %}restart{% else %}continue{% endif %}"/>' +
                m1.group(1) +  # Everything between ``</w:tcPr>`` and ``<w:t>``.
                "{% if loop.first %}" +
                m1.group(2) +  # Everything before ``{% vm %}``.
                m1.group(3) +  # Everything after ``{% vm %}``.
                "{% endif %}" +
                m1.group(4)  # ``</w:t>``.
            )
        return re.sub(
            r'(</w:tcPr[ >].*?<w:t(?:.*?)>)(.*?)(?:{%\s*vm\s*%})(.*?)(</w:t>)',
            v_merge,
            m.group(),  # Everything between ``</w:tc>`` and ``</w:tc>`` with ``{% vm %}`` inside.
            flags=re.DOTALL,
        )
    src_xml = re.sub(r'<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*vm\s*%}.*?</w:tc[ >]',
                     v_merge_tc, src_xml, flags=re.DOTALL)

    # Use ``{% hm %}`` to make table cell become horizontally merged within
    # a ``{% for %}``.
    def h_merge_tc(m):
        xml_to_patch = m.group()  # Everything between ``</w:tc>`` and ``</w:tc>`` with ``{% hm %}`` inside.

        def with_gridspan(m1):
            return (
                m1.group(1) +  # ``w:gridSpan w:val="``.
                '{{ ' + m1.group(2) + ' * loop.length }}' +  # Content of ``w:val``, multiplied by loop length.
                m1.group(3)  # Closing quotation mark.
            )

        def without_gridspan(m2):
            return (
                '<w:gridSpan w:val="{{ loop.length }}"/>' +
                m2.group(1) +  # Everything between ``</w:tcPr>`` and ``<w:t>``.
                m2.group(2) +  # Everything before ``{% hm %}``.
                m2.group(3) +  # Everything after ``{% hm %}``.
                m2.group(4)  # ``</w:t>``.
            )

        if re.search(r'w:gridSpan', xml_to_patch):
            # Simple case, there's already ``gridSpan``, multiply its value.
            xml = re.sub(
                r'(w:gridSpan w:val=")(\d+)(")',
                with_gridspan,
                xml_to_patch,
                flags=re.DOTALL,
            )
            xml = re.sub(
                r'{%\s*hm\s*%}',
                '',
                xml,  # Patched xml.
                flags=re.DOTALL,
            )
        else:
            # There is no ``gridSpan``, add one.
            xml = re.sub(
                r'(</w:tcPr[ >].*?<w:t(?:.*?)>)(.*?)(?:{%\s*hm\s*%})(.*?)(</w:t>)',
                without_gridspan,
                xml_to_patch,
                flags=re.DOTALL,
            )

        # Discard every other cell generated in loop.
        return "{% if loop.first %}" + xml + "{% endif %}"
    src_xml = re.sub(r'<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*hm\s*%}.*?</w:tc[ >]',
                     h_merge_tc, src_xml, flags=re.DOTALL)

    def clean_tags(m):
        # The typographic quotes are spelled as escapes on purpose: written
        # as raw characters they can be lost in transit, and replacing an
        # empty string would insert a quote between every character of
        # the tag.
        return (m.group(0)
                .replace(r"&#8216;", "'")
                .replace('&lt;', '<')
                .replace('&gt;', '>')
                .replace(u'\u201c', u'"')   # left double quotation mark
                .replace(u'\u201d', u'"')   # right double quotation mark
                .replace(u'\u2018', u"'")   # left single quotation mark
                .replace(u'\u2019', u"'"))  # right single quotation mark
    src_xml = re.sub(r'(?<=\{[\{%])(.*?)(?=[\}%]})', clean_tags, src_xml)

    return src_xml
def render_xml_part(self, src_xml, part, context, jinja_env=None):
    """Render one patched xml part with jinja2 and return the result.

    Args:
        src_xml: xml source already processed by ``patch_xml()``.
        part: the part being rendered; stored in
            ``self.current_rendering_part`` for the duration of the call
            and left there afterwards.
        context: the variables passed to ``template.render()``.
        jinja_env: optional jinja2 environment; when given, the template
            is built with ``jinja_env.from_string()``, otherwise with
            ``Template()``.

    Returns:
        The rendered xml, after ``resolve_listing()``.

    Raises:
        TemplateError: re-raised from jinja2. When the error carries a line
            number, ``exc.docx_context`` is set to a list of up to 7 source
            lines around it, with the xml tags stripped.
    """
    # One paragraph per line, so that a jinja2 line number points at a
    # paragraph (undone after rendering).
    src_xml = src_xml.replace(r'<w:p>', '\n<w:p>')
    try:
        self.current_rendering_part = part
        if jinja_env:
            template = jinja_env.from_string(src_xml)
        else:
            template = Template(src_xml)
        dst_xml = template.render(context)
    except TemplateError as exc:
        if hasattr(exc, 'lineno') and exc.lineno is not None:
            line_number = max(exc.lineno - 4, 0)
            # A real list rather than a lazy map(): it can be read more
            # than once, indexed and printed by whoever catches the error.
            exc.docx_context = [
                re.sub(r'<[^>]+>', '', line)
                for line in src_xml.splitlines()[line_number:(line_number + 7)]
            ]
        raise
    dst_xml = dst_xml.replace('\n<w:p>', '<w:p>')
    # Turn the underscore forms back into literal jinja2-looking delimiters.
    dst_xml = (dst_xml
               .replace('{_{', '{{')
               .replace('}_}', '}}')
               .replace('{_%', '{%')
               .replace('%_}', '%}'))
    dst_xml = self.resolve_listing(dst_xml)
    return dst_xml
def resolve_listing(self, xml):
    """Expand the control characters found in text runs into docx xml.

    Inside every ``<w:t>`` element: a tab becomes a ``<w:tab/>`` run,
    ``\\a`` closes the paragraph and opens a new one, a newline becomes
    ``<w:br/>`` and a form feed inserts a page-break paragraph. The
    ``<w:rPr>`` / ``<w:pPr>`` found in the enclosing run / paragraph are
    repeated in the runs and paragraphs that get created.
    """
    def first_match(pattern, text):
        # Text of the first match, or '' when there is none.
        found = re.search(pattern, text)
        return found.group(0) if found else ''

    def expand_text(run_props, para_props, text_match):
        # Applied strictly in this order, one after the other.
        substitutions = [
            ('\t', '</w:t></w:r>'
                   '<w:r>%s<w:tab/></w:r>'
                   '<w:r>%s<w:t xml:space="preserve">' % (run_props, run_props)),
            ('\a', '</w:t></w:r></w:p>'
                   '<w:p>%s<w:r>%s<w:t xml:space="preserve">' % (para_props, run_props)),
            ('\n', '</w:t><w:br/><w:t xml:space="preserve">'),
            ('\f', '</w:t></w:r></w:p>'
                   '<w:p><w:r><w:br w:type="page"/></w:r></w:p>'
                   '<w:p>%s<w:r>%s<w:t xml:space="preserve">' % (para_props, run_props)),
        ]
        text = text_match.group(0)
        for char, markup in substitutions:
            text = text.replace(char, markup)
        return text

    def expand_run(para_props, run_match):
        run_xml = run_match.group(0)
        run_props = first_match(r'<w:rPr>.*?</w:rPr>', run_xml)
        return re.sub(r'<w:t(?:[^>]*)?>.*?</w:t>',
                      lambda t: expand_text(run_props, para_props, t),
                      run_xml, flags=re.DOTALL)

    def expand_paragraph(para_match):
        para_xml = para_match.group(0)
        para_props = first_match(r'<w:pPr>.*?</w:pPr>', para_xml)
        return re.sub(r'<w:r(?:[^>]*)?>.*?</w:r>',
                      lambda r: expand_run(para_props, r),
                      para_xml, flags=re.DOTALL)

    return re.sub(r'<w:p(?:[^>]*)?>.*?</w:p>', expand_paragraph, xml, flags=re.DOTALL)
def build_xml(self, context, jinja_env=None):
    """Return the rendered body xml.

    Chains ``get_xml()``, ``patch_xml()`` and ``render_xml_part()``; the
    part passed to the renderer is ``self.docx._part``.
    """
    patched = self.patch_xml(self.get_xml())
    return self.render_xml_part(patched, self.docx._part, context, jinja_env)
def map_tree(self, tree):
    """Replace the body of the document root element with ``tree``."""
    document_root = self.docx._element
    document_root.replace(document_root.body, tree)
def get_headers_footers(self, uri):
    """Yield ``(relationship key, target part)`` pairs for ``uri``.

    Walks the relationships of ``self.docx._part`` and keeps those whose
    ``reltype`` equals ``uri`` and whose target part has a non-empty blob.
    """
    for rel_key, relationship in self.docx._part._rels.items():
        if not (relationship.reltype == uri):
            continue
        if relationship.target_part.blob:
            yield rel_key, relationship.target_part
def get_part_xml(self, part):
    """Parse ``part.blob`` with ``parse_xml()`` and return it as a string."""
    root = parse_xml(part.blob)
    return self.xml_to_string(root)
def get_headers_footers_encoding(self, xml):
    """Return the encoding named in the xml declaration of ``xml``.

    Only a declaration at the very start of the string, with the value in
    double quotes, is recognised (case-insensitively); ``'utf-8'`` is
    returned otherwise.
    """
    declared = re.match(r'<\?xml[^\?]+\bencoding="([^"]+)"', xml, re.I)
    return declared.group(1) if declared else 'utf-8'
def build_headers_footers_xml(self, context, uri, jinja_env=None):
    """Yield ``(relationship key, rendered xml bytes)`` for each part of ``uri``.

    Every part returned by ``get_headers_footers(uri)`` is patched,
    rendered with ``context`` and encoded with the encoding reported by
    ``get_headers_footers_encoding()`` for its source xml.
    """
    for rel_key, part in self.get_headers_footers(uri):
        raw_xml = self.get_part_xml(part)
        charset = self.get_headers_footers_encoding(raw_xml)
        patched = self.patch_xml(raw_xml)
        rendered = self.render_xml_part(patched, part, context, jinja_env)
        yield rel_key, rendered.encode(charset)
def map_headers_footers_xml(self, relKey, xml):
    """Point relationship ``relKey`` at a new part built from ``xml``.

    The new part is loaded with the partname, content type and package of
    the current target part, and every relationship of that part is
    loaded into it as well.
    """
    relationships = self.docx._part._rels
    old_part = relationships[relKey].target_part
    new_part = XmlPart.load(old_part.partname, old_part.content_type,
                            xml, old_part.package)
    for _unused_id, rel in old_part.rels.items():
        new_part.load_rel(rel.reltype, rel._target, rel.rId, rel.is_external)
    relationships[relKey]._target = new_part
def render(self, context, jinja_env=None, autoescape=False):
    """Render the body, then the headers, then the footers with ``context``.

    When ``autoescape`` is truthy it is set on ``jinja_env`` if one was
    given, otherwise a new ``Environment`` is created with it.
    """
    if autoescape:
        if jinja_env:
            jinja_env.autoescape = autoescape
        else:
            jinja_env = Environment(autoescape=autoescape)

    # Body, with the table grids fixed if needed.
    body_tree = self.fix_tables(self.build_xml(context, jinja_env))
    self.map_tree(body_tree)

    # Headers first, footers second.
    for uri in (self.HEADER_URI, self.FOOTER_URI):
        rendered_parts = self.build_headers_footers_xml(context, uri, jinja_env)
        for rel_key, part_xml in rendered_parts:
            self.map_headers_footers_xml(rel_key, part_xml)
# Using the TC tag in a for loop can produce rows whose number of cells no
# longer matches the number of columns declared for the table. This function
# is able to fix it.
def fix_tables(self, xml):
    """Parse ``xml`` and make each table grid agree with its rows.

    For every ``w:tbl`` element, ``w:gridCol`` entries are added when a row
    holds more ``w:tc`` cells than there are grid columns, and removed when
    there are more grid columns than the widest row needs (``w:gridSpan``
    values are counted). Column widths are rescaled in both cases.

    Args:
        xml: the rendered xml, parsed here with a recovering parser.

    Returns:
        The parsed (and possibly modified) element tree.
    """
    parser = etree.XMLParser(recover=True)
    tree = etree.fromstring(xml, parser=parser)
    # get namespace
    ns = '{' + tree.nsmap['w'] + '}'
    # walk through the xml and find tables
    for t in tree.iter(ns+'tbl'):
        # NOTE(review): a table without a w:tblGrid child makes find()
        # return None and the next line fail.
        tblGrid = t.find(ns+'tblGrid')
        columns = tblGrid.findall(ns+'gridCol')
        to_add = 0
        # walk through all rows and look for a higher cell count
        for r in t.iter(ns+'tr'):
            cells = r.findall(ns+'tc')
            if (len(columns) + to_add) < len(cells):
                to_add = len(cells) - len(columns)
        # is it necessary to add columns?
        if to_add > 0:
            # at first, calculate width of table according to columns
            # (we want to preserve it)
            width = 0.0
            new_average = None
            for c in columns:
                if not c.get(ns+'w') is None:
                    width += float(c.get(ns+'w'))
            # try to keep proportion of table
            if width > 0:
                old_average = width / len(columns)
                new_average = width / (len(columns) + to_add)
                # scale the old columns
                for c in columns:
                    c.set(ns+'w', str(int(float(c.get(ns+'w')) *
                                          new_average/old_average)))
                # add new columns
                for i in range(to_add):
                    etree.SubElement(tblGrid, ns+'gridCol',
                                     {ns+'w': str(int(new_average))})

        # Refetch columns after columns addition.
        columns = tblGrid.findall(ns + 'gridCol')
        columns_len = len(columns)

        cells_len_max = 0

        # Reducer: adds the grid span of ``cell`` (1 when it has no
        # w:gridSpan) to ``total``.
        def get_cell_len(total, cell):
            tc_pr = cell.find(ns + 'tcPr')
            grid_span = None if tc_pr is None else tc_pr.find(ns + 'gridSpan')

            if grid_span is not None:
                return total + int(grid_span.get(ns + 'val'))

            return total + 1

        # Calculate max of table cells to compare with `gridCol`.
        for r in t.iter(ns + 'tr'):
            cells = r.findall(ns + 'tc')
            cells_len = functools.reduce(get_cell_len, cells, 0)
            cells_len_max = max(cells_len_max, cells_len)

        to_remove = columns_len - cells_len_max

        # If, after the loop, there are fewer columns than there
        # originally were, remove the extra `gridCol` declarations.
        if to_remove > 0:
            # Have to keep track of the removed width to scale the
            # table back to its original width.
            removed_width = 0.0

            # The extra columns are taken from the end of the grid.
            for c in columns[-to_remove:]:
                removed_width += float(c.get(ns + 'w'))

                tblGrid.remove(c)

            columns_left = tblGrid.findall(ns + 'gridCol')

            # Distribute `removed_width` across all the columns that are
            # left after the extras removal.
            extra_space = 0
            if len(columns_left) > 0:
                extra_space = removed_width / len(columns_left)
                extra_space = int(extra_space)

            for c in columns_left:
                c.set(ns+'w', str(int(float(c.get(ns+'w')) + extra_space)))

    return tree
def new_subdoc(self, docpath=None):
    """Create and return a ``Subdoc`` bound to this template.

    ``docpath`` is passed through to the ``Subdoc`` constructor.
    """
    subdoc = Subdoc(self, docpath)
    return subdoc
@staticmethod
def get_file_crc(file_obj):
if hasattr(file_obj, 'read'):
buf = file_obj.read()
else:
with open(file_obj, 'rb') as fh:
buf = fh.read()
crc = (binascii.crc32(buf) & 0xFFFFFFFF)
return crc
def replace_media(self, src_file, dst_file):
    """Register a media file to be swapped for another one in the docx.

    This exists mainly because it is not possible to add images in a docx
    header/footer: put a dummy picture there, then declare its replacement
    here. Both arguments are either file paths or file-like objects.

    Syntax: tpl.replace_media('dummy_media_to_replace.png','media_to_paste.jpg')
        -- or --
            tpl.replace_media(io.BytesIO(image_stream), io.BytesIO(new_image_stream))

    Note: for images, the aspect ratio will be the same as the replaced image
    Note2: the source media file is required, as its CRC is what is used
           to find it in the docx
    """
    key = self.get_file_crc(src_file)
    if not hasattr(dst_file, 'read'):
        with open(dst_file, 'rb') as stream:
            payload = stream.read()
    else:
        payload = dst_file.read()
    self.crc_to_new_media[key] = payload
def replace_pic(self, embedded_file, dst_file):
    """Register a new picture for the embedded one named ``embedded_file``.

    ``embedded_file`` is the original name of the embedded picture (file
    basename only, not the full path). ``dst_file`` is the new picture,
    either a filename or a file-like object.

    Notes:
        1) embedded_file and dst_file must have the same extension/format;
           when dst_file is a file-like object, no check is done on
           format compatibility
        2) the aspect ratio will be the same as the replaced image
        3) There is no need to keep the original file (this is not the case
           for replace_embedded and replace_media)
    """
    if not hasattr(dst_file, 'read'):
        with open(dst_file, 'rb') as stream:
            picture = stream.read()
    else:
        # NOTE: file extension not checked
        picture = dst_file.read()
    self.pics_to_replace[embedded_file] = picture
def replace_embedded(self, src_file, dst_file):
    """Replace one embedded object by another one into a docx

    This has been done mainly because it is not possible to add images
    in docx header/footer.
    With this function, put a dummy picture in your header/footer,
    then specify it with its replacement in this function

    Syntax: tpl.replace_embedded('dummy_doc.docx','doc_to_paste.docx')

    ``dst_file`` is either a file path or a file-like object, like in
    replace_media() and replace_pic().

    Note2 : it is important to have the source file as it is required to
            calculate its CRC to find them in the docx
    """
    # Read the replacement first, then compute the CRC of the source.
    if hasattr(dst_file, 'read'):
        new_content = dst_file.read()
    else:
        with open(dst_file, 'rb') as fh:
            new_content = fh.read()
    crc = self.get_file_crc(src_file)
    self.crc_to_new_embedded[crc] = new_content
def replace_zipname(self, zipname, dst_file):
    """Replace one file in the docx file

    First note that a MSWord .docx file is in fact a zip file.

    This method can be used to replace a document embedded in the docx
    template.

    Some embedded documents may have been modified by MSWord while saving
    the template : thus replace_embedded() cannot be used as the CRC is
    not the same as the original file.

    This method works for embedded MSWord files like Excel or PowerPoint
    files, but won't work for others like PDF, Python or even Text files :
    for these ones, MSWord generates an oleObjectNNN.bin file which is of
    no use to replace as it is encoded.

    Syntax:

    tpl.replace_zipname(
        'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
        'my_excel_file.xlsx')

    The zipname is the one you can find when you open docx with WinZip,
    7zip (Windows) or unzip -l (Linux). The zipname starts with
    "word/embeddings/". Note that the file is renamed by MSWord,
    so you have to guess a little bit...

    ``dst_file`` may be a path or, like in replace_media(), a file-like
    object opened in binary mode.
    """
    if hasattr(dst_file, 'read'):
        # File-like replacement: consume it as-is
        self.zipname_to_replace[zipname] = dst_file.read()
    else:
        with open(dst_file, 'rb') as fh:
            self.zipname_to_replace[zipname] = fh.read()
def post_processing(self, docx_file):
    """Rewrite the saved docx archive with the registered replacements.

    Nothing happens unless at least one of crc_to_new_media,
    crc_to_new_embedded or zipname_to_replace is non-empty. Otherwise the
    archive is copied entry by entry, substituting the entries that match
    a replacement. ``docx_file`` is either a path or a file-like object.
    """
    has_replacements = (self.crc_to_new_media or
                        self.crc_to_new_embedded or
                        self.zipname_to_replace)
    if not has_replacements:
        return

    is_stream = hasattr(docx_file, 'read')
    if is_stream:
        # Keep a copy of the current content in memory, then empty the
        # caller's stream so it can receive the rewritten archive
        tmp_file = io.BytesIO()
        DocxTemplate(docx_file).save(tmp_file)
        tmp_file.seek(0)
        docx_file.seek(0)
        docx_file.truncate()
        docx_file.seek(0)
    else:
        # Move the saved file aside and rebuild it under its final name
        tmp_file = '%s_docxtpl_before_replace_medias' % docx_file
        os.rename(docx_file, tmp_file)

    with zipfile.ZipFile(tmp_file) as zin:
        with zipfile.ZipFile(docx_file, 'w') as zout:
            for entry in zin.infolist():
                name = entry.filename
                data = zin.read(name)
                # Explicit zipname replacement wins over CRC-based ones
                if name in self.zipname_to_replace:
                    data = self.zipname_to_replace[name]
                elif (name.startswith('word/media/') and
                        entry.CRC in self.crc_to_new_media):
                    data = self.crc_to_new_media[entry.CRC]
                elif (name.startswith('word/embeddings/') and
                        entry.CRC in self.crc_to_new_embedded):
                    data = self.crc_to_new_embedded[entry.CRC]
                zout.writestr(entry, data)

    if not is_stream:
        os.remove(tmp_file)
    else:
        docx_file.seek(0)
def pre_processing(self):
    """Apply the picture replacements, if any were registered."""
    if not self.pics_to_replace:
        return
    self._replace_pics()
def _replace_pics(self):
    """Replace the data of the template pictures listed in pics_to_replace.

    The main document part is processed first, then every header and
    footer part related to it.

    Raises:
        ValueError: if a requested picture was found in none of the parts.
    """
    replaced_pics = dict.fromkeys(self.pics_to_replace, False)

    # Main document
    main_part = self.docx.part
    self._replace_docx_part_pics(main_part, replaced_pics)

    # Header/Footer
    for _relid, relationship in six.iteritems(main_part.rels):
        if relationship.reltype in (REL_TYPE.HEADER, REL_TYPE.FOOTER):
            self._replace_docx_part_pics(relationship.target_part,
                                         replaced_pics)

    # make sure all template images defined by user were replaced
    for img_id, replaced in replaced_pics.items():
        if replaced:
            continue
        raise ValueError(
            "Picture %s not found in the docx template" % img_id
        )
def get_pic_map(self):
    """Return the picture map filled while scanning the document parts."""
    pic_map = self.pic_map
    return pic_map
def _replace_docx_part_pics(self, doc_part, replaced_pics):
    """Scan one document part for pictures and replace the requested ones.

    Every ``a:graphicData`` element of ``doc_part`` that holds an embedded
    picture is recorded in ``self.pic_map`` under its ``name`` attribute.
    When a key of ``self.pics_to_replace`` equals the picture's name, title
    or description, the blob of the related image part is overwritten and
    the key is flagged ``True`` in ``replaced_pics``.
    """
    et = etree.fromstring(doc_part.blob)

    # Pictures found in this part: name -> (target_ref, target_part)
    part_map = {}

    gds = et.xpath('//a:graphic/a:graphicData', namespaces=docx.oxml.ns.nsmap)
    for gd in gds:
        rel = None
        # Either IMAGE, CHART, SMART_ART, ...
        try:
            if gd.attrib['uri'] == docx.oxml.ns.nsmap['pic']:
                # Either PICTURE or LINKED_PICTURE image
                blip = gd.xpath('pic:pic/pic:blipFill/a:blip',
                                namespaces=docx.oxml.ns.nsmap)[0]
                dest = blip.xpath('@r:embed', namespaces=docx.oxml.ns.nsmap)
                if len(dest) > 0:
                    rel = dest[0]
                else:
                    # No r:embed relationship id on the blip: skip it
                    continue
            else:
                continue

            non_visual_properties = 'pic:pic/pic:nvPicPr/pic:cNvPr/'
            filename = gd.xpath('%s@name' % non_visual_properties,
                                namespaces=docx.oxml.ns.nsmap)[0]
            titles = gd.xpath('%s@title' % non_visual_properties,
                              namespaces=docx.oxml.ns.nsmap)
            if titles:
                title = titles[0]
            else:
                title = ""
            descriptions = gd.xpath('%s@descr' % non_visual_properties,
                                    namespaces=docx.oxml.ns.nsmap)
            if descriptions:
                description = descriptions[0]
            else:
                description = ""

            part_map[filename] = (doc_part.rels[rel].target_ref,
                                  doc_part.rels[rel].target_part)

            # replace data
            for img_id, img_data in six.iteritems(self.pics_to_replace):
                if img_id == filename or img_id == title or img_id == description:
                    part_map[filename][1]._blob = img_data
                    replaced_pics[img_id] = True
                    # Only the first matching replacement is applied
                    break

        # FIXME: figure out what exceptions are thrown here and catch more specific exceptions
        # Any failure on one picture is swallowed: that picture is skipped
        except Exception:
            continue

    self.pic_map.update(part_map)
def build_url_id(self, url):
    """Relate ``url`` to the main document part as an external hyperlink.

    Returns whatever ``relate_to()`` returns for that relationship.
    """
    main_part = self.docx._part
    return main_part.relate_to(url, REL_TYPE.HYPERLINK, is_external=True)
def save(self, filename, *args, **kwargs):
    """Save the document to ``filename``.

    pre_processing() runs before the document is written and
    post_processing() after; extra arguments are forwarded to the
    underlying document's ``save()``.
    """
    self.pre_processing()
    document = self.docx
    document.save(filename, *args, **kwargs)
    self.post_processing(filename)
def get_undeclared_template_variables(self, jinja_env=None):
    """Return the variables used by the template but not declared in it.

    The body XML and the XML of every header and footer part are patched
    and concatenated, then parsed with ``jinja_env`` (or a default
    ``Environment`` when none is given).
    """
    xml_chunks = [self.patch_xml(self.get_xml())]
    for uri in [self.HEADER_URI, self.FOOTER_URI]:
        for _rel_key, part in self.get_headers_footers(uri):
            xml_chunks.append(self.patch_xml(self.get_part_xml(part)))
    env = jinja_env if jinja_env else Environment()
    parse_content = env.parse(''.join(xml_chunks))
    return meta.find_undeclared_variables(parse_content)


undeclared_template_variables = property(get_undeclared_template_variables)
class Subdoc(object):
    """ Class for subdocument to insert into master document """

    def __init__(self, tpl, docpath=None):
        self.tpl = tpl
        self.docx = tpl.get_docx()
        self.subdocx = Document(docpath)
        # The sub-document shares the document part of the master document
        self.subdocx._part = self.docx._part

    def __getattr__(self, name):
        # Anything not defined here is looked up on the sub-document
        return getattr(self.subdocx, name)

    def _get_xml(self):
        """Return the body content as XML, without the <w:body> wrapper."""
        body = self.subdocx._element.body
        if body.sectPr is not None:
            # Section properties are dropped from the body before export
            body.remove(body.sectPr)
        serialized = etree.tostring(body, encoding='unicode',
                                    pretty_print=False)
        return re.sub(r'</?w:body[^>]*>', '', serialized)

    def __unicode__(self):
        return self._get_xml()

    def __str__(self):
        return self._get_xml()

    def __html__(self):
        return self._get_xml()
class RichText(object):
    """ Build rich text (styled runs) to be used as a template variable.

    This is much faster than using the Subdoc class, but it only works
    for text INSIDE an existing paragraph.
    """

    def __init__(self, text=None, **text_prop):
        self.xml = ''
        if text:
            self.add(text, **text_prop)

    def add(self, text,
            style=None,
            color=None,
            highlight=None,
            size=None,
            subscript=None,
            superscript=None,
            bold=False,
            italic=False,
            underline=False,
            strike=False,
            font=None,
            url_id=None):
        """Append ``text`` as a new run carrying the given properties."""
        # Another RichText: its XML is appended untouched
        if isinstance(text, RichText):
            self.xml += text.xml
            return

        # If not a string : cast to string (ex: int, dict etc...)
        if not isinstance(text, (six.text_type, six.binary_type)):
            text = six.text_type(text)
        if not isinstance(text, six.text_type):
            text = text.decode('utf-8', errors='ignore')
        text = escape(text)

        # Run properties, collected in the order they are emitted
        props = []
        if style:
            props.append(u'<w:rStyle w:val="%s"/>' % style)
        if color:
            if color[0] == '#':
                color = color[1:]
            props.append(u'<w:color w:val="%s"/>' % color)
        if highlight:
            if highlight[0] == '#':
                highlight = highlight[1:]
            props.append(u'<w:highlight w:val="%s"/>' % highlight)
        if size:
            props.append(u'<w:sz w:val="%s"/>' % size)
            props.append(u'<w:szCs w:val="%s"/>' % size)
        if subscript:
            props.append(u'<w:vertAlign w:val="subscript"/>')
        if superscript:
            props.append(u'<w:vertAlign w:val="superscript"/>')
        if bold:
            props.append(u'<w:b/>')
        if italic:
            props.append(u'<w:i/>')
        if underline:
            # Any value outside the known styles falls back to 'single'
            if underline not in ['single', 'double', 'thick', 'dotted', 'dash', 'dotDash', 'dotDotDash', 'wave']:
                underline = 'single'
            props.append(u'<w:u w:val="%s"/>' % underline)
        if strike:
            props.append(u'<w:strike/>')
        if font:
            props.append(u'<w:rFonts w:ascii="{font}" w:hAnsi="{font}" w:cs="{font}"/>'
                         .format(font=font))

        run = u'<w:r>'
        if props:
            run += u'<w:rPr>%s</w:rPr>' % u''.join(props)
        run += u'<w:t xml:space="preserve">%s</w:t></w:r>' % text
        if url_id:
            run = (u'<w:hyperlink r:id="%s" w:tgtFrame="_blank">%s</w:hyperlink>'
                   % (url_id, run))
        self.xml += run

    def __unicode__(self):
        return self.xml

    def __str__(self):
        return self.xml

    def __html__(self):
        return self.xml


R = RichText
class Listing(object):
    r"""Class to manage \n and \a without using RichText,
    this way you keep the current template styling.

    Use {{ mylisting }} in your template and
    context={ mylisting:Listing(the_listing_with_newlines) }
    """

    def __init__(self, text):
        # If not a string : cast to string (ex: int, dict etc...)
        if not isinstance(text, (six.text_type, six.binary_type)):
            text = six.text_type(text)
        # Byte strings are decoded first, as RichText.add() does: escaping
        # bytes directly fails on Python 3
        if not isinstance(text, six.text_type):
            text = text.decode('utf-8', errors='ignore')
        self.xml = escape(text)

    def __unicode__(self):
        return self.xml

    def __str__(self):
        return self.xml

    def __html__(self):
        return self.xml
class InlineImage(object):
    """Generate an inline image to be used as a template variable.

    This is much faster than using the Subdoc class.
    """
    tpl = None
    image_descriptor = None
    width = None
    height = None

    def __init__(self, tpl, image_descriptor, width=None, height=None):
        self.tpl = tpl
        self.image_descriptor = image_descriptor
        self.width = width
        self.height = height

    def _insert_image(self):
        """Create the picture in the part being rendered and return its XML.

        The returned markup closes the current text run, emits the drawing
        in its own run, then reopens a run and a text element.
        """
        part = self.tpl.current_rendering_part
        inline = part.new_pic_inline(self.image_descriptor,
                                     self.width,
                                     self.height)
        return ('</w:t></w:r><w:r><w:drawing>%s</w:drawing></w:r><w:r>'
                '<w:t xml:space="preserve">' % inline.xml)

    def __unicode__(self):
        return self._insert_image()

    def __str__(self):
        return self._insert_image()

    def __html__(self):
        return self._insert_image()

38
docxtpl/inline_image.py Normal file
View File

@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
"""
Created : 2021-07-30
@author: Eric Lapouyade
"""
class InlineImage(object):
    """Generate an inline image to be used as a template variable.

    This is much faster than using the Subdoc class.
    """
    tpl = None
    image_descriptor = None
    width = None
    height = None

    def __init__(self, tpl, image_descriptor, width=None, height=None):
        self.tpl = tpl
        self.image_descriptor = image_descriptor
        self.width = width
        self.height = height

    def _insert_image(self):
        """Create the picture in the part being rendered and return its XML.

        The returned markup closes the current text run, emits the drawing
        in its own run, then reopens a run and a text element.
        """
        part = self.tpl.current_rendering_part
        inline = part.new_pic_inline(self.image_descriptor,
                                     self.width,
                                     self.height)
        return ('</w:t></w:r><w:r><w:drawing>%s</w:drawing></w:r><w:r>'
                '<w:t xml:space="preserve">' % inline.xml)

    def __unicode__(self):
        return self._insert_image()

    def __str__(self):
        return self._insert_image()

    def __html__(self):
        return self._insert_image()

34
docxtpl/listing.py Normal file
View File

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""
Created : 2021-07-30
@author: Eric Lapouyade
"""
import six
try:
from html import escape
except ImportError:
# cgi.escape is deprecated in python 3.7
from cgi import escape
class Listing(object):
    r"""Class to manage \n and \a without using RichText,
    this way you keep the current template styling.

    Use {{ mylisting }} in your template and
    context={ mylisting:Listing(the_listing_with_newlines) }
    """

    def __init__(self, text):
        # If not a string : cast to string (ex: int, dict etc...)
        if not isinstance(text, (six.text_type, six.binary_type)):
            text = six.text_type(text)
        # Byte strings are decoded first, as RichText.add() does: escaping
        # bytes directly fails on Python 3
        if not isinstance(text, six.text_type):
            text = text.decode('utf-8', errors='ignore')
        self.xml = escape(text)

    def __unicode__(self):
        return self.xml

    def __str__(self):
        return self.xml

    def __html__(self):
        return self.xml

103
docxtpl/richtext.py Normal file
View File

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
"""
Created : 2021-07-30
@author: Eric Lapouyade
"""
import six
try:
from html import escape
except ImportError:
# cgi.escape is deprecated in python 3.7
from cgi import escape
class RichText(object):
    """ Build rich text (styled runs) to be used as a template variable.

    This is much faster than using the Subdoc class, but it only works
    for text INSIDE an existing paragraph.
    """

    def __init__(self, text=None, **text_prop):
        self.xml = ''
        if text:
            self.add(text, **text_prop)

    def add(self, text,
            style=None,
            color=None,
            highlight=None,
            size=None,
            subscript=None,
            superscript=None,
            bold=False,
            italic=False,
            underline=False,
            strike=False,
            font=None,
            url_id=None):
        """Append ``text`` as a new run carrying the given properties."""
        # Another RichText: its XML is appended untouched
        if isinstance(text, RichText):
            self.xml += text.xml
            return

        # If not a string : cast to string (ex: int, dict etc...)
        if not isinstance(text, (six.text_type, six.binary_type)):
            text = six.text_type(text)
        if not isinstance(text, six.text_type):
            text = text.decode('utf-8', errors='ignore')
        text = escape(text)

        # Run properties, collected in the order they are emitted
        props = []
        if style:
            props.append(u'<w:rStyle w:val="%s"/>' % style)
        if color:
            if color[0] == '#':
                color = color[1:]
            props.append(u'<w:color w:val="%s"/>' % color)
        if highlight:
            if highlight[0] == '#':
                highlight = highlight[1:]
            props.append(u'<w:highlight w:val="%s"/>' % highlight)
        if size:
            props.append(u'<w:sz w:val="%s"/>' % size)
            props.append(u'<w:szCs w:val="%s"/>' % size)
        if subscript:
            props.append(u'<w:vertAlign w:val="subscript"/>')
        if superscript:
            props.append(u'<w:vertAlign w:val="superscript"/>')
        if bold:
            props.append(u'<w:b/>')
        if italic:
            props.append(u'<w:i/>')
        if underline:
            # Any value outside the known styles falls back to 'single'
            if underline not in ['single', 'double', 'thick', 'dotted', 'dash', 'dotDash', 'dotDotDash', 'wave']:
                underline = 'single'
            props.append(u'<w:u w:val="%s"/>' % underline)
        if strike:
            props.append(u'<w:strike/>')
        if font:
            props.append(u'<w:rFonts w:ascii="{font}" w:hAnsi="{font}" w:cs="{font}"/>'
                         .format(font=font))

        run = u'<w:r>'
        if props:
            run += u'<w:rPr>%s</w:rPr>' % u''.join(props)
        run += u'<w:t xml:space="preserve">%s</w:t></w:r>' % text
        if url_id:
            run = (u'<w:hyperlink r:id="%s" w:tgtFrame="_blank">%s</w:hyperlink>'
                   % (url_id, run))
        self.xml += run

    def __unicode__(self):
        return self.xml

    def __str__(self):
        return self.xml

    def __html__(self):
        return self.xml


R = RichText

99
docxtpl/subdoc.py Normal file
View File

@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
"""
Created : 2021-07-30
@author: Eric Lapouyade
"""
from copy import deepcopy
from docx import Document
from docx.oxml import CT_SectPr
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docxcompose.properties import CustomProperties
from docxcompose.utils import xpath
from docxcompose.composer import Composer
from docxcompose.utils import NS
from lxml import etree
import re
class SubdocComposer(Composer):
    """Composer that attaches the parts of a sub-document (images, styles,
    numberings, ...) to the master document without inserting its body."""

    def attach_parts(self, doc, remove_property_fields=True):
        """ Attach docx parts instead of appending the whole document
        thus subdoc insertion can be delegated to jinja2 """
        self.reset_reference_mapping()

        # Remove custom property fields but keep the values
        if remove_property_fields:
            cprops = CustomProperties(doc)
            for name in cprops.keys():
                cprops.dissolve_fields(name)

        self._create_style_id_mapping(doc)

        for element in doc.element.body:
            if isinstance(element, CT_SectPr):
                continue
            # The elements are processed in place, not on a copy: the
            # body of `doc` is what gets rendered into the master document
            # later on, so the ids rewritten by the calls below must land
            # on these very elements. Working on a deepcopy would throw
            # every rewrite away.
            self.add_referenced_parts(doc.part, self.doc.part, element)
            self.add_styles(doc, element)
            self.add_numberings(doc, element)
            self.restart_first_numbering(doc, element)
            self.add_images(doc, element)
            self.add_diagrams(doc, element)
            self.add_shapes(doc, element)
            self.add_footnotes(doc, element)
            self.remove_header_and_footer_references(doc, element)

        self.add_styles_from_other_parts(doc)
        self.renumber_bookmarks()
        self.renumber_docpr_ids()
        self.renumber_nvpicpr_ids()
        self.fix_section_types(doc)

    def add_diagrams(self, doc, element):
        """Relate the diagram parts referenced by ``element`` to the master
        document and rewrite the relationship ids accordingly."""
        # While waiting docxcompose 1.3.3
        dgm_rels = xpath(element, './/dgm:relIds[@r:dm]')
        for dgm_rel in dgm_rels:
            for item, rt_type in (
                    ('dm', RT.DIAGRAM_DATA),
                    ('lo', RT.DIAGRAM_LAYOUT),
                    ('qs', RT.DIAGRAM_QUICK_STYLE),
                    ('cs', RT.DIAGRAM_COLORS)
            ):
                dm_rid = dgm_rel.get('{%s}%s' % (NS['r'], item))
                dm_part = doc.part.rels[dm_rid].target_part
                new_rid = self.doc.part.relate_to(dm_part, rt_type)
                dgm_rel.set('{%s}%s' % (NS['r'], item), new_rid)
class Subdoc(object):
    """ Class for subdocument to insert into master document """

    def __init__(self, tpl, docpath=None):
        self.tpl = tpl
        self.docx = tpl.get_docx()
        self.subdocx = Document(docpath)
        if docpath:
            # An existing docx is given: attach its parts (images, styles,
            # ...) to the master document
            # NOTE(review): the composer receives the template wrapper, not
            # self.docx; this relies on the template delegating attribute
            # access to its document - confirm this is intended
            compose = SubdocComposer(tpl)
            compose.attach_parts(self.subdocx)
        else:
            self.subdocx._part = self.docx._part

    def __getattr__(self, name):
        # Only called when normal lookup fails. If 'subdocx' itself is not
        # set yet, delegating would re-enter this method forever.
        if name == 'subdocx':
            raise AttributeError(name)
        # Anything not defined here is looked up on the sub-document
        return getattr(self.subdocx, name)

    def _get_xml(self):
        """Return the body content as XML, without the <w:body> wrapper."""
        if self.subdocx._element.body.sectPr is not None:
            self.subdocx._element.body.remove(self.subdocx._element.body.sectPr)
        xml = re.sub(r'</?w:body[^>]*>', '', etree.tostring(
            self.subdocx._element.body, encoding='unicode', pretty_print=False))
        return xml

    def __unicode__(self):
        return self._get_xml()

    def __str__(self):
        return self._get_xml()

    def __html__(self):
        return self._get_xml()

677
docxtpl/template.py Normal file
View File

@ -0,0 +1,677 @@
# -*- coding: utf-8 -*-
"""
Created : 2015-03-12
@author: Eric Lapouyade
"""
from .subdoc import Subdoc
import functools
import io
from lxml import etree
from docx import Document
from docx.opc.oxml import parse_xml
from docx.opc.part import XmlPart
import docx.oxml.ns
from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE
from jinja2 import Environment, Template, meta
from jinja2.exceptions import TemplateError
try:
from html import escape
except ImportError:
# cgi.escape is deprecated in python 3.7
from cgi import escape
import re
import six
import binascii
import os
import zipfile
class DocxTemplate(object):
""" Class for managing docx files as they were jinja2 templates """
HEADER_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
FOOTER_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
def __init__(self, docx):
    """Open ``docx`` as the template document and reset all bookkeeping."""
    self.docx = Document(docx)
    # Replacements registered by the replace_*() methods
    self.crc_to_new_media, self.crc_to_new_embedded = {}, {}
    self.zipname_to_replace, self.pics_to_replace = {}, {}
    self.pic_map = {}
    # Document part currently being rendered
    self.current_rendering_part = None
def __getattr__(self, name):
return getattr(self.docx, name)
def xml_to_string(self, xml, encoding='unicode'):
    """Serialize the element ``xml`` with the requested ``encoding``.

    The default, 'unicode', is the one used by the callers in this class.
    """
    # Be careful : pretty_print MUST be set to False, otherwise patch_xml()
    # won't work properly
    return etree.tostring(xml, encoding=encoding, pretty_print=False)
def get_docx(self):
    """Return the underlying document object."""
    document = self.docx
    return document
def get_xml(self):
    """Return the document body serialized by xml_to_string()."""
    body = self.docx._element.body
    return self.xml_to_string(body)
def write_xml(self, filename):
    """Write the XML of the document body to ``filename``, UTF-8 encoded."""
    # The encoding is explicit: the locale default may be unable to encode
    # some characters of the document
    with io.open(filename, 'w', encoding='utf-8') as fh:
        fh.write(self.get_xml())
def patch_xml(self, src_xml):
    """ Make a lot of cleaning to get a raw xml understandable by jinja2 :
    strip all unnecessary xml tags, manage table cell background color and
    colspan, unescape html entities, etc...

    ``src_xml`` is the XML as a string; the patched XML string is
    returned. """

    # replace {<something>{ by {{   ( works with {{ }} {% and %} )
    src_xml = re.sub(r'(?<={)(<[^>]*>)+(?=[\{%])|(?<=[%\}])(<[^>]*>)+(?=\})', '',
                     src_xml, flags=re.DOTALL)

    # replace {{<some tags>jinja2 stuff<some other tags>}} by {{jinja2 stuff}}
    # same thing with {% ... %}
    # "jinja2 stuff" could be a variable, an 'if' etc... anything jinja2
    # will understand
    def striptags(m):
        return re.sub('</w:t>.*?(<w:t>|<w:t [^>]*>)', '',
                      m.group(0), flags=re.DOTALL)
    src_xml = re.sub(r'{%(?:(?!%}).)*|{{(?:(?!}}).)*', striptags,
                     src_xml, flags=re.DOTALL)

    # manage table cell colspan
    def colspan(m):
        cell_xml = m.group(1) + m.group(3)
        cell_xml = re.sub(r'<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>',
                          '', cell_xml, flags=re.DOTALL)
        cell_xml = re.sub(r'<w:gridSpan[^/]*/>', '', cell_xml, count=1)
        return re.sub(r'(<w:tcPr[^>]*>)', r'\1<w:gridSpan w:val="{{%s}}"/>'
                      % m.group(2), cell_xml)
    src_xml = re.sub(r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?</w:tc>)',
                     colspan, src_xml, flags=re.DOTALL)

    # manage table cell background color
    def cellbg(m):
        cell_xml = m.group(1) + m.group(3)
        cell_xml = re.sub(r'<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>',
                          '', cell_xml, flags=re.DOTALL)
        cell_xml = re.sub(r'<w:shd[^/]*/>', '', cell_xml, count=1)
        return re.sub(r'(<w:tcPr[^>]*>)',
                      r'\1<w:shd w:val="clear" w:color="auto" w:fill="{{%s}}"/>'
                      % m.group(2), cell_xml)
    src_xml = re.sub(r'(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?</w:tc>)',
                     cellbg, src_xml, flags=re.DOTALL)

    # ensure space preservation
    src_xml = re.sub(r'<w:t>((?:(?!<w:t>).)*)({{.*?}}|{%.*?%})',
                     r'<w:t xml:space="preserve">\1\2',
                     src_xml, flags=re.DOTALL)
    src_xml = re.sub(r'({{r\s.*?}}|{%r\s.*?%})',
                     r'</w:t></w:r><w:r><w:t xml:space="preserve">\1</w:t></w:r><w:r><w:t xml:space="preserve">',
                     src_xml, flags=re.DOTALL)

    # {%- will merge with previous paragraph text
    src_xml = re.sub(r'</w:t>(?:(?!</w:t>).)*?{%-', '{%', src_xml, flags=re.DOTALL)
    # -%} will merge with next paragraph text
    src_xml = re.sub(r'-%}(?:(?!<w:t[ >]).)*?<w:t[^>]*?>', '%}', src_xml, flags=re.DOTALL)

    for y in ['tr', 'tc', 'p', 'r']:
        # replace into xml code the row/paragraph/run containing
        # {%y xxx %} or {{y xxx}} template tag
        # by {% xxx %} or {{ xx }} without any surrounding <w:y> tags :
        # This is mandatory to have jinja2 generating correct xml code
        pat = r'<w:%(y)s[ >](?:(?!<w:%(y)s[ >]).)*({%%|{{)%(y)s ([^}%%]*(?:%%}|}})).*?</w:%(y)s>' % {'y': y}
        src_xml = re.sub(pat, r'\1 \2', src_xml, flags=re.DOTALL)

    # add vMerge
    # use {% vm %} to make this table cell and its copies be vertically
    # merged within a {% for %}
    def v_merge_tc(m):
        def v_merge(m1):
            return (
                '<w:vMerge w:val="{% if loop.first %}restart{% else %}continue{% endif %}"/>' +
                m1.group(1) +  # Everything between ``</w:tcPr>`` and ``<w:t>``.
                "{% if loop.first %}" +
                m1.group(2) +  # Everything before ``{% vm %}``.
                m1.group(3) +  # Everything after ``{% vm %}``.
                "{% endif %}" +
                m1.group(4)  # ``</w:t>``.
            )
        return re.sub(
            r'(</w:tcPr[ >].*?<w:t(?:.*?)>)(.*?)(?:{%\s*vm\s*%})(.*?)(</w:t>)',
            v_merge,
            m.group(),  # Everything between ``</w:tc>`` and ``</w:tc>`` with ``{% vm %}`` inside.
            flags=re.DOTALL,
        )
    src_xml = re.sub(r'<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*vm\s*%}.*?</w:tc[ >]',
                     v_merge_tc, src_xml, flags=re.DOTALL)

    # Use ``{% hm %}`` to make table cell become horizontally merged within
    # a ``{% for %}``.
    def h_merge_tc(m):
        xml_to_patch = m.group()  # Everything between ``</w:tc>`` and ``</w:tc>`` with ``{% hm %}`` inside.

        def with_gridspan(m1):
            return (
                m1.group(1) +  # ``w:gridSpan w:val="``.
                '{{ ' + m1.group(2) + ' * loop.length }}' +  # Content of ``w:val``, multiplied by loop length.
                m1.group(3)  # Closing quotation mark.
            )

        def without_gridspan(m2):
            return (
                '<w:gridSpan w:val="{{ loop.length }}"/>' +
                m2.group(1) +  # Everything between ``</w:tcPr>`` and ``<w:t>``.
                m2.group(2) +  # Everything before ``{% hm %}``.
                m2.group(3) +  # Everything after ``{% hm %}``.
                m2.group(4)  # ``</w:t>``.
            )

        if re.search(r'w:gridSpan', xml_to_patch):
            # Simple case, there's already ``gridSpan``, multiply its value.
            xml = re.sub(
                r'(w:gridSpan w:val=")(\d+)(")',
                with_gridspan,
                xml_to_patch,
                flags=re.DOTALL,
            )
            xml = re.sub(
                r'{%\s*hm\s*%}',
                '',
                xml,  # Patched xml.
                flags=re.DOTALL,
            )
        else:
            # There is no ``gridSpan``, add one.
            xml = re.sub(
                r'(</w:tcPr[ >].*?<w:t(?:.*?)>)(.*?)(?:{%\s*hm\s*%})(.*?)(</w:t>)',
                without_gridspan,
                xml_to_patch,
                flags=re.DOTALL,
            )

        # Discard every other cell generated in loop.
        return "{% if loop.first %}" + xml + "{% endif %}"

    src_xml = re.sub(r'<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*hm\s*%}.*?</w:tc[ >]',
                     h_merge_tc, src_xml, flags=re.DOTALL)

    def clean_tags(m):
        # Inside jinja2 tags, turn xml entities and typographic quotes back
        # into the plain characters jinja2 expects. The quotes are written
        # as escapes so that they cannot be lost again by a re-encoding of
        # this file (an empty search string would make str.replace() insert
        # the replacement between every character).
        return (m.group(0)
                .replace(r"&#8216;", "'")
                .replace('&lt;', '<')
                .replace('&gt;', '>')
                .replace(u'\u201c', u'"')   # left double quotation mark
                .replace(u'\u201d', u'"')   # right double quotation mark
                .replace(u'\u2018', u"'")   # left single quotation mark
                .replace(u'\u2019', u"'"))  # right single quotation mark
    src_xml = re.sub(r'(?<=\{[\{%])(.*?)(?=[\}%]})', clean_tags, src_xml)

    return src_xml
def render_xml_part(self, src_xml, part, context, jinja_env=None):
    """Render the patched XML of one document part with jinja2.

    ``part`` is recorded as ``self.current_rendering_part`` while
    rendering. ``jinja_env``, when given, is used to build the template;
    otherwise a plain ``Template`` is used.

    Raises:
        TemplateError: re-raised from jinja2. When the error carries a
            line number, a ``docx_context`` attribute is attached to it:
            the list of source lines around the error, stripped of their
            xml tags.
    """
    # One paragraph per line, so that jinja2 line numbers are meaningful
    src_xml = src_xml.replace(r'<w:p>', '\n<w:p>')
    try:
        self.current_rendering_part = part
        if jinja_env:
            template = jinja_env.from_string(src_xml)
        else:
            template = Template(src_xml)
        dst_xml = template.render(context)
    except TemplateError as exc:
        if hasattr(exc, 'lineno') and exc.lineno is not None:
            line_number = max(exc.lineno - 4, 0)
            # A real list (not a one-shot iterator) so the context can be
            # read several times by whoever handles the error
            exc.docx_context = [
                re.sub(r'<[^>]+>', '', line)
                for line in src_xml.splitlines()[line_number:(line_number + 7)]
            ]
        raise
    dst_xml = dst_xml.replace('\n<w:p>', '<w:p>')
    # Restore the escaped jinja2 delimiters
    dst_xml = (dst_xml
               .replace('{_{', '{{')
               .replace('}_}', '}}')
               .replace('{_%', '{%')
               .replace('%_}', '%}'))
    dst_xml = self.resolve_listing(dst_xml)
    return dst_xml
def resolve_listing(self, xml):
    """Turn the control characters found in text elements into docx markup.

    Inside every text element of every run of every paragraph:
      - a tab becomes a tab run,
      - ``\\a`` starts a new paragraph,
      - a newline becomes a line break,
      - a form feed inserts a page break paragraph.
    The paragraph and run properties found are copied to the paragraphs
    and runs created.
    """

    def expand_text(run_props, para_props, match):
        tab_xml = ('</w:t></w:r>'
                   '<w:r>%s<w:tab/></w:r>'
                   '<w:r>%s<w:t xml:space="preserve">' % (run_props, run_props))
        new_para_xml = ('</w:t></w:r></w:p>'
                        '<w:p>%s<w:r>%s<w:t xml:space="preserve">' % (para_props, run_props))
        page_break_xml = ('</w:t></w:r></w:p>'
                          '<w:p><w:r><w:br w:type="page"/></w:r></w:p>'
                          '<w:p>%s<w:r>%s<w:t xml:space="preserve">' % (para_props, run_props))
        # Applied one after the other, in this order
        substitutions = (
            ('\t', tab_xml),
            ('\a', new_para_xml),
            ('\n', '</w:t><w:br/><w:t xml:space="preserve">'),
            ('\f', page_break_xml),
        )
        text = match.group(0)
        for char, markup in substitutions:
            text = text.replace(char, markup)
        return text

    def expand_run(para_props, match):
        run_xml = match.group(0)
        found = re.search(r'<w:rPr>.*?</w:rPr>', run_xml)
        run_props = found.group(0) if found else ''
        return re.sub(r'<w:t(?:[^>]*)?>.*?</w:t>',
                      lambda text_match: expand_text(run_props, para_props, text_match),
                      run_xml, flags=re.DOTALL)

    def expand_paragraph(match):
        para_xml = match.group(0)
        found = re.search(r'<w:pPr>.*?</w:pPr>', para_xml)
        para_props = found.group(0) if found else ''
        return re.sub(r'<w:r(?:[^>]*)?>.*?</w:r>',
                      lambda run_match: expand_run(para_props, run_match),
                      para_xml, flags=re.DOTALL)

    return re.sub(r'<w:p(?:[^>]*)?>.*?</w:p>', expand_paragraph, xml,
                  flags=re.DOTALL)
def build_xml(self, context, jinja_env=None):
    """Return the body XML patched for jinja2 and rendered with ``context``."""
    patched_xml = self.patch_xml(self.get_xml())
    return self.render_xml_part(patched_xml, self.docx._part, context,
                                jinja_env)
def map_tree(self, tree):
    """Replace the body of the document by ``tree``."""
    document_root = self.docx._element
    current_body = document_root.body
    document_root.replace(current_body, tree)
def get_headers_footers(self, uri):
    """Yield ``(relKey, part)`` for each part related with type ``uri``.

    Parts whose blob is empty are skipped.
    """
    relationships = self.docx._part._rels
    for relKey, relationship in relationships.items():
        if relationship.reltype != uri:
            continue
        if relationship.target_part.blob:
            yield relKey, relationship.target_part
def get_part_xml(self, part):
    """Return the blob of ``part`` parsed and serialized by xml_to_string()."""
    parsed = parse_xml(part.blob)
    return self.xml_to_string(parsed)
def get_headers_footers_encoding(self, xml):
    """Return the encoding declared at the start of ``xml``.

    'utf-8' is returned when ``xml`` does not start with an xml
    declaration carrying an encoding.
    """
    declaration = re.match(r'<\?xml[^\?]+\bencoding="([^"]+)"', xml, re.I)
    return declaration.group(1) if declaration else 'utf-8'
def build_headers_footers_xml(self, context, uri, jinja_env=None):
    """Yield ``(relKey, rendered_xml)`` for each part of type ``uri``.

    The rendered XML is encoded with the encoding detected by
    get_headers_footers_encoding().
    """
    for relKey, part in self.get_headers_footers(uri):
        part_xml = self.get_part_xml(part)
        # The encoding is detected before the XML gets patched
        encoding = self.get_headers_footers_encoding(part_xml)
        rendered = self.render_xml_part(self.patch_xml(part_xml), part,
                                        context, jinja_env)
        yield relKey, rendered.encode(encoding)
def map_headers_footers_xml(self, relKey, xml):
    """Make the relationship ``relKey`` point to a new part built from ``xml``.

    The relationships of the previous part are loaded into the new one.
    """
    relationship = self.docx._part._rels[relKey]
    old_part = relationship.target_part
    new_part = XmlPart.load(old_part.partname, old_part.content_type,
                            xml, old_part.package)
    for _rid, rel in old_part.rels.items():
        new_part.load_rel(rel.reltype, rel._target, rel.rId, rel.is_external)
    relationship._target = new_part
def render(self, context, jinja_env=None, autoescape=False):
    """Render the body, the headers and the footers with ``context``.

    When ``autoescape`` is set, it is applied to ``jinja_env``; an
    ``Environment`` is created for that purpose if none is given.
    """
    if autoescape:
        if jinja_env:
            jinja_env.autoescape = autoescape
        else:
            jinja_env = Environment(autoescape=autoescape)

    # Body (tables are fixed if needed)
    body_xml = self.build_xml(context, jinja_env)
    self.map_tree(self.fix_tables(body_xml))

    # Headers first, then footers
    for uri in (self.HEADER_URI, self.FOOTER_URI):
        rendered_parts = self.build_headers_footers_xml(context, uri,
                                                        jinja_env)
        for relKey, xml in rendered_parts:
            self.map_headers_footers_xml(relKey, xml)
# Using the TC tag in a for loop can make the count of declared columns
# differ from the real count of columns in a row. This function fixes it.
def fix_tables(self, xml):
    """Make the column declarations of every table match its rows.

    ``xml`` is parsed (in recover mode) and, for each ``w:tbl`` element:
      - if a row holds more cells than there are ``w:gridCol``
        declarations, columns are added and the widths rescaled so that
        the total width is preserved;
      - if there are more declarations than the widest row needs (cell
        ``w:gridSpan`` values included), the trailing declarations are
        removed and their width is distributed over the remaining ones.

    Returns the parsed (and possibly modified) tree.
    """
    parser = etree.XMLParser(recover=True)
    tree = etree.fromstring(xml, parser=parser)
    # get namespace
    ns = '{' + tree.nsmap['w'] + '}'
    # walk through xml and find tables
    for t in tree.iter(ns+'tbl'):
        # NOTE(review): assumes every table has a w:tblGrid child - confirm
        tblGrid = t.find(ns+'tblGrid')
        columns = tblGrid.findall(ns+'gridCol')
        to_add = 0
        # walk through all rows and try to find if there is a higher cell count
        for r in t.iter(ns+'tr'):
            cells = r.findall(ns+'tc')
            if (len(columns) + to_add) < len(cells):
                to_add = len(cells) - len(columns)
        # is it necessary to add columns?
        if to_add > 0:
            # at first, calculate width of table according to columns
            # (we want to preserve it)
            width = 0.0
            new_average = None
            for c in columns:
                if not c.get(ns+'w') is None:
                    width += float(c.get(ns+'w'))
            # try to keep proportion of table
            if width > 0:
                old_average = width / len(columns)
                new_average = width / (len(columns) + to_add)
                # scale the old columns
                for c in columns:
                    c.set(ns+'w', str(int(float(c.get(ns+'w')) *
                                          new_average/old_average)))
                # add new columns
                for i in range(to_add):
                    etree.SubElement(tblGrid, ns+'gridCol',
                                     {ns+'w': str(int(new_average))})

        # Refetch columns after columns addition.
        columns = tblGrid.findall(ns + 'gridCol')
        columns_len = len(columns)

        cells_len_max = 0

        # Reducer: adds the number of grid columns spanned by `cell`
        def get_cell_len(total, cell):
            tc_pr = cell.find(ns + 'tcPr')
            grid_span = None if tc_pr is None else tc_pr.find(ns + 'gridSpan')

            if grid_span is not None:
                return total + int(grid_span.get(ns + 'val'))

            return total + 1

        # Calculate max of table cells to compare with `gridCol`.
        for r in t.iter(ns + 'tr'):
            cells = r.findall(ns + 'tc')
            cells_len = functools.reduce(get_cell_len, cells, 0)
            cells_len_max = max(cells_len_max, cells_len)

        to_remove = columns_len - cells_len_max

        # If after the loop, there are fewer columns than there
        # originally were, remove extra `gridCol` declarations.
        if to_remove > 0:
            # Have to keep track of the removed width to scale the
            # table back to its original width.
            removed_width = 0.0

            for c in columns[-to_remove:]:
                removed_width += float(c.get(ns + 'w'))

                tblGrid.remove(c)

            columns_left = tblGrid.findall(ns + 'gridCol')

            # Distribute `removed_width` across all columns that are
            # left after extras removal.
            extra_space = 0
            if len(columns_left) > 0:
                extra_space = removed_width / len(columns_left)
                extra_space = int(extra_space)

            for c in columns_left:
                c.set(ns+'w', str(int(float(c.get(ns+'w')) + extra_space)))

    return tree
def new_subdoc(self, docpath=None):
    """Create a Subdoc bound to this template, optionally from ``docpath``."""
    subdoc = Subdoc(self, docpath)
    return subdoc
@staticmethod
def get_file_crc(file_obj):
if hasattr(file_obj, 'read'):
buf = file_obj.read()
else:
with open(file_obj, 'rb') as fh:
buf = fh.read()
crc = (binascii.crc32(buf) & 0xFFFFFFFF)
return crc
def replace_media(self, src_file, dst_file):
    """Replace one media by another one into a docx

    This has been done mainly because it is not possible to add images in
    docx header/footer.
    With this function, put a dummy picture in your header/footer,
    then specify it with its replacement in this function using the file
    path or file-like objects.

    Syntax: tpl.replace_media('dummy_media_to_replace.png','media_to_paste.jpg')
        -- or --
            tpl.replace_media(io.BytesIO(image_stream), io.BytesIO(new_image_stream))

    Note: for images, the aspect ratio will be the same as the replaced image
    Note2: it is important to have the source media file as it is required
           to calculate its CRC to find it in the docx
    """
    crc = self.get_file_crc(src_file)
    if not hasattr(dst_file, 'read'):
        with open(dst_file, 'rb') as stream:
            payload = stream.read()
    else:
        payload = dst_file.read()
    self.crc_to_new_media[crc] = payload
def replace_pic(self, embedded_file, dst_file):
    """Register new image data for a picture embedded in the template.

    ``embedded_file`` is the original name of the embedded picture (give
    only the file basename, not the full path). ``dst_file`` is the new
    picture, either a filename or a file-like object.

    Notes:
      1) embedded_file and dst_file must have the same extension/format;
         when dst_file is a file-like object, no check is done on
         format compatibility
      2) the aspect ratio will be the same as the replaced image
      3) there is no need to keep the original file (this is not the case
         for replace_embedded and replace_media)
    """
    if not hasattr(dst_file, 'read'):
        with open(dst_file, 'rb') as stream:
            new_picture = stream.read()
    else:
        # NOTE: file extension not checked
        new_picture = dst_file.read()
    self.pics_to_replace[embedded_file] = new_picture
def replace_embedded(self, src_file, dst_file):
    """Replace one embedded object by another one into a docx

    This has been done mainly because it is not possible to add images
    in docx header/footer.
    With this function, put a dummy picture in your header/footer,
    then specify it with its replacement in this function.

    Syntax: tpl.replace_embedded('dummy_doc.docx','doc_to_paste.docx')

    ``dst_file`` may be a path or, like in replace_media(), a file-like
    object opened in binary mode.

    Note: it is important to have the source file as it is required to
    calculate its CRC to find it in the docx
    """
    crc = self.get_file_crc(src_file)
    if hasattr(dst_file, 'read'):
        # File-like replacement: consume it as-is
        self.crc_to_new_embedded[crc] = dst_file.read()
    else:
        with open(dst_file, 'rb') as fh:
            self.crc_to_new_embedded[crc] = fh.read()
def replace_zipname(self, zipname, dst_file):
    """Replace one file in the docx file

    First note that a MSWord .docx file is in fact a zip file.

    This method can be used to replace a document embedded in the docx
    template.

    Some embedded documents may have been modified by MSWord while saving
    the template : thus replace_embedded() cannot be used as the CRC is
    not the same as the original file.

    This method works for embedded MSWord files like Excel or PowerPoint
    files, but won't work for others like PDF, Python or even Text files :
    for these ones, MSWord generates an oleObjectNNN.bin file which is of
    no use to replace as it is encoded.

    Syntax:

    tpl.replace_zipname(
        'word/embeddings/Feuille_Microsoft_Office_Excel1.xlsx',
        'my_excel_file.xlsx')

    The zipname is the one you can find when you open docx with WinZip,
    7zip (Windows) or unzip -l (Linux). The zipname starts with
    "word/embeddings/". Note that the file is renamed by MSWord,
    so you have to guess a little bit...

    ``dst_file`` may be a path or, like in replace_media(), a file-like
    object opened in binary mode.
    """
    if hasattr(dst_file, 'read'):
        # File-like replacement: consume it as-is
        self.zipname_to_replace[zipname] = dst_file.read()
    else:
        with open(dst_file, 'rb') as fh:
            self.zipname_to_replace[zipname] = fh.read()
def post_processing(self, docx_file):
    """Rewrite the saved docx so that registered replacements take effect.

    Does nothing unless at least one of ``crc_to_new_media``,
    ``crc_to_new_embedded`` or ``zipname_to_replace`` is non-empty.
    Otherwise the saved archive is copied member by member and the
    content of a member is swapped when:

    * its zip name is a key of ``zipname_to_replace``, or
    * it lives under ``word/media/`` and its CRC is a key of
      ``crc_to_new_media``, or
    * it lives under ``word/embeddings/`` and its CRC is a key of
      ``crc_to_new_embedded``.

    :param docx_file: the target that was just saved, either a path or a
        file-like object (anything exposing ``read``).
    :raises Exception: whatever the zip rewrite raises; in that case the
        document saved before replacement is put back in ``docx_file``
        and no temporary file is left behind.
    """
    if not (self.crc_to_new_media or
            self.crc_to_new_embedded or
            self.zipname_to_replace):
        # Nothing registered: leave the saved document untouched.
        return

    is_stream = hasattr(docx_file, 'read')
    if is_stream:
        # Keep a pristine in-memory copy, then empty the stream so the
        # rewritten archive can be written from its beginning.
        tmp_file = io.BytesIO()
        DocxTemplate(docx_file).save(tmp_file)
        tmp_file.seek(0)
        docx_file.seek(0)
        docx_file.truncate()
        docx_file.seek(0)
    else:
        # Move the saved file aside; it becomes the source of the rewrite.
        tmp_file = '%s_docxtpl_before_replace_medias' % docx_file
        os.rename(docx_file, tmp_file)

    try:
        with zipfile.ZipFile(tmp_file) as zin:
            with zipfile.ZipFile(docx_file, 'w') as zout:
                for item in zin.infolist():
                    name = item.filename
                    if name in self.zipname_to_replace:
                        data = self.zipname_to_replace[name]
                    elif (name.startswith('word/media/') and
                            item.CRC in self.crc_to_new_media):
                        data = self.crc_to_new_media[item.CRC]
                    elif (name.startswith('word/embeddings/') and
                            item.CRC in self.crc_to_new_embedded):
                        data = self.crc_to_new_embedded[item.CRC]
                    else:
                        # Only members that are kept need to be read.
                        data = zin.read(name)
                    zout.writestr(item, data)
    except Exception:
        # Do not leave a half-written document: restore the version that
        # was saved before the replacement attempt, then report the error.
        if is_stream:
            docx_file.seek(0)
            docx_file.truncate()
            docx_file.write(tmp_file.getvalue())
        else:
            if os.path.exists(docx_file):
                os.remove(docx_file)
            os.rename(tmp_file, docx_file)
        raise

    if is_stream:
        # Hand the stream back positioned at its start.
        docx_file.seek(0)
    else:
        os.remove(tmp_file)
def pre_processing(self):
    """Run the picture replacement step when pictures were registered.

    Calls ``_replace_pics()`` only if ``pics_to_replace`` is non-empty.
    """
    if not self.pics_to_replace:
        return
    self._replace_pics()
def _replace_pics(self):
"""Replaces pictures xml tags in the docx template with pictures provided by the user"""
replaced_pics = {key: False for key in self.pics_to_replace}
# Main document
part = self.docx.part
self._replace_docx_part_pics(part, replaced_pics)
# Header/Footer
for relid, rel in six.iteritems(part.rels):
if rel.reltype in (REL_TYPE.HEADER, REL_TYPE.FOOTER):
self._replace_docx_part_pics(rel.target_part, replaced_pics)
# make sure all template images defined by user were replaced
for img_id, replaced in replaced_pics.items():
if not replaced:
raise ValueError(
"Picture %s not found in the docx template" % img_id
)
def get_pic_map(self):
    """Return the picture map of this template.

    The returned object is ``self.pic_map`` itself (not a copy), i.e. the
    mapping that ``_replace_docx_part_pics`` updates with
    ``picture name -> (target_ref, target_part)`` entries.
    """
    picture_map = self.pic_map
    return picture_map
def _replace_docx_part_pics(self, doc_part, replaced_pics):
"""Replace, inside one docx part, the pictures registered by the user.

The XML blob of ``doc_part`` is parsed and every ``a:graphic/a:graphicData``
element is examined. For each one that is a picture with an ``r:embed``
relationship, an entry ``name -> (target_ref, target_part)`` is recorded,
and if the picture name, title or description equals a key of
``self.pics_to_replace`` the blob of the related part is overwritten with
the registered data and ``replaced_pics[key]`` is set to True.

:param doc_part: part to scan; must expose ``blob`` and ``rels``.
:param replaced_pics: dict ``picture id -> bool`` updated in place.

Any exception raised while handling one graphic makes that graphic be
skipped silently (see the FIXME below).
"""
# Parse the raw XML of the part; the lookups below run on this tree.
et = etree.fromstring(doc_part.blob)
# Pictures found in this part: name -> (target_ref, target_part).
part_map = {}
gds = et.xpath('//a:graphic/a:graphicData', namespaces=docx.oxml.ns.nsmap)
for gd in gds:
rel = None
# Either IMAGE, CHART, SMART_ART, ...
try:
if gd.attrib['uri'] == docx.oxml.ns.nsmap['pic']:
# Either PICTURE or LINKED_PICTURE image
blip = gd.xpath('pic:pic/pic:blipFill/a:blip',
namespaces=docx.oxml.ns.nsmap)[0]
# Only pictures carrying an r:embed relationship id are handled;
# the others are skipped.
dest = blip.xpath('@r:embed', namespaces=docx.oxml.ns.nsmap)
if len(dest) > 0:
rel = dest[0]
else:
continue
else:
# Not a picture graphic: nothing to replace here.
continue
# name / title / descr attributes are read from pic:cNvPr; name is
# required (indexing raises if absent), the two others default to "".
non_visual_properties = 'pic:pic/pic:nvPicPr/pic:cNvPr/'
filename = gd.xpath('%s@name' % non_visual_properties,
namespaces=docx.oxml.ns.nsmap)[0]
titles = gd.xpath('%s@title' % non_visual_properties,
namespaces=docx.oxml.ns.nsmap)
if titles:
title = titles[0]
else:
title = ""
descriptions = gd.xpath('%s@descr' % non_visual_properties,
namespaces=docx.oxml.ns.nsmap)
if descriptions:
description = descriptions[0]
else:
description = ""
# Resolve the relationship id to the part holding the image data.
part_map[filename] = (doc_part.rels[rel].target_ref,
doc_part.rels[rel].target_part)
# replace data
# The first registered picture whose id matches the name, title or
# description wins; the loop stops right after it.
for img_id, img_data in six.iteritems(self.pics_to_replace):
if img_id == filename or img_id == title or img_id == description:
part_map[filename][1]._blob = img_data
replaced_pics[img_id] = True
break
# FIXME: figure out what exceptions are thrown here and catch more specific exceptions
except Exception:
continue
# Merge what was found in this part into the template-wide picture map.
# NOTE(review): indentation is not visible in this view; presumably this
# runs once after the loop -- confirm against the real file.
self.pic_map.update(part_map)
def build_url_id(self, url):
    """Declare ``url`` as an external hyperlink of the document part.

    Returns whatever the part's ``relate_to`` call returns for that new
    relationship.
    """
    document_part = self.docx._part
    return document_part.relate_to(
        url, REL_TYPE.HYPERLINK, is_external=True
    )
def save(self, filename, *args, **kwargs):
    """Save the document to ``filename``.

    Runs ``pre_processing()`` first, then saves the underlying docx
    document (extra positional and keyword arguments are forwarded to
    its ``save``), and finally runs ``post_processing(filename)``.
    """
    self.pre_processing()
    document = self.docx
    document.save(filename, *args, **kwargs)
    self.post_processing(filename)
def get_undeclared_template_variables(self, jinja_env=None):
    """Find the variables used by the template but not declared in it.

    The patched XML of the main document is concatenated with the patched
    XML of every header and footer part, parsed with jinja2 and handed to
    ``meta.find_undeclared_variables``, whose result is returned.

    :param jinja_env: optional jinja2 environment used for parsing; a
        default ``Environment()`` is created when it is not given.
    """
    template_source = self.patch_xml(self.get_xml())
    for uri in (self.HEADER_URI, self.FOOTER_URI):
        for _rel_key, part in self.get_headers_footers(uri):
            part_xml = self.get_part_xml(part)
            template_source += self.patch_xml(part_xml)
    env = jinja_env if jinja_env else Environment()
    return meta.find_undeclared_variables(env.parse(template_source))
undeclared_template_variables = property(get_undeclared_template_variables)

View File

@ -1,4 +0,0 @@
six
python-docx
jinja2
lxml

View File

@ -62,6 +62,7 @@ setup(name='docxtpl',
packages=['docxtpl'],
install_requires=['six',
'python-docx',
'docxcompose',
'jinja2',
'lxml'],
extras_require={'docs': ['Sphinx', 'sphinxcontrib-napoleon']},

View File

@ -44,4 +44,4 @@ context = {
# testing that it works also when autoescape has been forced to True
jinja_env = jinja2.Environment(autoescape=True)
tpl.render(context, jinja_env)
tpl.save('output/inline_image.docx')
tpl.save('templates/merge_docx_subdoc.docx')

19
tests/merge_docx.py Normal file
View File

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
'''
Created : 2021-07-30
@author: Eric Lapouyade
'''
from docxtpl import DocxTemplate
from docx.shared import Inches
# Load the master template and create a subdoc from an existing docx file.
tpl = DocxTemplate('templates/merge_docx_master_tpl.docx')
sd = tpl.new_subdoc('templates/merge_docx_subdoc.docx')
# The subdoc is passed to the master template as 'mysubdoc'.
context = dict(mysubdoc=sd)
tpl.render(context)
tpl.save('output/merge_docx.docx')

Binary file not shown.

Binary file not shown.