Skip to content

Commit 9fe7e5f

Browse files
authored
Merge pull request #239 from transifex/docx_allow_space_removal
Allow space control on docx files
2 parents 4cd5f07 + 0a675d3 commit 9fe7e5f

File tree

2 files changed

+60
-3
lines changed

2 files changed

+60
-3
lines changed

openformats/formats/office_open_xml/parser.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
168168
).find_all(text=True)
169169

170170
leading_spaces = 0
171-
171+
empty_text_element = None
172172
added_hl_text_elements = defaultdict(list)
173173
deleted_hl_text_elements = defaultdict(list)
174174

@@ -178,6 +178,7 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
178178
# and remove leading whitespace from the next string
179179
if not text.strip():
180180
leading_spaces = len(text) - len(text.strip())
181+
empty_text_element = text_element
181182
continue
182183
else:
183184
hyperlink_url = cls.get_hyperlink_url(
@@ -193,6 +194,10 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
193194
translation = six.text_type(translation_part)
194195
if not translation[:leading_spaces].strip():
195196
translation = translation[leading_spaces:]
197+
else:
198+
if empty_text_element:
199+
cls.remove_text_element(empty_text_element)
200+
196201
leading_spaces = 0
197202

198203
# the text parts of the translation are more than the

openformats/tests/formats/docx/test_docx.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,60 @@ def test_simple_file(self):
122122
self.assertEqual(openstring.string, translation)
123123
self.assertEqual(openstring.string, openstring.key)
124124

125+
def test_space_control(self):
126+
path = '{}/special_cases_2.docx'.format(self.TESTFILE_BASE)
127+
with open(path, 'rb') as f:
128+
content = f.read()
129+
130+
handler = DocxHandler()
131+
template, stringset = handler.parse(content)
132+
133+
self.assertEqual(len(stringset), 1)
134+
135+
openstring = stringset[0]
136+
self.assertEqual(openstring.order, 0)
137+
self.assertEqual(
138+
openstring.string,
139+
u'one two <tx href="https://www.transifex.com/">three </tx><tx> four </tx>five' # noqa
140+
)
141+
self.assertEqual(openstring.string, openstring.key)
142+
143+
translation = u'ενα δύο <tx href="https://www.transifex.com/">τρία </tx><tx> τέσσερα </tx>πέντε' # noqa
144+
stringset = [
145+
OpenString(openstring.key, translation, order=1)
146+
]
147+
148+
content = handler.compile(template, stringset)
149+
template, stringset = handler.parse(content)
150+
151+
self.assertEqual(len(stringset), 1)
152+
153+
openstring = stringset[0]
154+
self.assertEqual(openstring.order, 0)
155+
self.assertEqual(
156+
openstring.string,
157+
u'ενα δύο <tx href="https://www.transifex.com/">τρία </tx><tx> τέσσερα </tx>πέντε'
158+
)
159+
self.assertEqual(openstring.string, openstring.key)
160+
161+
translation = u'ενα δύο<tx href="https://www.transifex.com/">τρία</tx><tx>τέσσερα</tx>πέντε' # noqa
162+
stringset = [
163+
OpenString(openstring.key, translation, order=1)
164+
]
165+
166+
content = handler.compile(template, stringset)
167+
template, stringset = handler.parse(content)
168+
169+
self.assertEqual(len(stringset), 1)
170+
171+
openstring = stringset[0]
172+
self.assertEqual(openstring.order, 0)
173+
self.assertEqual(
174+
openstring.string,
175+
u'ενα δύο<tx href="https://www.transifex.com/">τρία</tx><tx>τέσσερα</tx>πέντε'
176+
)
177+
self.assertEqual(openstring.string, openstring.key)
178+
125179
def test_hyperlink_reorder(self):
126180
path = '{}/special_cases_2.docx'.format(self.TESTFILE_BASE)
127181
with open(path, 'rb') as f:
@@ -166,8 +220,6 @@ def test_hyperlink_reorder(self):
166220
self.assertEqual(text_elements[1].parent.rPr.color, None)
167221
self.assertEqual(text_elements[1].parent.rPr.u, None)
168222

169-
170-
171223
def test_complex_file(self):
172224
path = '{}/complex.docx'.format(self.TESTFILE_BASE)
173225
with open(path, 'rb') as f:

0 commit comments

Comments
 (0)