@@ -53,7 +53,14 @@ def swap_hyperlink_elements(
53
53
replacements [e ] = added_format
54
54
55
55
for text_element , format in six .iteritems (replacements ):
56
- text_element .parent .rPr .replaceWith (format )
56
+ if text_element .parent .rPr :
57
+ if format :
58
+ text_element .parent .rPr .replaceWith (format )
59
+ else :
60
+ text_element .parent .rPr .extract ()
61
+ else :
62
+ if format :
63
+ text_element .insert_before (format )
57
64
58
65
@staticmethod
59
66
def _escape_xml (translation ):
@@ -164,82 +171,84 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
164
171
if stringset .get (txid , None ) is None :
165
172
return
166
173
167
- translation = stringset [txid ].string
168
- translation = cls ._escape_xml (translation )
174
+ translation_string = stringset [txid ].string
175
+ escaped_translation_string = cls ._escape_xml (translation_string )
169
176
170
177
translation_soup = BeautifulSoup (
171
- u'<wrapper>{}</wrapper>' .format (translation ), 'xml' ,
178
+ u'<wrapper>{}</wrapper>' .format (escaped_translation_string ), 'xml' ,
172
179
).find_all (text = True )
173
180
174
- leading_spaces = 0
175
- empty_text_element = None
176
181
added_hl_text_elements = defaultdict (list )
177
182
deleted_hl_text_elements = defaultdict (list )
183
+ empty_text_element = None
184
+ elements_for_removal = []
185
+ last_element = None
178
186
187
+ leading_spaces = 0
188
+
189
+ # First of all try to replace each element translation
190
+ # this is the happiest path
179
191
for index , text_element in enumerate (text_elements ):
180
192
text = six .text_type (text_element .text )
193
+
181
194
# detect text elements that contain no text
182
195
# and remove leading whitespace from the next string
183
196
if not text .strip ():
184
197
leading_spaces = len (text ) - len (text .strip ())
185
198
empty_text_element = text_element
186
199
continue
200
+
201
+ last_element = text_element
202
+
203
+ hyperlink_url = cls .get_hyperlink_url (text_element , rels_soup )
204
+
205
+ # the text parts of the translation are fewer than the
206
+ # text parts of the document, so we will just remove
207
+ # any exceeding part from the document
208
+ if len (translation_soup ) == 0 :
209
+ elements_for_removal .append (text_element )
210
+ continue
187
211
else :
188
- hyperlink_url = cls .get_hyperlink_url (
189
- text_element , rels_soup
190
- )
191
- # the text parts of the translation are less that the
192
- # text parts of the document, so we will just remove
193
- # any excessing part from the document
194
- if len (translation_soup ) == 0 :
195
- cls .remove_text_element (text_element )
196
- continue
197
212
translation_part = translation_soup .pop (0 )
198
213
translation = six .text_type (translation_part )
214
+ translation_hyperlink_url = cls .get_translation_hyperlink (translation_part )
215
+
199
216
if not translation [:leading_spaces ].strip ():
200
217
translation = translation [leading_spaces :]
218
+ leading_spaces = 0
201
219
else :
202
220
if empty_text_element :
203
- cls . remove_text_element (empty_text_element )
221
+ elements_for_removal . append (empty_text_element )
204
222
empty_text_element = None
205
223
206
- leading_spaces = 0
207
-
208
- # the text parts of the translation are more that the
209
- # text parts of the document, so we will compress the
210
- # remaining translation parts into one string
211
- if (index == len (text_elements ) - 1 and len (translation_soup ) > 0 ):
212
- translation = "" .join (
213
- [translation ] +
214
- [six .text_type (t ) for t in translation_soup ]
215
- )
216
-
217
- # attempt to find a parent containing `href` attribute
218
- # in order to extract the potential hyperlink url.
219
- translation_hyperlink_url = getattr (
220
- translation_part .find_parent (attrs = {'href' : True }
221
- ), 'attrs' , {}).get ('href' , None )
224
+ text_element .clear ()
225
+ text_element .insert (0 , translation )
222
226
223
227
# Edit in place hyperlink url
224
228
if hyperlink_url and translation_hyperlink_url :
225
229
cls .set_hyperlink_url (
226
230
text_element , rels_soup , translation_hyperlink_url
227
231
)
232
+ else :
233
+ if hyperlink_url :
234
+ deleted_hl_text_elements [hyperlink_url ]\
235
+ .append (text_element )
236
+ elif translation_hyperlink_url :
237
+ added_hl_text_elements [translation_hyperlink_url ]\
238
+ .append (text_element )
239
+
240
+ # the text parts of the translation are more than the
241
+ # text parts of the document, so we will compress the
242
+ # remaining translation parts into one string
243
+ if len (translation_soup ) > 0 :
244
+ translation = last_element .contents [0 ] + \
245
+ "" .join ([six .text_type (t ) for t in translation_soup ]
246
+ )
247
+ last_element .clear ()
248
+ last_element .insert (0 , translation )
228
249
229
- # remove hyperlink from source docx
230
- if hyperlink_url and not translation_hyperlink_url :
231
- deleted_hl_text_elements [hyperlink_url ].append (text_element )
232
-
233
- # create a new hyperlink
234
- if not hyperlink_url and translation_hyperlink_url :
235
- added_hl_text_elements [translation_hyperlink_url ].append (
236
- text_element
237
- )
238
-
239
- text_element .clear ()
240
- text_element .insert (0 , translation )
241
-
242
- if len (added_hl_text_elements ) == len (deleted_hl_text_elements ):
250
+ if len (added_hl_text_elements ) == len (deleted_hl_text_elements )\
251
+ and len (added_hl_text_elements ) > 0 :
243
252
cls .swap_hyperlink_elements (
244
253
added_hl_text_elements ,
245
254
deleted_hl_text_elements
@@ -252,3 +261,11 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
252
261
for url , text_elements in six .iteritems (added_hl_text_elements ):
253
262
for text_element in text_elements :
254
263
cls .create_hyperlink_url (text_element , rels_soup , url )
264
+
265
+ for element in elements_for_removal :
266
+ cls .remove_text_element (element )
267
+
268
+ def get_translation_hyperlink (self , translation_part ):
269
+ return getattr (
270
+ translation_part .find_parent (attrs = {'href' : True }
271
+ ), 'attrs' , {}).get ('href' , None )
0 commit comments