Skip to content

Commit ecaaadc

Browse files
authored
Merge pull request #245 from transifex/TX-13671-pptx-upload-fails
TX-13671 - Skip `a:fld` tag
2 parents cade82c + 46ee864 commit ecaaadc

File tree

5 files changed

+46
-13
lines changed

5 files changed

+46
-13
lines changed

openformats/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,7 @@ class ParseError(OpenformatsError):
88

99
class RuleError(OpenformatsError):
1010
pass
11+
12+
13+
class MissingParentError(OpenformatsError):
14+
pass

openformats/formats/office_open_xml/parser.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import six
22
from copy import copy
33

4+
from openformats.exceptions import MissingParentError
45
from openformats.strings import OpenString
56
from bs4 import BeautifulSoup
67
from collections import defaultdict
@@ -90,9 +91,12 @@ def parse_paragraph(cls, paragraph, rels_soup):
9091
text = u"".join([u" "*leading_spaces, text])
9192
leading_spaces = 0
9293

93-
hyperlink_url = cls.get_hyperlink_url(
94-
text_element, rels_soup
95-
)
94+
try:
95+
hyperlink_url = cls.get_hyperlink_url(
96+
text_element, rels_soup
97+
)
98+
except MissingParentError:
99+
continue
96100

97101
if all([
98102
text_elements_count == 2,
@@ -214,7 +218,7 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
214218
# in order to extract the potential hyperlink url.
215219
translation_hyperlink_url = getattr(
216220
translation_part.find_parent(attrs={'href': True}
217-
), 'attrs', {}).get('href', None)
221+
), 'attrs', {}).get('href', None)
218222

219223
# Edit in place hyperlink url
220224
if hyperlink_url and translation_hyperlink_url:
@@ -235,7 +239,6 @@ def compile_paragraph(cls, paragraph, rels_soup, stringset):
235239
text_element.clear()
236240
text_element.insert(0, translation)
237241

238-
239242
if len(added_hl_text_elements) == len(deleted_hl_text_elements):
240243
cls.swap_hyperlink_elements(
241244
added_hl_text_elements,

openformats/formats/pptx.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import six
1111
from bs4 import BeautifulSoup
1212
from openformats.handlers import Handler
13+
from openformats.exceptions import MissingParentError
1314
from openformats.formats.office_open_xml.parser import OfficeOpenXmlHandler
1415

1516

@@ -83,6 +84,7 @@ class PptxFile(object):
8384
</Relationships>
8485
```
8586
"""
87+
8688
def __init__(self, content):
8789
self.__tmp_folder = "{}/{}".format(
8890
tempfile.gettempdir(), uuid.uuid4().hex
@@ -187,7 +189,7 @@ def set_slide(self, slide, content):
187189
def get_slide_rels(self, slide):
188190
if self.__slides[slide]['rels']['content'] is None:
189191
with io.open(self.__slides[slide]['rels']['path'], 'r') as f:
190-
self.__slides[slide]['rels']['content']= f.read()
192+
self.__slides[slide]['rels']['content'] = f.read()
191193

192194
return self.__slides[slide]['rels']['content']
193195

@@ -225,6 +227,9 @@ class PptxHandler(Handler, OfficeOpenXmlHandler):
225227
def get_hyperlink_url(cls, element, document_rels):
226228
parent = element.find_parent('a:r')
227229

230+
if not parent:
231+
raise MissingParentError
232+
228233
hyperlinks = parent.find_all('a:hlinkClick', limit=1)
229234
if hyperlinks:
230235
rel = document_rels.find(
@@ -323,7 +328,7 @@ def parse(self, content, **kwargs):
323328
template = pptx.compress()
324329
pptx.delete()
325330
return template, stringset
326-
331+
327332
def compile(self, template, stringset, **kwargs):
328333
stringset = {
329334
string.string_hash: string for string in stringset
31.5 KB
Binary file not shown.

openformats/tests/formats/pptx/test_pptx.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -325,24 +325,30 @@ def test_hyperlinks_reordering(self):
325325
paragraph = soup.find_all('p:sp')[0]
326326
text_elements = paragraph.find_all('a:t')
327327

328-
self.assertEqual(text_elements[3].parent.rPr, text_elements_one_before[1].parent.rPr)
329-
self.assertEqual(text_elements[1].parent.rPr, text_elements_one_before[3].parent.rPr)
328+
self.assertEqual(text_elements[3].parent.rPr,
329+
text_elements_one_before[1].parent.rPr)
330+
self.assertEqual(text_elements[1].parent.rPr,
331+
text_elements_one_before[3].parent.rPr)
330332

331333
paragraph = soup.find_all('p:sp')[1]
332334
text_elements = paragraph.find_all('a:t')
333335

334-
self.assertEqual(text_elements[0].parent.rPr, text_elements_two_before[4].parent.rPr)
336+
self.assertEqual(text_elements[0].parent.rPr,
337+
text_elements_two_before[4].parent.rPr)
335338
self.assertEqual(
336339
re.sub(r'rId\w+', 'rId', six.text_type(text_elements[0].parent.rPr)),
337-
re.sub(r'rId\w+', 'rId', six.text_type(text_elements_two_before[4].parent.rPr))
340+
re.sub(r'rId\w+', 'rId',
341+
six.text_type(text_elements_two_before[4].parent.rPr))
338342
)
339343
self.assertEqual(
340344
re.sub(r'rId\w+', 'rId', six.text_type(text_elements[1].parent.rPr)),
341-
re.sub(r'rId\w+', 'rId', six.text_type(text_elements_two_before[6].parent.rPr))
345+
re.sub(r'rId\w+', 'rId',
346+
six.text_type(text_elements_two_before[6].parent.rPr))
342347
)
343348
self.assertEqual(
344349
re.sub(r'rId\w+', 'rId', six.text_type(text_elements[2].parent.rPr)),
345-
re.sub(r'rId\w+', 'rId', six.text_type(text_elements_two_before[6].parent.rPr))
350+
re.sub(r'rId\w+', 'rId',
351+
six.text_type(text_elements_two_before[6].parent.rPr))
346352
)
347353

348354
def test_tags_not_matching(self):
@@ -573,3 +579,18 @@ def test_slide_notes(self):
573579
u'<tx>πρόταση</tx> από κάτω'
574580
])
575581
)
582+
583+
def test_pptx_file_with_autofield(self):
584+
"""Test pptx file that contains automatically updated field
585+
can be compiled normally
586+
"""
587+
path = '{}/autofield.pptx'.format(self.TESTFILE_BASE)
588+
with open(path, 'rb') as f:
589+
content = f.read()
590+
591+
pptx = PptxFile(content)
592+
593+
self.assertTrue(u'/ppt/slides/slide1.xml' in pptx.get_slides())
594+
slide = u'/ppt/slides/slide1.xml'
595+
for text in [u'Title', u'text']:
596+
self.assertTrue(text in pptx.get_slide(slide))

0 commit comments

Comments
 (0)