Skip to content

Commit 4cd5f07

Browse files
authored
Merge pull request #236 from transifex/pptx_notes_segmentations
Better string segmentation for pptx notes
2 parents d954670 + 57f3e0a commit 4cd5f07

File tree

2 files changed

39 additions and 20 deletions

2 files changed

39 additions and 20 deletions

openformats/formats/pptx.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -307,15 +307,16 @@ def parse(self, content, **kwargs):
307307
soup = BeautifulSoup(pptx.get_slide(slide), 'xml')
308308
rels_soup = BeautifulSoup(pptx.get_slide_rels(slide), 'xml')
309309

310-
for paragraph in soup.find_all('p:sp'):
311-
open_string = self.parse_paragraph(paragraph, rels_soup)
312-
if not open_string:
313-
continue
310+
for parent in soup.find_all('p:sp'):
311+
for paragraph in parent.find_all('a:p'):
312+
open_string = self.parse_paragraph(paragraph, rels_soup)
313+
if not open_string:
314+
continue
314315

315-
open_string.order = next(order)
316-
if notes_slide:
317-
open_string.tags = ['notes']
318-
stringset.append(open_string)
316+
open_string.order = next(order)
317+
if notes_slide:
318+
open_string.tags = ['notes']
319+
stringset.append(open_string)
319320

320321
pptx.set_slide(slide, six.text_type(soup))
321322

@@ -333,8 +334,9 @@ def compile(self, template, stringset, **kwargs):
333334
soup = BeautifulSoup(pptx.get_slide(slide), 'xml')
334335
rels_soup = BeautifulSoup(pptx.get_slide_rels(slide), 'xml')
335336

336-
for paragraph in soup.find_all('p:sp'):
337-
self.compile_paragraph(paragraph, rels_soup, stringset)
337+
for parent in soup.find_all('p:sp'):
338+
for paragraph in parent.find_all('a:p'):
339+
self.compile_paragraph(paragraph, rels_soup, stringset)
338340

339341
pptx.set_slide(slide, six.text_type(soup))
340342
pptx.set_slide_rels(slide, six.text_type(rels_soup))

openformats/tests/formats/pptx/test_pptx.py

Lines changed: 27 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -505,7 +505,7 @@ def test_slide_notes(self):
505505
handler = PptxHandler()
506506
template, stringset = handler.parse(content)
507507

508-
self.assertEqual(len(stringset), 1)
508+
self.assertEqual(len(stringset), 2)
509509

510510
openstring = stringset[0]
511511
self.assertEqual(openstring.order, 0)
@@ -514,8 +514,17 @@ def test_slide_notes(self):
514514
''.join([
515515
u'This<tx> slide has only notes and </tx>',
516516
u'<tx href="http://www.transifex.com">hyperlinks</tx>'
517-
u'<tx>.</tx>',
518-
u'<tx>Another </tx>',
517+
u'.'
518+
])
519+
)
520+
self.assertEqual(openstring.tags, ['notes'])
521+
522+
openstring = stringset[1]
523+
self.assertEqual(openstring.order, 1)
524+
self.assertEqual(
525+
openstring.string,
526+
''.join([
527+
u'Another ',
519528
u'<tx>sentence</tx> below'
520529
])
521530
)
@@ -524,9 +533,10 @@ def test_slide_notes(self):
524533
translated_strings = [
525534
[
526535
u'Αυτό<tx> το slide έχει μόνο notes και </tx>',
527-
u'<tx href="http://el.transifex.com">συνδέσμους</tx>'
528-
u'<tx>.</tx>',
529-
u'<tx>Άλλη μια </tx>',
536+
u'<tx href="http://el.transifex.com">συνδέσμους</tx>.'
537+
],
538+
[
539+
u'Άλλη μια ',
530540
u'<tx>πρόταση</tx> από κάτω'
531541
]
532542
]
@@ -542,17 +552,24 @@ def test_slide_notes(self):
542552
content = handler.compile(template, translated_stringset)
543553
template, stringset = handler.parse(content)
544554

545-
self.assertEqual(len(stringset), 1)
555+
self.assertEqual(len(stringset), 2)
546556

547557
openstring = stringset[0]
548558
self.assertEqual(openstring.order, 0)
549559
self.assertEqual(
550560
openstring.string,
551561
''.join([
552562
u'Αυτό<tx> το slide έχει μόνο notes και </tx>',
553-
u'<tx href="http://el.transifex.com">συνδέσμους</tx>'
554-
u'<tx>.</tx>',
555-
u'<tx>Άλλη μια </tx>',
563+
u'<tx href="http://el.transifex.com">συνδέσμους</tx>.'
564+
])
565+
)
566+
567+
openstring = stringset[1]
568+
self.assertEqual(openstring.order, 1)
569+
self.assertEqual(
570+
openstring.string,
571+
''.join([
572+
u'Άλλη μια ',
556573
u'<tx>πρόταση</tx> από κάτω'
557574
])
558575
)

0 commit comments

Comments
 (0)