12
12
from openformats .handlers import Handler
13
13
from openformats .exceptions import MissingParentError
14
14
from openformats .formats .office_open_xml .parser import OfficeOpenXmlHandler
15
+ from openformats .strings import OpenString
16
+ from collections import defaultdict
15
17
16
18
17
19
class PptxFile (object ):
@@ -402,3 +404,175 @@ def compile(self, template, stringset, **kwargs):
402
404
result = pptx .compress ()
403
405
pptx .delete ()
404
406
return result
407
+
408
class PptxHandlerV2(PptxHandler):
    """New version of the PptxHandler.

    Empty spaces found in text elements are handled as normal text instead
    of being prepended to the next text element.
    """
    name = "PPTX_V2"

    @classmethod
    def parse_paragraph(cls, paragraph, rels_soup):
        """Extract the source string of a paragraph.

        The text of every text element in ``paragraph`` is concatenated,
        with ``<tx>`` / ``<tx href="...">`` markers inserted around
        elements. On success the string hash is stored in the paragraph's
        ``txid`` attribute.

        :param paragraph: the paragraph element to parse.
        :param rels_soup: relationships document passed through to
            ``get_hyperlink_url``.
        :return: an ``OpenString``, or ``None`` when the paragraph has no
            text elements or the assembled text is only whitespace.
        """
        text_elements = paragraph.find_all(cls.TEXT_ELEMENT_TAG)
        if not text_elements:
            return None

        text_elements_count = len(text_elements)
        paragraph_text = []
        open_hyperlink = None

        for index, text_element in enumerate(text_elements):
            text = text_element.text

            try:
                hyperlink_url = cls.get_hyperlink_url(text_element, rels_soup)
            except MissingParentError:
                # Elements whose hyperlink cannot be resolved are left out
                # of the string altogether.
                continue

            # Plain <tx> wrapping is only applied to elements that do not
            # start a new hyperlink (that case gets a <tx href> below), and
            # only when the paragraph has exactly two text elements or the
            # element is neither the first nor the last one.
            continues_current_link = (
                not hyperlink_url or hyperlink_url == open_hyperlink
            )
            is_inner_element = 0 < index < text_elements_count - 1
            if continues_current_link and (
                text_elements_count == 2 or is_inner_element
            ):
                text = u'<tx>{}</tx>'.format(text)

            if hyperlink_url and not open_hyperlink:
                # open a hyperlink tag
                text = u'<tx href="{}">{}'.format(hyperlink_url, text)
                open_hyperlink = hyperlink_url
            elif open_hyperlink and not hyperlink_url:
                # close the hyperlink tag that was open
                text = u'</tx>{}'.format(text)
                open_hyperlink = None
            elif hyperlink_url and hyperlink_url != open_hyperlink:
                # a different hyperlink starts: close and open a new tag
                text = u'</tx><tx href="{}">{}'.format(hyperlink_url, text)
                open_hyperlink = hyperlink_url

            paragraph_text.append(text)

        if open_hyperlink:
            # close the tag that is still open at the end of the paragraph
            paragraph_text.append(u'</tx>')

        paragraph_text = u''.join(paragraph_text)
        if not paragraph_text.strip():
            return None

        open_string = OpenString(paragraph_text, paragraph_text)
        paragraph.attrs['txid'] = open_string.string_hash
        return open_string

    # Declared as a classmethod to match ``parse_paragraph``: the first
    # parameter is ``cls`` and the body only goes through ``cls`` helpers.
    # NOTE(review): assumes every helper used below is callable on the
    # class (classmethod/staticmethod) - confirm against the base handlers.
    @classmethod
    def compile_paragraph(cls, paragraph, rels_soup, stringset, is_rtl=False):
        """Write the translation of a paragraph back into its text elements.

        Nothing is done when the paragraph has no text elements, has no
        ``txid`` attribute, or ``stringset`` has no entry for that id.

        :param paragraph: the paragraph element to update in place.
        :param rels_soup: relationships document passed through to the
            hyperlink helpers.
        :param stringset: mapping of string hash to the string object whose
            ``string`` attribute holds the translation.
        :param is_rtl: when true, ``set_rtl_orientation`` is applied to the
            paragraph.
        :return: ``None``; the paragraph is modified in place.
        """
        text_elements = paragraph.find_all(cls.TEXT_ELEMENT_TAG)
        if not text_elements:
            return

        txid = paragraph.attrs.get('txid')
        if not txid:
            return

        translated = stringset.get(txid, None)
        if translated is None:
            return

        escaped_translation_string = cls._escape_xml(translated.string)
        translation_soup = BeautifulSoup(
            u'<wrapper>{}</wrapper>'.format(escaped_translation_string),
            'xml',
        ).find_all(text=True)

        added_hl_text_elements = defaultdict(list)
        deleted_hl_text_elements = defaultdict(list)
        elements_for_removal = []
        last_element = None

        if is_rtl:
            cls.set_rtl_orientation(paragraph)

        # First of all try to replace each element's text with the matching
        # translation part; this is the happiest path.
        for text_element in text_elements:
            last_element = text_element
            try:
                hyperlink_url = cls.get_hyperlink_url(text_element, rels_soup)
            except MissingParentError:
                continue

            # The translation has fewer text parts than the document, so
            # the exceeding document elements are queued for removal.
            if not translation_soup:
                elements_for_removal.append(text_element)
                continue

            translation_part = translation_soup.pop(0)
            translation = six.text_type(translation_part)
            translation_hyperlink_url = cls.get_translation_hyperlink(
                translation_part
            )

            text_element.clear()
            text_element.insert(0, translation)

            if hyperlink_url and translation_hyperlink_url:
                # Both sides have a hyperlink: edit the url in place.
                cls.set_hyperlink_url(
                    text_element, rels_soup, translation_hyperlink_url
                )
            elif hyperlink_url:
                deleted_hl_text_elements[hyperlink_url].append(text_element)
            elif translation_hyperlink_url:
                added_hl_text_elements[translation_hyperlink_url].append(
                    text_element
                )

        # The translation has more text parts than the document, so the
        # remaining parts are compressed into the last element.
        if translation_soup and last_element and last_element.contents:
            translation = last_element.contents[0] + "".join(
                [six.text_type(part) for part in translation_soup]
            )
            last_element.clear()
            last_element.insert(0, translation)

        if added_hl_text_elements and (
            len(added_hl_text_elements) == len(deleted_hl_text_elements)
        ):
            cls.swap_hyperlink_elements(
                added_hl_text_elements,
                deleted_hl_text_elements
            )

        for unlinked_elements in six.itervalues(deleted_hl_text_elements):
            for unlinked_element in unlinked_elements:
                cls.remove_hyperlink(unlinked_element)

        for url, linked_elements in six.iteritems(added_hl_text_elements):
            for linked_element in linked_elements:
                cls.create_hyperlink_url(linked_element, rels_soup, url)

        for element in elements_for_removal:
            cls.remove_text_element(element)
0 commit comments