1 file changed: 13 additions, 2 deletions

@@ -389,12 +389,12 @@ def detect(self,
             yield author
 
 
-def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;:]+').split):
+def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;]+').split):
     """
     Return an iterable of pygmars.Token built from a ``numbered_lines`` iterable
     of tuples of (line number, text).
 
-    We perform a simple tokenization on spaces, tabs and some punctuation: =;:
+    We perform a simple tokenization on spaces, tabs and some punctuation: =;
     """
     last_line = ""
     for start_line, line in numbered_lines:
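
Note on the splitter change above: dropping `:` from the character class means the regex no longer breaks a token such as `Author:Frankie.Chu` at the colon; that case is now handled explicitly in the hunk below. A minimal standalone sketch of the difference (the two regexes come from the patch; the demo line is only an example):

    import re

    # Old and new splitters used by get_tokens(); only the colon differs.
    old_split = re.compile(r'[\t =;:]+').split
    new_split = re.compile(r'[\t =;]+').split

    line = "Author:Frankie.Chu"
    print(old_split(line))  # ['Author', 'Frankie.Chu'] -- the colon used to split here
    print(new_split(line))  # ['Author:Frankie.Chu']    -- token kept whole, handled later
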
@@ -436,6 +436,17 @@ def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;:]+').split):
                 .rstrip(':')  # strip trailing colons
                 .strip()
             )
+
+            # Split tokens like 'Author:Frankie.Chu' into 'Author' and 'Frankie.Chu'
+            if tok.startswith("Author:"):
+                parts = tok.split(":", 1)
+                if len(parts) == 2:
+                    for part in parts:
+                        part = part.strip()
+                        if part and part not in ':.':
+                            yield Token(value=part, start_line=start_line, pos=pos)
+                            pos += 1
+                    continue
 
             # the tokenizer allows a single colon or dot to be a token and we discard these
             if tok and tok not in ':.':
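
The added branch splits a combined `Author:name` token into two tokens and then skips the generic path for that token. A simplified, standalone sketch of that behavior, using a namedtuple as a hypothetical stand-in for pygmars.Token (the real class carries more attributes) and a made-up helper name:

    from collections import namedtuple

    # Hypothetical minimal stand-in for pygmars.Token, for illustration only.
    Token = namedtuple("Token", "value start_line pos")

    def split_author_token(tok, start_line, pos):
        # Mirrors the added branch: 'Author:Frankie.Chu' -> 'Author', 'Frankie.Chu'
        if tok.startswith("Author:"):
            parts = tok.split(":", 1)
            if len(parts) == 2:
                for part in parts:
                    part = part.strip()
                    if part and part not in ':.':
                        yield Token(value=part, start_line=start_line, pos=pos)
                        pos += 1

    print(list(split_author_token("Author:Frankie.Chu", start_line=12, pos=0)))
    # [Token(value='Author', start_line=12, pos=0),
    #  Token(value='Frankie.Chu', start_line=12, pos=1)]

In the patch itself, the `continue` after a successful split ensures the combined token is not also yielded by the generic `if tok and tok not in ':.':` path that follows.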