Skip to content

Commit 9554cfd

Browse files
committed
Split tokens on a colon only for the 'Author:' prefix
Split tokens like 'Author:Frankie.Chu' into 'Author' and 'Frankie.Chu'.

Signed-off-by: Alok Kumar <alokkumarjipura9973@gmail.com>
1 parent 9b75832 commit 9554cfd

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

src/cluecode/copyrights.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,12 +389,12 @@ def detect(self,
389389
yield author
390390

391391

392-
def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;:]+').split):
392+
def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;]+').split):
393393
"""
394394
Return an iterable of pygmars.Token built from a ``numbered_lines`` iterable
395395
of tuples of (line number, text).
396396
397-
We perform a simple tokenization on spaces, tabs and some punctuation: =;:
397+
We perform a simple tokenization on spaces, tabs and some punctuation: =;
398398
"""
399399
last_line = ""
400400
for start_line, line in numbered_lines:
@@ -436,6 +436,17 @@ def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;:]+').split):
436436
.rstrip(':') # strip trailing colons
437437
.strip()
438438
)
439+
440+
# Split tokens like 'Author:Frankie.Chu' into 'Author' and 'Frankie.Chu'
441+
if tok.startswith("Author:"):
442+
parts = tok.split(":", 1)
443+
if len(parts) == 2:
444+
for part in parts:
445+
part = part.strip()
446+
if part and part not in ':.':
447+
yield Token(value=part, start_line=start_line, pos=pos)
448+
pos += 1
449+
continue
439450

440451
# the tokenizer allows a single colon or dot to be a token and we discard these
441452
if tok and tok not in ':.':

0 commit comments

Comments
 (0)