From 2d10688ebe0a490e4278ddc259ff098ab7ccad33 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 5 May 2025 12:23:32 +0200 Subject: [PATCH] Detect "SPDX Short Identifier" tags #4301 Reference: https://github.com/aboutcode-org/scancode-toolkit/issues/4301 Signed-off-by: Philippe Ombredanne --- src/licensedcode/match_spdx_lid.py | 72 +- src/licensedcode/query.py | 11 +- tests/licensedcode/test_match_spdx_lid.py | 1053 ++++++++++----------- 3 files changed, 553 insertions(+), 583 deletions(-) diff --git a/src/licensedcode/match_spdx_lid.py b/src/licensedcode/match_spdx_lid.py index 545fe1c13b..aaf99e53ec 100644 --- a/src/licensedcode/match_spdx_lid.py +++ b/src/licensedcode/match_spdx_lid.py @@ -392,33 +392,71 @@ def clean_text(text): _split_spdx_lid = re.compile( - '(spd[xz][\\-\\s]+lin?[cs]en?[sc]es?[\\-\\s]+identifi?er\\s*:?\\s*)', - re.IGNORECASE).split - -_nuget_split_spdx_lid = re.compile( - '(licenses(?:\\.|\\s)+nuget(?:\\.|\\s)+org\\s*:?\\s*)', - re.IGNORECASE).split + r'(' + r'(?:' + r'spd[xz][_\-\s]+' + r'(?:lin?[cs]en?[sc]es?|short)[_\-\s]+' + r'identifi?ers?\s*:?' + r'|' + r'licenses[\.\s]+nuget[\.\s]+org\s*/?' + r')\s*' + r')', + re.IGNORECASE, +).split def split_spdx_lid(text): """ - Split text if it contains an "SPDX license identifier". Return a 2-tuple if if there is an SPDX + Split text if it contains an "SPDX license identifier". Return a 2-tuple if there is an SPDX license identifier where the first item contains the "SPDX license identifier" text proper and the second item contains the remainder of the line (expected to be a license expression). Otherwise return a 2-tuple where the first item is None and the second item contains the original text. - Also supports "https://licenses.nuget.org" followed by a license expression. + Also supports "https://licenses.nuget.org" followed by a license expression as well as minor + variants such as SPDX short Identifier, and typos. 
+ + Split regex examples:: + + >>> _split_spdx_lid("licenses.nuget.org/MIT%20OR%20Unlicense") + ['', 'licenses.nuget.org/', 'MIT%20OR%20Unlicense'] + >>> _split_spdx_lid("licenses.nuget.org / MIT") + ['', 'licenses.nuget.org / ', 'MIT'] + >>> _split_spdx_lid("licenseUrl:https://licenses.nuget.org/MIT%20OR%20Unlicense") + ['licenseUrl:https://', 'licenses.nuget.org/', 'MIT%20OR%20Unlicense'] + >>> _split_spdx_lid("SPDX-license-Identifier: MIT OR Unlicense") + ['', 'SPDX-license-Identifier: ', 'MIT OR Unlicense'] + >>> _split_spdx_lid("SPDX-license-Identifer: MIT OR Unlicense") + ['', 'SPDX-license-Identifer: ', 'MIT OR Unlicense'] + >>> _split_spdx_lid("SPDX short Identifer : MIT OR Unlicense") + ['', 'SPDX short Identifer : ', 'MIT OR Unlicense'] + >>> _split_spdx_lid("For OR Unlicense") + ['For OR Unlicense'] + >>> _split_spdx_lid(" REM DNL SPDX short Identifer : MIT OR Unlicense") + [' REM DNL ', 'SPDX short Identifer : ', 'MIT OR Unlicense'] + + Split full examples:: + + >>> split_spdx_lid("licenses.nuget.org/MIT%20OR%20Unlicense") + ('licenses.nuget.org/', 'MIT%20OR%20Unlicense') + >>> split_spdx_lid("licenses.nuget.org / MIT") + ('licenses.nuget.org / ', 'MIT') + >>> split_spdx_lid("licenseUrl:https://licenses.nuget.org/MIT%20OR%20Unlicense") + ('licenses.nuget.org/', 'MIT%20OR%20Unlicense') + >>> split_spdx_lid("SPDX-license-Identifier: MIT OR Unlicense") + ('SPDX-license-Identifier: ', 'MIT OR Unlicense') + >>> split_spdx_lid("SPDX-license-Identifer: MIT OR Unlicense") + ('SPDX-license-Identifer: ', 'MIT OR Unlicense') + >>> split_spdx_lid("SPDX short Identifer : MIT OR Unlicense") + ('SPDX short Identifer : ', 'MIT OR Unlicense') + >>> split_spdx_lid("For OR Unlicense") + (None, 'For OR Unlicense') """ segments = _split_spdx_lid(text) - expression = segments[-1] - if len(segments) > 1: - return segments[-2], expression + if len(segments) == 3: + # we matched on split OK with exactly three segments + _, prefix, expression = segments + return prefix, 
expression else: - segments = _nuget_split_spdx_lid(text) - expression = segments[-1] - if len(segments) > 1: - return segments[-2], expression - else: - return None, text + return None, text diff --git a/src/licensedcode/query.py b/src/licensedcode/query.py index 57f4ce82c3..d38d231ab7 100644 --- a/src/licensedcode/query.py +++ b/src/licensedcode/query.py @@ -102,7 +102,6 @@ def logger_debug(*args): # on a single line (e.g. minified JS or CSS). MAX_TOKEN_PER_LINE = 25 - # Break quary in runs if there are `LINES_THRESHOLD` number of empty # or non-legalese/junk lines LINES_THRESHOLD = 4 @@ -248,19 +247,23 @@ def __init__( # TODO: consider using an intbitset self.shorts_and_digits_pos = set() - # list of the three SPDX-License-Identifier tokens to identify to detect + # list of the base SPDX-License-Identifier tokens to identify and detect # a line for SPDX id matching. # note: this will not match anything if the index is not properly set dic_get = idx.dictionary.get spdxid = [dic_get(u'spdx'), dic_get(u'license'), dic_get(u'identifier')] + # "SPDX Short identifier" is also an unfortunate thing in the wild + # both with and without dash + spdxid2 = [dic_get(u'spdx'), dic_get(u'short'), dic_get(u'identifier')] + # There's also other spdx license identifiers like NuGet license URLs # Like: `https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0+)` nuget_spdx_id = [dic_get(u'licenses'), dic_get(u'nuget'), dic_get(u'org')] # None, None None: this is mostly a possible issue in test mode self.spdx_lid_token_ids = [ - x for x in [spdxid, nuget_spdx_id, ] if x != [None, None, None] + x for x in [spdxid, nuget_spdx_id, spdxid2] if None not in x ] # list of tuple (original line text, start known pos, end known pos) for @@ -497,7 +500,7 @@ def tokens_by_line( spdx_start_offset = 2 if spdx_start_offset is not None: - + # keep the line, start/end known pos for SPDX matching spdx_prefix, spdx_expression = split_spdx_lid(line) spdx_text = ''.join([spdx_prefix 
or '', spdx_expression]) diff --git a/tests/licensedcode/test_match_spdx_lid.py b/tests/licensedcode/test_match_spdx_lid.py index b5ff8df6dc..be830792ea 100644 --- a/tests/licensedcode/test_match_spdx_lid.py +++ b/tests/licensedcode/test_match_spdx_lid.py @@ -31,7 +31,8 @@ from scancode_config import REGEN_TEST_FIXTURES from scancode.cli_test_utils import check_json_scan from scancode.cli_test_utils import run_scan_click - +import pytest +from typing import NamedTuple TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') @@ -57,7 +58,6 @@ def test_spdx_license_detection_with_markup(self): check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES) - class TestSpdxQueryLines(FileBasedTesting): test_data_dir = TEST_DATA_DIR @@ -70,13 +70,19 @@ def test_Query_with_spdx_basic(self): From uboot: the first two lines are patch-like: * SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause + +Incorrect bu common short: + * SPDX Short Identifier: GPL-2.0+ BSD-2-Clause + ''' qry = Query(query_string=querys, idx=idx) expected = [ - ('SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', 0, 15), - ('SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0',16, 34), - ('SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', 45, 53)] + ('SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', 0, 15), + ('SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0', 16, 34), + ('SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', 45, 53), + ('SPDX Short Identifier: GPL-2.0+ BSD-2-Clause', 57, 65), + ] assert qry.spdx_lines == expected @@ -98,8 +104,8 @@ def test_Query_with_spdx_basic(self): qry = Query(query_string=querys, idx=idx) expected = [ - ('licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', 1, 14), - ('SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0',15, 33), + ('licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR 
Apache-2.0)', 1, 14), + ('SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0', 15, 33), ('licenses.nuget.org/MIT', 45, 48), ('licenses.nuget.org/(MIT)', 50, 53) ] @@ -113,6 +119,7 @@ def get_query_spdx_lines_test_method(test_loc , expected_loc, regen=REGEN_TEST_F pos) for SPDX identifier lines found in the file at `test_loc` and assert results against expected results found in the JSON file at `expected_loc` """ + def test_method(self): idx = cache.get_index() qry = Query(location=test_loc, idx=idx) @@ -157,550 +164,485 @@ class TestSpdxQueryLinesDataDriven(unittest.TestCase): build_spdx_line_tests(clazz=TestSpdxQueryLinesDataDriven, regen=REGEN_TEST_FIXTURES) -class TestMatchSpdx(FileBasedTesting): - test_data_dir = TEST_DATA_DIR - - def test_clean_line(self): - tests = [ - '* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', - '* SPDX-License-Identifier: BSD-3-Clause ', - '// SPDX-License-Identifier: BSD-3-Clause (', - '# SPDX-License-Identifier: BSD-3-Clause', - '/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */', - '* SPDX-License-Identifier: GPL-2.0+', - '* SPDX-License-Identifier: GPL-2.0', - '; SPDX-License-Identifier: GPL-2.0', - ';;; SPDX-License-Identifier: GPL-2.0', - '! 
SPDX-License-Identifier: GPL-2.0', - '// SPDX-License-Identifier: GPL-2.0+', - '/* SPDX-License-Identifier: GPL-2.0+ */', - '* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause )', - '(/ SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)', - '// SPDX-License-Identifier: LGPL-2.1+', - '+SPDX-License-Identifier: GPL-2.0+', - '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', - '// SPDX License Identifier LGPL-2.1+', - ] - - expected = [ - 'SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', - 'SPDX-License-Identifier: BSD-3-Clause', - 'SPDX-License-Identifier: BSD-3-Clause', - 'SPDX-License-Identifier: BSD-3-Clause', - 'SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note', - 'SPDX-License-Identifier: GPL-2.0+', - 'SPDX-License-Identifier: GPL-2.0', - 'SPDX-License-Identifier: GPL-2.0', - 'SPDX-License-Identifier: GPL-2.0', - 'SPDX-License-Identifier: GPL-2.0', - 'SPDX-License-Identifier: GPL-2.0+', - 'SPDX-License-Identifier: GPL-2.0+', - 'SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause )', - '(/ SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)', - 'SPDX-License-Identifier: LGPL-2.1+', - 'SPDX-License-Identifier: GPL-2.0+', - 'SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', - 'SPDX License Identifier LGPL-2.1+' - ] - results = [clean_text(test) for test in tests] - assert results == expected - - - def test_clean_line_nuget(self): - tests = [ - '* https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', - '* https://licenses.nuget.org/MIT', - '* https://licenses.nuget.org/(MIT)' - ] - - expected = [ - 'https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', - 'https://licenses.nuget.org/MIT', - 'https://licenses.nuget.org/(MIT)' - ] - results = [clean_text(test) for test in tests] - assert results == expected - - def test_clean_line_markup(self): - tests = [ - '

SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

', - 'MIT', - 'Apache-2.0', - 'licenses.nuget.org /MIT\">MIT ' - ] - - expected = [ - 'SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1', - 'https://licenses.nuget.org/MIT', - 'https://licenses.nuget.org/Apache-2.0', - 'licenses.nuget.org /MIT' - ] - results = [clean_text(test) for test in tests] - assert results == expected - - def test_prepare_text(self): - tests = [ - '* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', - '* SPDX-License-Identifier: BSD-3-Clause ', - '// SPDX-License-Identifier: BSD-3-Clause (', - '# SPDX-License-Identifier: BSD-3-Clause', - '/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */', - '* SPDX-License-Identifier: GPL-2.0+', - '* SPDX-License-Identifier: GPL-2.0', - '; SPDX-License-Identifier: GPL-2.0', - ';;; SPDX-License-Identifier: GPL-2.0', - '! SPDX-License-Identifier: GPL-2.0', - '// SPDX-License-Identifier: GPL-2.0+', - '/* SPDX-License-Identifier: GPL-2.0+ */', - '* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause )', - '(/ SPDX-Licence--Identifier: (GPL-2.0 OR BSD-3-Clause)', - '// SPDX-License-Identifier: LGPL-2.1+', - '+SPDX-License-Identifier: GPL-2.0+', - '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', - '// SPDX Licence Identifier LGPL-2.1+', - ] +class SpdxLidTest(NamedTuple): + test: str + expected: [tuple | list] + + +clean_line_tests = [ + SpdxLidTest(test='* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', expected='SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)'), + SpdxLidTest(test='* SPDX-License-Identifier: BSD-3-Clause ', expected='SPDX-License-Identifier: BSD-3-Clause'), + SpdxLidTest(test='// SPDX-License-Identifier: BSD-3-Clause (', expected='SPDX-License-Identifier: BSD-3-Clause'), + SpdxLidTest(test='# SPDX-License-Identifier: BSD-3-Clause', expected='SPDX-License-Identifier: BSD-3-Clause'), + SpdxLidTest(test='/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */', expected='SPDX-License-Identifier: 
GPL-1.0+ WITH Linux-syscall-note'), + SpdxLidTest(test='* SPDX-License-Identifier: GPL-2.0+', expected='SPDX-License-Identifier: GPL-2.0+'), + SpdxLidTest(test='* SPDX-License-Identifier: GPL-2.0', expected='SPDX-License-Identifier: GPL-2.0'), + SpdxLidTest(test='; SPDX-License-Identifier: GPL-2.0', expected='SPDX-License-Identifier: GPL-2.0'), + SpdxLidTest(test=';;; SPDX-License-Identifier: GPL-2.0', expected='SPDX-License-Identifier: GPL-2.0'), + SpdxLidTest(test='! SPDX-License-Identifier: GPL-2.0', expected='SPDX-License-Identifier: GPL-2.0'), + SpdxLidTest(test='// SPDX-License-Identifier: GPL-2.0+', expected='SPDX-License-Identifier: GPL-2.0+'), + SpdxLidTest(test='/* SPDX-License-Identifier: GPL-2.0+ */', expected='SPDX-License-Identifier: GPL-2.0+'), + SpdxLidTest(test='* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause )', expected='SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause )'), + SpdxLidTest(test='(/ SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)', expected='(/ SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)'), + SpdxLidTest(test='// SPDX-License-Identifier: LGPL-2.1+', expected='SPDX-License-Identifier: LGPL-2.1+'), + SpdxLidTest(test='+SPDX-License-Identifier: GPL-2.0+', expected='SPDX-License-Identifier: GPL-2.0+'), + SpdxLidTest(test='* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', expected='SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause'), + SpdxLidTest(test='// SPDX License Identifier LGPL-2.1+', expected='SPDX License Identifier LGPL-2.1+'), + SpdxLidTest(test='* https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', expected='https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)'), + SpdxLidTest(test='* https://licenses.nuget.org/MIT', expected='https://licenses.nuget.org/MIT'), + SpdxLidTest(test='* https://licenses.nuget.org/(MIT)', expected='https://licenses.nuget.org/(MIT)'), + SpdxLidTest(test='

SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

', expected='SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1'), + SpdxLidTest(test='MIT', expected='https://licenses.nuget.org/MIT'), + SpdxLidTest(test='Apache-2.0', expected='https://licenses.nuget.org/Apache-2.0'), + SpdxLidTest(test='licenses.nuget.org /MIT\">MIT ', expected='licenses.nuget.org /MIT'), +] + + +@pytest.mark.parametrize('test, expected', clean_line_tests) +def test_clean_line(test, expected): + result = clean_text(test) + assert result == expected + + +prepare_text_tests = [ + SpdxLidTest(test='* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)', expected=('SPDX-License-Identifier:', '(BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)')), + SpdxLidTest(test='* SPDX-License-Identifier: BSD-3-Clause ', expected=('SPDX-License-Identifier:', 'BSD-3-Clause')), + SpdxLidTest(test='// SPDX-License-Identifier: BSD-3-Clause (', expected=('SPDX-License-Identifier:', 'BSD-3-Clause')), + SpdxLidTest(test='# SPDX-License-Identifier: BSD-3-Clause', expected=('SPDX-License-Identifier:', 'BSD-3-Clause')), + SpdxLidTest(test='/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */', expected=('SPDX-License-Identifier:', 'GPL-1.0+ WITH Linux-syscall-note')), + SpdxLidTest(test='* SPDX-License-Identifier: GPL-2.0+', expected=('SPDX-License-Identifier:', 'GPL-2.0+')), + SpdxLidTest(test='* SPDX-License-Identifier: GPL-2.0', expected=('SPDX-License-Identifier:', 'GPL-2.0')), + SpdxLidTest(test='; SPDX-License-Identifier: GPL-2.0', expected=('SPDX-License-Identifier:', 'GPL-2.0')), + SpdxLidTest(test=';;; SPDX-License-Identifier: GPL-2.0', expected=('SPDX-License-Identifier:', 'GPL-2.0')), + SpdxLidTest(test='! 
SPDX-License-Identifier: GPL-2.0', expected=('SPDX-License-Identifier:', 'GPL-2.0')), + SpdxLidTest(test='// SPDX-License-Identifier: GPL-2.0+', expected=('SPDX-License-Identifier:', 'GPL-2.0+')), + SpdxLidTest(test='/* SPDX-License-Identifier: GPL-2.0+ */', expected=('SPDX-License-Identifier:', 'GPL-2.0+')), + SpdxLidTest(test='* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause )', expected=('SPDX-License-Identifier:', '(GPL-2.0+ OR BSD-3-Clause )')), + SpdxLidTest(test='(/ SPDX-Licence--Identifier: (GPL-2.0 OR BSD-3-Clause)', expected=('SPDX-Licence--Identifier:', '(GPL-2.0 OR BSD-3-Clause)')), + SpdxLidTest(test='// SPDX-License-Identifier: LGPL-2.1+', expected=('SPDX-License-Identifier:', 'LGPL-2.1+')), + SpdxLidTest(test='+SPDX-License-Identifier: GPL-2.0+', expected=('SPDX-License-Identifier:', 'GPL-2.0+')), + SpdxLidTest(test='* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause', expected=('SPDX-License-Identifier:', 'GPL-2.0+ BSD-2-Clause')), + SpdxLidTest(test='// SPDX Licence Identifier LGPL-2.1+', expected=('SPDX Licence Identifier', 'LGPL-2.1+')), + SpdxLidTest(test='

SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

', expected=('SPDX-License-Identifier:', 'Apache-2.0 WITH SHL-2.1')), + SpdxLidTest(test='MIT', expected=('licenses.nuget.org/', 'MIT')), + SpdxLidTest(test='Apache-2.0', expected=('licenses.nuget.org/', 'Apache-2.0')), + SpdxLidTest(test='@REM # SPDX-License-Identifier: BSD-2-Clause-Patent', expected=('SPDX-License-Identifier:', 'BSD-2-Clause-Patent')), + SpdxLidTest(test='* https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', expected=('licenses.nuget.org/', '(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)')), + SpdxLidTest(test='* https://licenses.nuget.org/MIT', expected=('licenses.nuget.org/', 'MIT')), + SpdxLidTest(test='* https://licenses.nuget.org/(MIT)' , expected=('licenses.nuget.org/', '(MIT)')), + SpdxLidTest(test='', expected=(None, '')), +] + + +@pytest.mark.parametrize('test, expected', prepare_text_tests) +def test_prepare_text(test, expected): + result = prepare_text(test) + assert result == expected + + +split_spdx_lids_tests = [ + SpdxLidTest(test='SPDX License Identifier : BSD-3-Clause', expected=('SPDX License Identifier : ', 'BSD-3-Clause')), + SpdxLidTest(test='SPDX-License-Identifier : BSD-3-Clause', expected=('SPDX-License-Identifier : ', 'BSD-3-Clause')), + SpdxLidTest(test='spdx-license- identifier : BSD-3-Clause', expected=('spdx-license- identifier : ', 'BSD-3-Clause')), + SpdxLidTest(test=' SPDX License--Identifier: BSD-3-Clause', expected=('SPDX License--Identifier: ', 'BSD-3-Clause')), + SpdxLidTest(test='SPDX-License-Identifier : BSD-3-Clause', expected=('SPDX-License-Identifier : ', 'BSD-3-Clause')), + SpdxLidTest(test='SPDx-Licence-Identifier : BSD-3-Clause', expected=('SPDx-Licence-Identifier : ', 'BSD-3-Clause')), + SpdxLidTest(test='SPD-Licence-Identifier : BSD-3-Clause', expected=(None, 'SPD-Licence-Identifier : BSD-3-Clause')), + SpdxLidTest(test='SPDx Short Identifier : BSD-3-Clause', expected=('SPDx Short Identifier : ', 'BSD-3-Clause')), + SpdxLidTest(test='SPDx-Licence-Identifier:BSD-3-Clause', 
expected=('SPDx-Licence-Identifier:', 'BSD-3-Clause')), + + SpdxLidTest(test='https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', expected=('licenses.nuget.org/', '(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)')), + SpdxLidTest(test='* https://licenses.nuget.org/(MIT)', expected=('licenses.nuget.org/', '(MIT)')), + SpdxLidTest(test='https://licenses.nuget.org/MIT', expected=('licenses.nuget.org/', 'MIT')), + SpdxLidTest(test='http://licenses.nuget.org/MIT', expected=('licenses.nuget.org/', 'MIT')), + SpdxLidTest(test='licenses.nuget.org/MIT', expected=('licenses.nuget.org/', 'MIT')), + SpdxLidTest(test='Licenses NuGet ORG MIT', expected=('Licenses NuGet ORG ', 'MIT')), + SpdxLidTest(test='licenses nuget org MIT', expected=('licenses nuget org ', 'MIT')), + SpdxLidTest(test='licenses MIT', expected=(None, 'licenses MIT')), + SpdxLidTest(test='URL:http://licenses.nuget.org/MIT', expected=('licenses.nuget.org/', 'MIT')), +] + + +@pytest.mark.parametrize('test, expected', split_spdx_lids_tests) +def test_split_spdx_lids(test, expected): + result = split_spdx_lid(test) + assert result == expected + + +split_spdx_lid_regex_tests = [ + SpdxLidTest(test='REM DNL SPDX License Identifier : BSD-3-Clause', expected=['REM DNL ', 'SPDX License Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-License-Identifier : BSD-3-Clause', expected=['', 'SPDX-License-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='spdx-license- identifier : BSD-3-Clause', expected=['', 'spdx-license- identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test=' SPDX License--Identifier: BSD-3-Clause', expected=[' ', 'SPDX License--Identifier: ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-License-Identifier : BSD-3-Clause', expected=['', 'SPDX-License-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-License-Identifer : BSD-3-Clause', expected=['' , 'SPDX-License-Identifer : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX--License--Identifer : BSD-3-Clause', 
expected=['' , 'SPDX--License--Identifer : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDZ-License-Identifier : BSD-3-Clause', expected=['' , 'SPDZ-License-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-Lincense-Identifier : BSD-3-Clause', expected=['' , 'SPDX-Lincense-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-Lisense-Identifier : BSD-3-Clause', expected=['' , 'SPDX-Lisense-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-Licence-Identifier : BSD-3-Clause', expected=['' , 'SPDX-Licence-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-Licece-Identifier : BSD-3-Clause', expected=['' , 'SPDX-Licece-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDZ-Licece-Identifer : BSD-3-Clause', expected=['' , 'SPDZ-Licece-Identifer : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX-Licenses-Identifier : BSD-3-Clause', expected=['' , 'SPDX-Licenses-Identifier : ', 'BSD-3-Clause']), + SpdxLidTest(test='SPDX - - Licenses - - Identifier : BSD-3-Clause', expected=['', 'SPDX - - Licenses - - Identifier : ', 'BSD-3-Clause']), +] + + +@pytest.mark.parametrize('test, expected', split_spdx_lid_regex_tests) +def test__split_spdx_lid(test, expected): + result = _split_spdx_lid(test) + assert result == expected + + +def test_get_expression_quoted(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '''LIST "SPDX-License-Identifier: GPL-2.0"''' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-2.0' + + +def test_get_expression_nuget(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'https://licenses.nuget.org/MIT' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'mit' + + +def test_get_expression_multiple_or(): + licensing = Licensing() + spdx_symbols = 
get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'bsd-new OR epl-1.0 OR apache-2.0 OR mit' + + +def test_get_expression_multiple_or_nuget(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'lgpl-2.0 WITH fltk-exception-lgpl-2.0 OR apache-2.0' + + +def test_get_expression_simple(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier: BSD-3-Clause' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'bsd-new' + + +def test_get_expression_with_exception(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-1.0-plus WITH linux-syscall-exception-gpl' + + +def test_get_expression_with_plus(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier: GPL-2.0+' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-2.0-plus' + + +def test_get_expression_with_extra_parens(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier: (GPL-2.0+ OR MIT)' + 
expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-2.0-plus OR mit' + + +def test_get_expression_with_extra_parens2(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'https://licenses.nuget.org/(GPL-2.0+ OR MIT)' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-2.0-plus OR mit' + + +def test_get_expression_extra_parens_2(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-2.0 OR bsd-simplified' + + +def test_get_expression_with_parens_and_with(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) AND MIT) */' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'gpl-2.0 WITH linux-syscall-exception-gpl AND mit' + + +def test_get_expression_simple_with(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'lgpl-2.0-plus WITH linux-syscall-exception-gpl' + + +def test_get_expression_license_ref(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '/* SPDX-License-Identifier: LicenseRef-ABC */' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 
'unknown-spdx' + + +def test_get_expression_from_html(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = "

SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

" + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'apache-2.0 WITH shl-2.1' + + +def test_get_expression_from_nuget_license_html(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'MIT' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression.render() == 'mit' + + +def test_get_expression_complex(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = ('* SPDX-License-Identifier: ' + 'EPL-2.0 OR aPache-2.0 OR ' + 'GPL-2.0 WITH classpath-exception-2.0 OR ' + 'GPL-2.0') + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + + expected = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0' + assert expression.render() == expected + + expected = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0'] + assert licensing.license_keys(expression, unique=True) == expected + + assert all(s.wrapped for s in licensing.license_symbols(expression, decompose=True)) + + +def test_get_expression_without_lid(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = ('EPL-2.0 OR Apache-2.0 OR ' + 'GPL-2.0 WITH Classpath-exception-2.0 OR ' + 'GPL-2.0') + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + + expected = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0' + assert expression.render() == expected + + expected = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0', u'gpl-2.0'] + assert licensing.license_keys(expression, unique=False) == expected + + assert all(s.wrapped for s in licensing.license_symbols(expression, decompose=True)) + + +def test_get_expression_complex_with_other_spdx_symbols_and_refs(): + licensing = Licensing() + 
spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = ('* SPDX-License-Identifier: ' + 'EPL-2.0 OR Apache-2.0 ' + 'OR GPL-2.0 WITH Classpath-exception-2.0 ' + 'OR LicenseRef-GPL-2.0 WITH Assembly-exception') + + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + + expected = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0 WITH openjdk-exception' + assert expression.render() == expected + + expected = ['epl-2.0', 'apache-2.0', 'gpl-2.0', 'classpath-exception-2.0', 'gpl-2.0', 'openjdk-exception'] + assert licensing.license_keys(expression, unique=False) == expected + + assert all(s.wrapped for s in licensing.license_symbols(expression, decompose=True)) + + +def test__parse_expression_without_and_raise_exception(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause' + try: + _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol) + pytest.fail('exception should be raised') + except Exception: + pass + + +def test_get_expression_without_and_should_not_return_unknown(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression != unknown_symbol + + +def test__reparse_invalid_expression_without_or_should_return_a_proper_expression(): + # this is a uboot-style legacy expression without OR + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'GPL-2.0+ BSD-2-Clause' + expression = _reparse_invalid_expression(line_text, licensing, spdx_symbols, unknown_symbol) + expected = 'gpl-2.0-plus OR bsd-simplified' + assert expression.render() == expected + + +def 
test__reparse_invalid_expression_with_improper_keyword_should_return_a_proper_expression(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'or GPL-2.0+ BSD-2-Clause ' + expression = _reparse_invalid_expression(line_text, licensing, spdx_symbols, unknown_symbol) + expected = '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx' + assert expression.render() == expected + + +def test__reparse_invalid_expression_with_non_balanced_parens_should_return_a_proper_expression(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '(GPL-2.0+ and (BSD-2-Clause ' + expression = _reparse_invalid_expression(line_text, licensing, spdx_symbols, unknown_symbol) + expected = '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx' + assert expression.render() == expected + + +def test__parse_expression_with_empty_expression_should_raise_ExpressionError(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier:' + try: + _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol) + pytest.fail('ExpressionError not raised') + except ExpressionError: + pass + + +def test_get_expression_with_empty_expression_should_return_unknown(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '* SPDX-License-Identifier:' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression == None + + +def test_get_expression_with_empty_expression_should_return_unknown_nuget(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'https://licenses.nuget.org/' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression == None + + +def 
test__parse_expression_with_empty_expression2_should_return_None(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '' + expression = _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression is None + + +def test_get_expression_with_empty_expression2_should_return_unknown(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = '' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression == None + + +def test_all_spdx_tokens_exists_in_dictionary(): + idx = cache.get_index() + dic = idx.dictionary + licenses = cache.get_licenses_db() + tokens = set(models.get_all_spdx_key_tokens(licenses)) + keys = set(idx.dictionary) + try: + assert tokens.issubset(keys) + except: + for token in tokens: + dic[token] + + +@pytest.mark.parametrize( + 'test, expected', + [ + ('eCos-2.0', 'gpl-2.0-plus WITH ecos-exception-2.0'), + ('GPL-2.0-with-autoconf-exception', 'gpl-2.0 WITH autoconf-exception-2.0'), + ('GPL-2.0-with-bison-exception', 'gpl-2.0 WITH bison-exception-2.2'), + ('GPL-2.0-with-classpath-exception', 'gpl-2.0 WITH classpath-exception-2.0'), + ('GPL-2.0-with-font-exception', 'gpl-2.0 WITH font-exception-gpl'), + ('GPL-2.0-with-GCC-exception', 'gpl-2.0 WITH gcc-linking-exception-2.0'), + ('GPL-3.0-with-autoconf-exception', 'gpl-3.0 WITH autoconf-exception-3.0'), + ('GPL-3.0-with-GCC-exception', 'gpl-3.0 WITH gcc-exception-3.1'), + ('wxWindows', 'lgpl-2.0-plus WITH wxwindows-exception-3.1'), + ] +) +def test_get_expression_works_for_legacy_deprecated_old_spdx_symbols(test, expected): + licensing = Licensing() + symbols_by_spdx = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + result = get_expression( + text=test, + licensing=licensing, + expression_symbols=symbols_by_spdx, + unknown_symbol=unknown_symbol, + ) + assert result.render() == 
expected + + +def test_get_expression_does_not_fail_on_empty(): + licensing = Licensing() + spdx_symbols = get_spdx_symbols() + unknown_symbol = get_unknown_spdx_symbol() + line_text = 'SPDX-License-Identifier: ' + expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) + assert expression == None + + +def test_Index_match_does_not_fail_on_empty(): + idx = cache.get_index() + matches = list(idx.match(query_string='SPDX-License-Identifier: ')) + assert not matches - expected = [ - ('SPDX-License-Identifier:', '(BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)'), - ('SPDX-License-Identifier:', 'BSD-3-Clause'), - ('SPDX-License-Identifier:', 'BSD-3-Clause'), - ('SPDX-License-Identifier:', 'BSD-3-Clause'), - ('SPDX-License-Identifier:', 'GPL-1.0+ WITH Linux-syscall-note'), - ('SPDX-License-Identifier:', 'GPL-2.0+'), - ('SPDX-License-Identifier:', 'GPL-2.0'), - ('SPDX-License-Identifier:', 'GPL-2.0'), - ('SPDX-License-Identifier:', 'GPL-2.0'), - ('SPDX-License-Identifier:', 'GPL-2.0'), - ('SPDX-License-Identifier:', 'GPL-2.0+'), - ('SPDX-License-Identifier:', 'GPL-2.0+'), - ('SPDX-License-Identifier:', '(GPL-2.0+ OR BSD-3-Clause )'), - ('SPDX-Licence--Identifier:', '(GPL-2.0 OR BSD-3-Clause)'), - ('SPDX-License-Identifier:', 'LGPL-2.1+'), - ('SPDX-License-Identifier:', 'GPL-2.0+'), - ('SPDX-License-Identifier:', 'GPL-2.0+ BSD-2-Clause'), - ('SPDX Licence Identifier', 'LGPL-2.1+') - ] - results = [prepare_text(test) for test in tests] - assert results == expected - def test_prepare_text_nuget(self): - tests = [ - '* https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', - '* https://licenses.nuget.org/MIT', - '* https://licenses.nuget.org/(MIT)' - ] - - expected = [ - ('licenses.nuget.org', '(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)'), - ('licenses.nuget.org', 'MIT'), - ('licenses.nuget.org', '(MIT)'), - ] - results = [prepare_text(test) for test in tests] - assert results == expected - - def 
test_prepare_text_with_markup(self): - tests = [ - '

SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

', - 'MIT', - 'Apache-2.0' - ] - expected = [ - ('SPDX-License-Identifier:', 'Apache-2.0 WITH SHL-2.1'), - ('licenses.nuget.org', 'MIT'), - ('licenses.nuget.org', 'Apache-2.0'), - ] - results = [prepare_text(test) for test in tests] - assert results == expected - - def test_prepare_text_with_rem(self): - assert prepare_text('') == (None, '') - assert prepare_text('@REM # SPDX-License-Identifier: BSD-2-Clause-Patent') == ('SPDX-License-Identifier:', 'BSD-2-Clause-Patent') - - def test_split_spdx_lid(self): - test = [ - 'SPDX License Identifier : BSD-3-Clause', - 'SPDX-License-Identifier : BSD-3-Clause', - 'spdx-license- identifier : BSD-3-Clause', - ' SPDX License--Identifier: BSD-3-Clause', - 'SPDX-License-Identifier : BSD-3-Clause', - 'SPDx-Licence-Identifier : BSD-3-Clause', - 'SPD-Licence-Identifier : BSD-3-Clause', - ] - results = [split_spdx_lid(l) for l in test] - expected = [ - ('SPDX License Identifier : ', 'BSD-3-Clause'), - ('SPDX-License-Identifier : ', 'BSD-3-Clause'), - ('spdx-license- identifier : ', 'BSD-3-Clause'), - ('SPDX License--Identifier: ', 'BSD-3-Clause'), - ('SPDX-License-Identifier : ', 'BSD-3-Clause'), - ('SPDx-Licence-Identifier : ', 'BSD-3-Clause'), - (None, 'SPD-Licence-Identifier : BSD-3-Clause'), - ] - assert results == expected - - def test_split_spdx_lid_nuget(self): - test = [ - 'https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)', - '* https://licenses.nuget.org/(MIT)', - 'https://licenses.nuget.org/MIT', - 'http://licenses.nuget.org/MIT', - 'licenses.nuget.org/MIT', - 'Licenses NuGet ORG MIT', - 'licenses nuget org MIT', - 'licenses MIT', - ] - results = [split_spdx_lid(l) for l in test] - expected = [ - ('licenses.nuget.org', '/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)'), - ('licenses.nuget.org', '/(MIT)'), - ('licenses.nuget.org', '/MIT'), - ('licenses.nuget.org', '/MIT'), - ('licenses.nuget.org', '/MIT'), - ('Licenses NuGet ORG ', 'MIT'), - ('licenses nuget org ', 'MIT'), - (None, 'licenses 
MIT'), - ] - assert results == expected - - def test__split_spdx_lid(self): - test = [ - 'SPDX-License-Identifier: BSD-3-Clause', - 'REM DNL SPDX License Identifier : BSD-3-Clause', - 'SPDX-License-Identifier : BSD-3-Clause', - 'spdx-license- identifier : BSD-3-Clause', - ' SPDX License--Identifier: BSD-3-Clause', - 'SPDX-License-Identifier : BSD-3-Clause', - 'SPDX-License-Identifer : BSD-3-Clause', - 'SPDX--License--Identifer : BSD-3-Clause', - - # weird spellings - 'SPDZ-License-Identifier : BSD-3-Clause', - 'SPDX-Lincense-Identifier : BSD-3-Clause', - 'SPDX-Lisense-Identifier : BSD-3-Clause', - 'SPDX-Licence-Identifier : BSD-3-Clause', - 'SPDX-Licece-Identifier : BSD-3-Clause', - 'SPDZ-Licece-Identifer : BSD-3-Clause', - 'SPDX-Licenses-Identifier : BSD-3-Clause', - 'SPDX - - Licenses - - Identifier : BSD-3-Clause', - ] - results = [_split_spdx_lid(l) for l in test] - expected = [ - ['', 'SPDX-License-Identifier: ', 'BSD-3-Clause'], - ['REM DNL ', 'SPDX License Identifier : ', 'BSD-3-Clause'], - ['', 'SPDX-License-Identifier : ', 'BSD-3-Clause'], - ['', 'spdx-license- identifier : ', 'BSD-3-Clause'], - [' ', 'SPDX License--Identifier: ', 'BSD-3-Clause'], - ['', 'SPDX-License-Identifier : ', 'BSD-3-Clause'], - ['' ,'SPDX-License-Identifer : ', 'BSD-3-Clause'], - ['' ,'SPDX--License--Identifer : ', 'BSD-3-Clause'], - - ['' ,'SPDZ-License-Identifier : ', 'BSD-3-Clause'], - ['' ,'SPDX-Lincense-Identifier : ', 'BSD-3-Clause'], - ['' ,'SPDX-Lisense-Identifier : ', 'BSD-3-Clause'], - ['' ,'SPDX-Licence-Identifier : ', 'BSD-3-Clause'], - ['' ,'SPDX-Licece-Identifier : ', 'BSD-3-Clause'], - ['' ,'SPDZ-Licece-Identifer : ', 'BSD-3-Clause'], - ['' ,'SPDX-Licenses-Identifier : ', 'BSD-3-Clause'], - ['', 'SPDX - - Licenses - - Identifier : ', 'BSD-3-Clause'], - ] - assert results == expected - - def test_get_expression_quoted(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '''LIST 
"SPDX-License-Identifier: GPL-2.0"''' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-2.0' - - def test_get_expression_nuget(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'https://licenses.nuget.org/MIT' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'mit' - - def test_get_expression_multiple_or(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier: (BSD-3-Clause OR EPL-1.0 OR Apache-2.0 OR MIT)' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'bsd-new OR epl-1.0 OR apache-2.0 OR mit' - - def test_get_expression_multiple_or_nuget(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'https://licenses.nuget.org/(LGPL-2.0-only WITH FLTK-exception OR Apache-2.0)' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'lgpl-2.0 WITH fltk-exception-lgpl-2.0 OR apache-2.0' - - def test_get_expression_simple(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier: BSD-3-Clause' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'bsd-new' - - def test_get_expression_with_exception(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-1.0-plus WITH 
linux-syscall-exception-gpl' - - def test_get_expression_with_plus(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier: GPL-2.0+' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-2.0-plus' - - def test_get_expression_with_extra_parens(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier: (GPL-2.0+ OR MIT)' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-2.0-plus OR mit' - - def test_get_expression_with_extra_parens2(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'https://licenses.nuget.org/(GPL-2.0+ OR MIT)' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-2.0-plus OR mit' - - def test_get_expression_extra_parens_2(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-2.0 OR bsd-simplified' - - def test_get_expression_with_parens_and_with(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) AND MIT) */' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'gpl-2.0 WITH linux-syscall-exception-gpl AND mit' - - def test_get_expression_simple_with(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = 
get_unknown_spdx_symbol() - line_text = '/* SPDX-License-Identifier: LGPL-2.0+ WITH Linux-syscall-note */' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'lgpl-2.0-plus WITH linux-syscall-exception-gpl' - - def test_get_expression_license_ref(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '/* SPDX-License-Identifier: LicenseRef-ABC */' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'unknown-spdx' - - def test_get_expression_from_html(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = "

SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1

" - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'apache-2.0 WITH shl-2.1' - - def test_get_expression_from_nuget_license_html(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'MIT' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression.render() == 'mit' - - def test_get_expression_complex(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = ('* SPDX-License-Identifier: ' - 'EPL-2.0 OR aPache-2.0 OR ' - 'GPL-2.0 WITH classpath-exception-2.0 OR ' - 'GPL-2.0') - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - - expected = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0' - assert expression.render() == expected - - expected = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0'] - assert licensing.license_keys(expression, unique=True) == expected - - assert all(s.wrapped for s in licensing.license_symbols(expression, decompose=True)) - - def test_get_expression_without_lid(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = ('EPL-2.0 OR Apache-2.0 OR ' - 'GPL-2.0 WITH Classpath-exception-2.0 OR ' - 'GPL-2.0') - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - - expected = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0' - assert expression.render() == expected - - expected = ['epl-2.0', u'apache-2.0', u'gpl-2.0', u'classpath-exception-2.0', u'gpl-2.0'] - assert licensing.license_keys(expression, unique=False) == expected - - assert all(s.wrapped for s in licensing.license_symbols(expression, decompose=True)) - - def test_get_expression_complex_with_other_spdx_symbols_and_refs(self): - licensing = 
Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = ('* SPDX-License-Identifier: ' - 'EPL-2.0 OR Apache-2.0 ' - 'OR GPL-2.0 WITH Classpath-exception-2.0 ' - 'OR LicenseRef-GPL-2.0 WITH Assembly-exception') - - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - - expected = 'epl-2.0 OR apache-2.0 OR gpl-2.0 WITH classpath-exception-2.0 OR gpl-2.0 WITH openjdk-exception' - assert expression.render() == expected - - expected = ['epl-2.0', 'apache-2.0', 'gpl-2.0', 'classpath-exception-2.0', 'gpl-2.0', 'openjdk-exception'] - assert licensing.license_keys(expression, unique=False) == expected - - assert all(s.wrapped for s in licensing.license_symbols(expression, decompose=True)) - - def test__parse_expression_without_and_raise_exception(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause' - try: - _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol) - self.fail('exception should be raised') - except: - pass - - def test_get_expression_without_and_should_not_return_unknown(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier: GPL-2.0+ BSD-2-Clause' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression != unknown_symbol - - def test__reparse_invalid_expression_without_or_should_return_a_proper_expression(self): - # this is a uboot-style legacy expression without OR - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'GPL-2.0+ BSD-2-Clause' - expression = _reparse_invalid_expression(line_text, licensing, spdx_symbols, unknown_symbol) - expected = 'gpl-2.0-plus OR bsd-simplified' - assert expression.render() == expected - - def 
test__reparse_invalid_expression_with_improper_keyword_should_return_a_proper_expression(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'or GPL-2.0+ BSD-2-Clause ' - expression = _reparse_invalid_expression(line_text, licensing, spdx_symbols, unknown_symbol) - expected = '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx' - assert expression.render() == expected - - def test__reparse_invalid_expression_with_non_balanced_parens_should_return_a_proper_expression(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '(GPL-2.0+ and (BSD-2-Clause ' - expression = _reparse_invalid_expression(line_text, licensing, spdx_symbols, unknown_symbol) - expected = '(gpl-2.0-plus AND bsd-simplified) AND unknown-spdx' - assert expression.render() == expected - - def test__parse_expression_with_empty_expression_should_raise_ExpressionError(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier:' - try: - _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol) - self.fail('ExpressionError not raised') - except ExpressionError: - pass - - def test_get_expression_with_empty_expression_should_return_unknown(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '* SPDX-License-Identifier:' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression == None - - def test_get_expression_with_empty_expression_should_return_unknown_nuget(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'https://licenses.nuget.org/' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression == None - - def 
test__parse_expression_with_empty_expression2_should_return_None(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '' - expression = _parse_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression is None - - def test_get_expression_with_empty_expression2_should_return_unknown(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = '' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression == None - - def test_all_spdx_tokens_exists_in_dictionary(self): - idx = cache.get_index() - dic = idx.dictionary - licenses = cache.get_licenses_db() - tokens = set(models.get_all_spdx_key_tokens(licenses)) - keys = set(idx.dictionary) - try: - assert tokens.issubset(keys) - except: - for token in tokens: - dic[token] - - def test_get_expression_works_for_legacy_deprecated_old_spdx_symbols(self): - exp_by_old = { - 'eCos-2.0': 'gpl-2.0-plus WITH ecos-exception-2.0', - 'GPL-2.0-with-autoconf-exception': 'gpl-2.0 WITH autoconf-exception-2.0', - 'GPL-2.0-with-bison-exception': 'gpl-2.0 WITH bison-exception-2.2', - 'GPL-2.0-with-classpath-exception': 'gpl-2.0 WITH classpath-exception-2.0', - 'GPL-2.0-with-font-exception': 'gpl-2.0 WITH font-exception-gpl', - 'GPL-2.0-with-GCC-exception': 'gpl-2.0 WITH gcc-linking-exception-2.0', - 'GPL-3.0-with-autoconf-exception': 'gpl-3.0 WITH autoconf-exception-3.0', - 'GPL-3.0-with-GCC-exception': 'gpl-3.0 WITH gcc-exception-3.1', - 'wxWindows': 'lgpl-2.0-plus WITH wxwindows-exception-3.1', - } - licensing = Licensing() - symbols_by_spdx = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - - for test, expected in exp_by_old.items(): - result = get_expression( - test, licensing, symbols_by_spdx, unknown_symbol).render() - assert result == expected +class TestMatchSpdx(FileBasedTesting): + test_data_dir = 
TEST_DATA_DIR def test_spdx_match_contains_spdx_prefix(self): from licensedcode import index @@ -719,16 +661,3 @@ def test_spdx_match_contains_spdx_prefix(self): expected_itext = 'spdx license identifier bsd 3 clause no nuclear warranty' assert itext == expected_itext - def test_get_expression_does_not_fail_on_empty(self): - licensing = Licensing() - spdx_symbols = get_spdx_symbols() - unknown_symbol = get_unknown_spdx_symbol() - line_text = 'SPDX-License-Identifier: ' - expression = get_expression(line_text, licensing, spdx_symbols, unknown_symbol) - assert expression == None - - def test_Index_match_does_not_fail_on_empty(self): - idx = cache.get_index() - matches = list(idx.match(query_string='SPDX-License-Identifier: ')) - assert not matches -