diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py index d7e710e877..d99af91d61 100644 --- a/src/packagedcode/pypi.py +++ b/src/packagedcode/pypi.py @@ -74,6 +74,8 @@ def logger_debug(*args): def logger_debug(*args): return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args)) +def is_private_package(classifiers): + return any('Private ::' in classifier for classifier in classifiers if classifier) class PythonEggPkgInfoFile(models.DatafileHandler): datasource_id = 'pypi_egg_pkginfo' @@ -473,7 +475,8 @@ def parse(cls, location, package_only=False): description = project_data.get('description') or '' description = description.strip() - urls, extra_data = get_urls(metainfo=project_data, name=name, version=version) + is_private = is_private_package(project_data.get('classifiers', [])) + urls, extra_data = get_urls(metainfo=project_data, name=name, version=version,is_private=is_private) extracted_license_statement, license_file = get_declared_license(project_data) if license_file: @@ -504,12 +507,13 @@ def parse(cls, location, package_only=False): keywords=get_keywords(project_data), parties=get_pyproject_toml_parties(project_data), dependencies=dependencies, + is_private=is_private, extra_data=extra_data, **urls, + download_url=urls.get('download'), ) yield models.PackageData.from_data(package_data, package_only) - def is_poetry_pyproject_toml(location): with open(location, 'r') as file: data = file.read() @@ -698,6 +702,8 @@ def parse(cls, location, package_only=False): ) dependencies.append(dependency.to_dict()) + is_private = is_private_package(poetry_data.get('classifiers', [])) + package_data = dict( datasource_id=cls.datasource_id, type=cls.default_package_type, @@ -709,6 +715,7 @@ def parse(cls, location, package_only=False): keywords=get_keywords(poetry_data), parties=get_pyproject_toml_parties(poetry_data), extra_data=extra_data, + is_private=is_private, dependencies=dependencies, **urls, ) @@ -976,6 +983,9 @@ def 
parse_metadata(location, datasource_id, package_type, package_only=False): if license_file: extra_data['license_file'] = license_file + classifiers = get_attribute(meta, 'Classifier', multiple=True) + is_private = is_private_package(classifiers) + # FIXME: We are getting dependencies from other sibling files, this is duplicated # data at the package_data level, is this necessary? We also have the entire dependency # relationships here at requires.txt present in ``.egg-info`` should we store these @@ -996,6 +1006,7 @@ def parse_metadata(location, datasource_id, package_type, package_only=False): dependencies=dependencies, file_references=file_references, extra_data=extra_data, + is_private=is_private, **urls, ) return models.PackageData.from_data(package_data, package_only) @@ -1161,7 +1172,9 @@ def parse(cls, location, package_only=False): # search for possible dunder versions here and elsewhere version = detect_version_attribute(location) - urls, extra_data = get_urls(metainfo=setup_args, name=name, version=version) + is_private = is_private_package(setup_args.get('classifiers', [])) + + urls, extra_data = get_urls(metainfo=setup_args, name=name, version=version,is_private=is_private) dependencies = get_setup_py_dependencies(setup_args) python_requires = get_setup_py_python_requires(setup_args) @@ -1171,6 +1184,7 @@ def parse(cls, location, package_only=False): if license_file: extra_data['license_file'] = license_file + package_data = dict( datasource_id=cls.datasource_id, type=cls.default_package_type, @@ -1182,6 +1196,7 @@ def parse(cls, location, package_only=False): extracted_license_statement=extracted_license_statement, dependencies=dependencies, keywords=get_keywords(setup_args), + is_private=is_private, extra_data=extra_data, **urls, ) @@ -1300,6 +1315,9 @@ def parse(cls, location, package_only=False): extracted_license_statement = '' extracted_license_statement += f" license_files: {license_file_references}" + classifiers = parser.get('metadata', 
'classifiers', fallback='').splitlines() + is_private = is_private_package(classifiers) + package_data = dict( datasource_id=cls.datasource_id, type=cls.default_package_type, @@ -1309,6 +1327,7 @@ def parse(cls, location, package_only=False): homepage_url=metadata.get('url'), primary_language=cls.default_primary_language, dependencies=dependent_packages, + is_private=is_private, extracted_license_statement=extracted_license_statement, ) yield models.PackageData.from_data(package_data, package_only) @@ -2243,7 +2262,7 @@ def get_pypi_urls(name, version, **kwargs): ) -def get_urls(metainfo, name, version, poetry=False): +def get_urls(metainfo, name, version, is_private=False, poetry=False): """ Return a mapping of standard URLs and a mapping of extra-data URls for URLs of this package: @@ -2285,6 +2304,9 @@ def get_urls(metainfo, name, version, poetry=False): # Project-URL: Say Thanks! extra_data = {} + if is_private: + return {}, {} + urls = get_pypi_urls(name, version) def add_url(_url, _utype=None, _attribute=None): diff --git a/src/packagedcode/utils.py b/src/packagedcode/utils.py index 727792b95e..f4249e87dd 100644 --- a/src/packagedcode/utils.py +++ b/src/packagedcode/utils.py @@ -304,3 +304,4 @@ def is_simple_path(path): def is_simple_path_pattern(path): return path.endswith('*') and path.count('*') == 1 + diff --git a/tests/packagedcode/data/pypi/develop/PKG-INFO b/tests/packagedcode/data/pypi/develop/PKG-INFO new file mode 100644 index 0000000000..03d906228f --- /dev/null +++ b/tests/packagedcode/data/pypi/develop/PKG-INFO @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: example_egg +Version: 1.0.0 +Classifier: Development Status :: 5 - Production/Stable +Classifier: Private :: Do Not Upload \ No newline at end of file diff --git a/tests/packagedcode/data/pypi/develop/private-classifier-egg-info-expected.json b/tests/packagedcode/data/pypi/develop/private-classifier-egg-info-expected.json new file mode 100644 index 0000000000..b6ec9bc832 --- /dev/null +++ 
b/tests/packagedcode/data/pypi/develop/private-classifier-egg-info-expected.json @@ -0,0 +1,49 @@ +[ + { + "type": "pypi", + "namespace": null, + "name": "example_egg", + "version": "1.0.0", + "qualifiers": {}, + "subpath": null, + "primary_language": "Python", + "description": "", + "release_date": null, + "parties": [], + "keywords": [ + "Development Status :: 5 - Production/Stable", + "Private :: Do Not Upload" + ], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": null, + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": true, + "is_virtual": false, + "extra_data": {}, + "dependencies": [], + "repository_homepage_url": "https://pypi.org/project/example_egg", + "repository_download_url": "https://pypi.org/packages/source/e/example_egg/example_egg-1.0.0.tar.gz", + "api_data_url": "https://pypi.org/pypi/example_egg/1.0.0/json", + "datasource_id": "pypi_egg_info", + "purl": "pkg:pypi/example-egg@1.0.0" + } +] \ No newline at end of file diff --git a/tests/packagedcode/data/pypi/pyproject-toml/standard/private-classifier-pyproject.toml-expected.json b/tests/packagedcode/data/pypi/pyproject-toml/standard/private-classifier-pyproject.toml-expected.json new file mode 100644 index 0000000000..a188cdc385 --- /dev/null +++ b/tests/packagedcode/data/pypi/pyproject-toml/standard/private-classifier-pyproject.toml-expected.json @@ -0,0 +1,114 @@ +[ + { + "type": "pypi", + "namespace": null, + "name": "titanic_ml", + "version": "0.1.0", + "qualifiers": {}, + "subpath": null, + "primary_language": 
"Python", + "description": "titanic_ml example package", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "author", + "name": "Niels Zeilemaker", + "email": "nielszeilemaker@xebia.com", + "url": null + } + ], + "keywords": [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Private :: Do Not Upload" + ], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": null, + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": true, + "is_virtual": false, + "extra_data": {}, + "dependencies": [ + { + "purl": "pkg:pypi/pyspark", + "extracted_requirement": null, + "scope": "install", + "is_runtime": true, + "is_optional": false, + "is_pinned": false, + "is_direct": true, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:pypi/sklearn", + "extracted_requirement": null, + "scope": "install", + "is_runtime": true, + "is_optional": false, + "is_pinned": false, + "is_direct": true, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:pypi/tox", + "extracted_requirement": null, + "scope": "dev", + "is_runtime": true, + "is_optional": true, + "is_pinned": false, + "is_direct": true, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:pypi/pre-commit", + "extracted_requirement": null, + "scope": "dev", + "is_runtime": true, + "is_optional": true, + "is_pinned": false, + "is_direct": true, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:pypi/bump2version", + "extracted_requirement": null, + 
"scope": "dev", + "is_runtime": true, + "is_optional": true, + "is_pinned": false, + "is_direct": true, + "resolved_package": {}, + "extra_data": {} + } + ], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null, + "datasource_id": "pypi_pyproject_toml", + "purl": "pkg:pypi/titanic-ml@0.1.0" + } +] \ No newline at end of file diff --git a/tests/packagedcode/data/pypi/pyproject-toml/standard/python-private-classifier/pyproject.toml b/tests/packagedcode/data/pypi/pyproject-toml/standard/python-private-classifier/pyproject.toml new file mode 100644 index 0000000000..d03383271f --- /dev/null +++ b/tests/packagedcode/data/pypi/pyproject-toml/standard/python-private-classifier/pyproject.toml @@ -0,0 +1,29 @@ +#Taken from : https://xebia.com/blog/minimal-pyproject-toml-example/ + +[project] +name = "titanic_ml" +description = "titanic_ml example package" +version = "0.1.0" +authors = [ + { name = "Niels Zeilemaker", email = "nielszeilemaker@xebia.com" } +] +dependencies = [ + "pyspark[ml]", + "sklearn" +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Private :: Do Not Upload" +] + +[project.optional-dependencies] +dev = [ + "tox", + "pre-commit", + "bump2version" +] + +[build-system] +build-backend = "flit_core.buildapi" +requires = ["flit_core >=3.2,<4"] diff --git a/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py b/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py new file mode 100644 index 0000000000..574fc24db9 --- /dev/null +++ b/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py @@ -0,0 +1,10 @@ +from setuptools import setup + +setup( + name="example_setup", + version="1.0.0", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Private :: Do Not Upload", + ], +) \ No newline at end of file diff --git a/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py.expected.json 
b/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py.expected.json new file mode 100644 index 0000000000..66208de11a --- /dev/null +++ b/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py.expected.json @@ -0,0 +1,49 @@ +[ + { + "type": "pypi", + "namespace": null, + "name": "example_setup", + "version": "1.0.0", + "qualifiers": {}, + "subpath": null, + "primary_language": "Python", + "description": "", + "release_date": null, + "parties": [], + "keywords": [ + "Development Status :: 5 - Production/Stable", + "Private :: Do Not Upload" + ], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": null, + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": true, + "is_virtual": false, + "extra_data": {}, + "dependencies": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null, + "datasource_id": "pypi_setup_py", + "purl": "pkg:pypi/example-setup@1.0.0" + } +] \ No newline at end of file diff --git a/tests/packagedcode/test_pypi.py b/tests/packagedcode/test_pypi.py index 984cd8eb38..c3870e37a6 100644 --- a/tests/packagedcode/test_pypi.py +++ b/tests/packagedcode/test_pypi.py @@ -369,7 +369,23 @@ def test_parse_pyproject_toml_standard_lc0(self): expected_loc = self.get_test_loc('pypi/pyproject-toml/standard/lc0-pyproject.toml-expected.json') self.check_packages_data(package, expected_loc, regen=REGEN_TEST_FIXTURES) - + def test_parse_pyproject_toml_private_package(self): + test_file = 
self.get_test_loc('pypi/pyproject-toml/standard/python-private-classifier/pyproject.toml') + packages = pypi.PyprojectTomlHandler.parse(test_file) + expected_loc = self.get_test_loc('pypi/pyproject-toml/standard/private-classifier-pyproject.toml-expected.json') + self.check_packages_data(packages, expected_loc, regen=REGEN_TEST_FIXTURES) + + def test_parse_setup_py_private_package(self): + test_file = self.get_test_loc('pypi/setup.py/private-classifier-setup.py') + packages = pypi.PythonSetupPyHandler.parse(test_file) + expected_loc = self.get_test_loc('pypi/setup.py/private-classifier-setup.py.expected.json') + self.check_packages_data(packages, expected_loc, regen=REGEN_TEST_FIXTURES) + + def test_parse_egg_info_private_package(self): + test_file = self.get_test_loc('pypi/develop/PKG-INFO') + packages = pypi.parse_metadata(test_file, datasource_id="pypi_egg_info", package_type="pypi") + expected_loc = self.get_test_loc('pypi/develop/private-classifier-egg-info-expected.json') + self.check_packages_data([packages], expected_loc, regen=REGEN_TEST_FIXTURES) class TestPoetryHandler(PackageTester): def test_is_pyproject_toml_poetry(self):