Skip to content

Commit 69ea6b7

Browse files
Add npm and pnpm workspace support #3746
Reference: #3746 Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent ae2af48 commit 69ea6b7

File tree

27 files changed

+186779
-143
lines changed

27 files changed

+186779
-143
lines changed

src/packagedcode/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@
151151
npm.YarnLockV2Handler,
152152
npm.PnpmShrinkwrapYamlHandler,
153153
npm.PnpmLockYamlHandler,
154+
npm.PnpmWorkspaceYamlHandler,
154155

155156
nuget.NugetNupkgHandler,
156157
nuget.NugetNuspecHandler,

src/packagedcode/npm.py

Lines changed: 199 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#
99
import base64
1010
import io
11+
import fnmatch
1112
import os
1213
import logging
1314
import json
@@ -101,55 +102,77 @@ def assemble(cls, package_data, resource, codebase, package_adder):
101102
if package_resource:
102103
package_resource = package_resource[0]
103104

104-
if package_resource:
105-
assert len(package_resource.package_data) == 1, f'Invalid package.json for {package_resource.path}'
106-
pkg_data = package_resource.package_data[0]
107-
pkg_data = models.PackageData.from_dict(pkg_data)
105+
if not package_resource:
106+
# we do not have a package.json
107+
yield from yield_dependencies_from_package_resource(resource)
108+
return
109+
110+
assert len(package_resource.package_data) == 1, f'Invalid package.json for {package_resource.path}'
111+
pkg_data = package_resource.package_data[0]
112+
pkg_data = models.PackageData.from_dict(pkg_data)
113+
114+
workspace_root_path = package_resource.parent(codebase).path
115+
workspaces = pkg_data.extra_data.get('workspaces') or []
116+
# Also look for pnpm workspaces
117+
if not workspaces:
118+
pnpm_workspace_path = os.path.join(workspace_root_path, 'pnpm-workspace.yaml')
119+
pnpm_workspace = codebase.get_resource(path=pnpm_workspace_path)
120+
if pnpm_workspace:
121+
pnpm_workspace_pkg_data = pnpm_workspace.package_data
122+
if pnpm_workspace_pkg_data:
123+
workspace_package = pnpm_workspace_pkg_data[0]
124+
extra_data = workspace_package.get('extra_data')
125+
workspaces = extra_data.get('workspaces')
126+
127+
workspace_members = cls.get_workspace_members(
128+
workspaces=workspaces,
129+
codebase=codebase,
130+
workspace_root_path=workspace_root_path,
131+
)
108132

109-
# do we have enough to create a package?
110-
if pkg_data.purl:
111-
package = models.Package.from_package_data(
112-
package_data=pkg_data,
113-
datafile_path=package_resource.path,
114-
)
115-
package_uid = package.package_uid
133+
cls.update_workspace_members(workspace_members, codebase)
116134

117-
package.populate_license_fields()
135+
# do we have enough to create a package?
136+
if pkg_data.purl:
137+
package = models.Package.from_package_data(
138+
package_data=pkg_data,
139+
datafile_path=package_resource.path,
140+
)
141+
package_uid = package.package_uid
118142

119-
# Always yield the package resource in all cases and first!
120-
yield package
143+
package.populate_license_fields()
121144

122-
root = package_resource.parent(codebase)
123-
if root:
124-
for npm_res in cls.walk_npm(resource=root, codebase=codebase):
125-
if package_uid and package_uid not in npm_res.for_packages:
126-
package_adder(package_uid, npm_res, codebase)
127-
yield npm_res
128-
elif codebase.has_single_resource:
129-
if package_uid and package_uid not in package_resource.for_packages:
130-
package_adder(package_uid, package_resource, codebase)
131-
yield package_resource
145+
# Always yield the package resource in all cases and first!
146+
yield package
147+
148+
root = package_resource.parent(codebase)
149+
if root:
150+
for npm_res in cls.walk_npm(resource=root, codebase=codebase):
151+
if package_uid and package_uid not in npm_res.for_packages:
152+
package_adder(package_uid, npm_res, codebase)
153+
yield npm_res
154+
elif codebase.has_single_resource:
155+
if package_uid and package_uid not in package_resource.for_packages:
156+
package_adder(package_uid, package_resource, codebase)
157+
yield package_resource
132158

133-
else:
134-
# we have no package, so deps are not for a specific package uid
135-
package_uid = None
159+
else:
160+
# we have no package, so deps are not for a specific package uid
161+
package_uid = None
136162

137-
# in all cases yield possible dependencies
138-
yield from yield_dependencies_from_package_data(pkg_data, package_resource.path, package_uid)
163+
# in all cases yield possible dependencies
164+
yield from yield_dependencies_from_package_data(pkg_data, package_resource.path, package_uid)
139165

140-
# we yield this as we do not want this further processed
141-
yield package_resource
166+
# we yield this as we do not want this further processed
167+
yield package_resource
142168

143-
for lock_file in package_resource.siblings(codebase):
144-
if lock_file.name in lockfile_names:
145-
yield from yield_dependencies_from_package_resource(lock_file, package_uid)
169+
for lock_file in package_resource.siblings(codebase):
170+
if lock_file.name in lockfile_names:
171+
yield from yield_dependencies_from_package_resource(lock_file, package_uid)
146172

147-
if package_uid and package_uid not in lock_file.for_packages:
148-
package_adder(package_uid, lock_file, codebase)
149-
yield lock_file
150-
else:
151-
# we do not have a package.json
152-
yield from yield_dependencies_from_package_resource(resource)
173+
if package_uid and package_uid not in lock_file.for_packages:
174+
package_adder(package_uid, lock_file, codebase)
175+
yield lock_file
153176

154177
@classmethod
155178
def walk_npm(cls, resource, codebase, depth=0):
@@ -244,6 +267,100 @@ def update_dependencies_by_purl(
244267
)
245268
dependecies_by_purl[dep_purl] = dep_package
246269

270+
@classmethod
def get_workspace_members(cls, workspaces, codebase, workspace_root_path):
    """
    Return a list of workspace member package.json resources.

    ``workspaces`` is the list of paths or glob path patterns declared as
    workspaces in a package.json (or as ``packages`` in a
    pnpm-workspace.yaml). It may be None or empty when nothing is declared,
    and it may be a mapping with a ``packages`` list (the yarn object form).
    ``codebase`` is the codebase holding the resources and
    ``workspace_root_path`` is the path of the parent directory of the
    package.json which declares the workspaces.

    Only resources that have some package_data are returned.
    """
    # yarn also accepts an object form: {"packages": [...], "nohoist": [...]}
    if isinstance(workspaces, dict):
        workspaces = workspaces.get('packages')

    workspace_members = []

    # Callers may pass None when no workspaces are declared
    for workspace_path in workspaces or []:

        # Case 1: A definite path, instead of a pattern (only one package.json)
        if '*' not in workspace_path:
            workspace_dir_path = os.path.join(workspace_root_path, workspace_path)
            workspace_member_path = os.path.join(workspace_dir_path, 'package.json')
            workspace_member = codebase.get_resource(path=workspace_member_path)
            if workspace_member and workspace_member.package_data:
                workspace_members.append(workspace_member)

        # Case 2: a glob path with a single "*" in the last position: we walk
        # the directory named by the prefix, relative to the workspace root.
        # (This is an optimization for a very common subcase of case 3.)
        # Patterns with any other "*" must fall through to case 3.
        elif workspace_path.endswith('*') and '*' not in workspace_path[:-1]:
            workspace_pattern_prefix = workspace_path[:-1]
            workspace_dir_path = os.path.join(workspace_root_path, workspace_pattern_prefix)
            workspace_search_dir = codebase.get_resource(path=workspace_dir_path)
            if not workspace_search_dir:
                continue

            for resource in workspace_search_dir.walk(codebase):
                if resource.package_data and NpmPackageJsonHandler.is_datafile(
                    location=resource.location,
                ):
                    workspace_members.append(resource)

        # Case 3: a complex glob pattern: walk everything under the workspace
        # root and glob match each resource path.
        else:
            workspace_root = codebase.get_resource(path=workspace_root_path)
            if not workspace_root:
                continue

            # The pattern is relative to the workspace root, so anchor it
            # there and match against resource paths (resource paths are
            # expected to start with workspace_root_path, as the lookups by
            # joined path above also rely on).
            member_pattern = os.path.join(
                workspace_root_path, workspace_path, 'package.json',
            )
            for resource in workspace_root.walk(codebase):
                if (
                    resource.package_data
                    and fnmatch.fnmatch(resource.path, member_pattern)
                    and NpmPackageJsonHandler.is_datafile(location=resource.location)
                ):
                    workspace_members.append(resource)

    return workspace_members
319+
320+
@classmethod
def update_workspace_members(cls, workspace_members, codebase):
    """
    Resolve ``workspace`` requirements between workspace members, in place.

    ``workspace_members`` is a list of workspace member package.json
    resources, each with at least one package_data mapping. For each
    dependency of any member whose extracted_requirement contains
    ``workspace`` and whose purl is the base purl of a non-private member
    that has a version, replace ``workspace`` with that version in the
    extracted_requirement. Then save every member in the ``codebase``.
    """
    # Collect info needed from all workspace members
    workspace_package_versions_by_base_purl = {}

    # Keep every (purl, dependency) pair: several members commonly depend on
    # the same package and each of these dependency mappings must be updated.
    workspace_dependencies = []

    for workspace_manifest in workspace_members:
        workspace_package_data = workspace_manifest.package_data[0]

        for dependency in workspace_package_data.get('dependencies') or []:
            workspace_dependencies.append((dependency.get('purl'), dependency))

        is_private = workspace_package_data.get('is_private')
        package_url = workspace_package_data.get('purl')
        version = workspace_package_data.get('version')
        # Private, unidentified or unversioned members cannot be used to
        # resolve a workspace requirement
        if is_private or not package_url or not version:
            continue

        # Drop version, qualifiers and subpath to get a base purl
        purl = PackageURL.from_string(package_url)
        base_purl = PackageURL(
            type=purl.type,
            namespace=purl.namespace,
            name=purl.name,
        ).to_string()

        workspace_package_versions_by_base_purl[base_purl] = version

    # Update workspace member package information from
    # workspace level data
    for dep_purl, dependency in workspace_dependencies:
        extracted_requirement = dependency.get('extracted_requirement') or ''
        if 'workspace' not in extracted_requirement:
            continue

        version = workspace_package_versions_by_base_purl.get(dep_purl)
        if version:
            new_requirement = extracted_requirement.replace('workspace', version)
            dependency['extracted_requirement'] = new_requirement

    for member in workspace_members:
        member.save(codebase)
363+
247364

248365
def get_urls(namespace, name, version, **kwargs):
249366
return dict(
@@ -303,18 +420,27 @@ def _parse(cls, json_data, package_only=False):
303420
('author', partial(party_mapper, party_type='author')),
304421
('contributors', partial(party_mapper, party_type='contributor')),
305422
('maintainers', partial(party_mapper, party_type='maintainer')),
306-
307423
('dependencies', partial(deps_mapper, field_name='dependencies')),
308424
('devDependencies', partial(deps_mapper, field_name='devDependencies')),
309425
('peerDependencies', partial(deps_mapper, field_name='peerDependencies')),
310426
('optionalDependencies', partial(deps_mapper, field_name='optionalDependencies')),
311427
('bundledDependencies', bundle_deps_mapper),
428+
('resolutions', partial(deps_mapper, field_name='resolutions')),
312429
('repository', partial(vcs_repository_mapper, vcs_revision=vcs_revision)),
313430
('keywords', keywords_mapper,),
314431
('bugs', bugs_mapper),
315432
('dist', dist_mapper),
316433
]
317434

435+
extra_data = {}
436+
extra_data_fields = ['workspaces', 'engines', 'packageManager']
437+
for extra_data_field in extra_data_fields:
438+
value = json_data.get(extra_data_field)
439+
if value:
440+
extra_data[extra_data_field] = value
441+
442+
package.extra_data = extra_data
443+
318444
for source, func in field_mappers:
319445
value = json_data.get(source) or None
320446
if value:
@@ -1034,6 +1160,36 @@ class PnpmLockYamlHandler(BasePnpmLockHandler):
10341160
documentation_url = 'https://github.com/pnpm/spec/blob/master/lockfile/6.0.md'
10351161

10361162

1163+
class PnpmWorkspaceYamlHandler(models.NonAssemblableDatafileHandler):
    """
    Handle pnpm-workspace.yaml files and collect the workspace locations
    they declare.
    """
    datasource_id = 'pnpm_workspace_yaml'
    path_patterns = ('*/pnpm-workspace.yaml',)
    default_package_type = 'npm'
    default_primary_language = 'JavaScript'
    description = 'pnpm workspace yaml file'
    documentation_url = 'https://pnpm.io/pnpm-workspace_yaml'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield a PackageData for the pnpm-workspace.yaml file at ``location``
        with the workspace locations found under its ``packages`` key stored
        as ``workspaces`` in ``extra_data``. Yield nothing if the file does
        not declare any workspace.
        """
        # YAML files are UTF-8 by default: do not depend on the locale
        with open(location, encoding='utf-8') as yl:
            workspace_data = saneyaml.load(yl.read())

        # An empty file or one that is not a mapping has nothing for us
        if not isinstance(workspace_data, dict):
            return

        workspaces = workspace_data.get('packages')
        if not workspaces:
            return

        root_package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            extra_data={'workspaces': workspaces},
        )
        yield models.PackageData.from_data(root_package_data)
1191+
1192+
10371193
def get_checksum_and_url(url):
10381194
"""
10391195
Return a mapping of {download_url, sha1} where the checksum can be a
@@ -1430,11 +1586,13 @@ def deps_mapper(deps, package, field_name):
14301586
https://docs.npmjs.com/files/package.json#devdependencies
14311587
https://docs.npmjs.com/files/package.json#optionaldependencies
14321588
"""
1589+
#TODO: verify, merge and use logic at BaseNpmHandler.update_dependencies_by_purl
14331590
npm_dependency_scopes_attributes = {
14341591
'dependencies': dict(is_runtime=True, is_optional=False),
14351592
'devDependencies': dict(is_runtime=False, is_optional=True),
14361593
'peerDependencies': dict(is_runtime=True, is_optional=False),
14371594
'optionalDependencies': dict(is_runtime=True, is_optional=True),
1595+
'resolutions': dict(is_runtime=True, is_optional=False, is_resolved=True),
14381596
}
14391597
dependencies = package.dependencies
14401598

tests/formattedcode/data/common/manifests-expected.json

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,11 @@
176176
"notice_text": null,
177177
"source_packages": [],
178178
"is_private": false,
179-
"extra_data": {},
179+
"extra_data": {
180+
"engines": {
181+
"node": ">= 0.8.0"
182+
}
183+
},
180184
"repository_homepage_url": "https://www.npmjs.com/package/grunt-esvm",
181185
"repository_download_url": "https://registry.npmjs.org/grunt-esvm/-/grunt-esvm-3.2.8.tgz",
182186
"api_data_url": "https://registry.npmjs.org/grunt-esvm/3.2.8",
@@ -1101,7 +1105,11 @@
11011105
"source_packages": [],
11021106
"file_references": [],
11031107
"is_private": false,
1104-
"extra_data": {},
1108+
"extra_data": {
1109+
"engines": {
1110+
"node": ">= 0.8.0"
1111+
}
1112+
},
11051113
"dependencies": [
11061114
{
11071115
"purl": "pkg:npm/bluebird",

tests/formattedcode/data/common/manifests-expected.jsonlines

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
"system_environment": {
2121
"operating_system": "linux",
2222
"cpu_architecture": "64",
23-
"platform": "Linux-5.15.0-106-generic-x86_64-with-glibc2.35",
24-
"platform_version": "#116-Ubuntu SMP Wed Apr 17 09:17:56 UTC 2024",
23+
"platform": "Linux-5.15.0-107-generic-x86_64-with-glibc2.35",
24+
"platform_version": "#117-Ubuntu SMP Fri Apr 26 12:26:49 UTC 2024",
2525
"python_version": "3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]"
2626
},
2727
"spdx_license_list_version": "3.23",
@@ -208,7 +208,11 @@
208208
"notice_text": null,
209209
"source_packages": [],
210210
"is_private": false,
211-
"extra_data": {},
211+
"extra_data": {
212+
"engines": {
213+
"node": ">= 0.8.0"
214+
}
215+
},
212216
"repository_homepage_url": "https://www.npmjs.com/package/grunt-esvm",
213217
"repository_download_url": "https://registry.npmjs.org/grunt-esvm/-/grunt-esvm-3.2.8.tgz",
214218
"api_data_url": "https://registry.npmjs.org/grunt-esvm/3.2.8",
@@ -1155,7 +1159,11 @@
11551159
"source_packages": [],
11561160
"file_references": [],
11571161
"is_private": false,
1158-
"extra_data": {},
1162+
"extra_data": {
1163+
"engines": {
1164+
"node": ">= 0.8.0"
1165+
}
1166+
},
11591167
"dependencies": [
11601168
{
11611169
"purl": "pkg:npm/bluebird",

0 commit comments

Comments
 (0)