Skip to content

Commit 276bc2f

Browse files
committed
feat(scanner): Merge duplicate scan results that share a provenance
When the SpdxDocumentFile package manager is used, the *project* and all contained *packages* often resolve to the **same VCS provenance** (e.g. the root of the Git repository). Before this change, ORT stored two separate `ScanResult`s for such a provenance – one keyed to the project, one keyed to the package.

That caused two follow-on problems:

* Both results appeared in the `OrtResult`, so evaluators saw **duplicate findings** for the *same* source tree.
* Because projects and packages are handled by different rules, the package result was additionally **padded with a `SpdxConstants.NONE` finding** whenever `includeFilesWithoutFindings` was enabled. The evaluator therefore compared *real* license findings from the project result with `NONE` from the package result and failed with a violation.

This patch

* groups scan results by the pair `(provenance, scanner)` and folds them into a single `ScanResult`,
* unions the inner finding sets to avoid duplicates, and
* performs the "pad with NONE" step only **after** deduplication, so every path is represented exactly once.

As a consequence, the evaluator now receives one consistent set of license findings per provenance / scanner, eliminating the false mismatch.

Signed-off-by: Jonatan Männchen <jonatan@maennchen.ch>
1 parent 98eb203 commit 276bc2f

6 files changed

+335
-78
lines changed

scanner/src/funTest/kotlin/scanners/ScannerIntegrationFunTest.kt

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import org.ossreviewtoolkit.model.Package
3434
import org.ossreviewtoolkit.model.PackageReference
3535
import org.ossreviewtoolkit.model.PackageType
3636
import org.ossreviewtoolkit.model.Project
37+
import org.ossreviewtoolkit.model.Repository
3738
import org.ossreviewtoolkit.model.ScanSummary
3839
import org.ossreviewtoolkit.model.Scope
3940
import org.ossreviewtoolkit.model.TextLocation
@@ -61,7 +62,7 @@ import org.ossreviewtoolkit.utils.test.readResource
6162

6263
class ScannerIntegrationFunTest : WordSpec({
6364
"Scanning all packages corresponding to a single VCS" should {
64-
val analyzerResult = createAnalyzerResult(pkg0, pkg1, pkg2, pkg3, pkg4)
65+
val analyzerResult = createAnalyzerResult(project0, pkg0, pkg1, pkg2, pkg3, pkg4)
6566
val ortResult = createScanner().scan(analyzerResult, skipExcluded = false, emptyMap())
6667

6768
"return the expected ORT result" {
@@ -91,7 +92,7 @@ class ScannerIntegrationFunTest : WordSpec({
9192

9293
"Scanning a subset of the packages corresponding to a single VCS" should {
9394
"return the expected ORT result" {
94-
val analyzerResult = createAnalyzerResult(pkg1, pkg3)
95+
val analyzerResult = createAnalyzerResult(project0, pkg1, pkg3)
9596
val expectedResult = readResource("/scanner-integration-subset-pkgs-expected-ort-result.yml")
9697

9798
val ortResult = createScanner().scan(analyzerResult, skipExcluded = false, emptyMap())
@@ -127,26 +128,31 @@ internal fun createScanner(scannerWrappers: Map<PackageType, List<ScannerWrapper
127128
)
128129
}
129130

130-
private fun createAnalyzerResult(vararg packages: Package): OrtResult {
131+
private fun createAnalyzerResult(project: Project, vararg packages: Package): OrtResult {
131132
val scope = Scope(
132133
name = "deps",
133134
dependencies = packages.mapTo(mutableSetOf()) { PackageReference(it.id) }
134135
)
135136

136-
val project = Project.EMPTY.copy(
137-
id = createId("project"),
137+
val projectWithScope = project.copy(
138138
scopeDependencies = setOf(scope)
139139
)
140140

141141
val analyzerRun = AnalyzerRun.EMPTY.copy(
142142
result = AnalyzerResult.EMPTY.copy(
143-
projects = setOf(project),
143+
projects = setOf(projectWithScope),
144144
packages = packages.toSet()
145145
),
146146
config = AnalyzerConfiguration(enabledPackageManagers = emptyList())
147147
)
148148

149-
return OrtResult.EMPTY.copy(analyzer = analyzerRun)
149+
return OrtResult.EMPTY.copy(
150+
analyzer = analyzerRun,
151+
repository = Repository.EMPTY.copy(
152+
vcsProcessed = projectWithScope.vcsProcessed,
153+
vcs = projectWithScope.vcs
154+
)
155+
)
150156
}
151157

152158
private fun createId(name: String): Identifier = Identifier("Dummy::$name:1.0.0")
@@ -158,6 +164,24 @@ private fun createPackage(name: String, vcs: VcsInfo): Package =
158164
vcsProcessed = vcs.normalize()
159165
)
160166

167+
private fun createProject(name: String, vcs: VcsInfo): Project =
168+
Project.EMPTY.copy(
169+
id = createId(name),
170+
vcs = vcs,
171+
vcsProcessed = vcs.normalize()
172+
)
173+
174+
// A project with an empty VCS path.
175+
private val project0 = createProject(
176+
name = "project",
177+
vcs = VcsInfo(
178+
type = VcsType.GIT,
179+
url = "https://github.com/oss-review-toolkit/ort-test-data-scanner.git",
180+
revision = "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec",
181+
path = ""
182+
)
183+
)
184+
161185
// A package with an empty VCS path.
162186
private val pkg0 = createPackage(
163187
name = "pkg0",

scanner/src/funTest/resources/scanner-integration-all-pkgs-expected-ort-result.yml

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
---
22
repository:
33
vcs:
4-
type: ""
5-
url: ""
6-
revision: ""
4+
type: "Git"
5+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
6+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
77
path: ""
88
vcs_processed:
9-
type: ""
10-
url: ""
11-
revision: ""
9+
type: "Git"
10+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
11+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
1212
path: ""
1313
config: {}
1414
analyzer:
@@ -34,14 +34,14 @@ analyzer:
3434
declared_licenses: []
3535
declared_licenses_processed: {}
3636
vcs:
37-
type: ""
38-
url: ""
39-
revision: ""
37+
type: "Git"
38+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
39+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
4040
path: ""
4141
vcs_processed:
42-
type: ""
43-
url: ""
44-
revision: ""
42+
type: "Git"
43+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
44+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
4545
path: ""
4646
homepage_url: ""
4747
scopes:
@@ -289,12 +289,24 @@ scanner:
289289
revision: "6431fd85188db22b942deb66c7a8c1a53921fc35"
290290
path: ""
291291
- id: "Dummy::project:1.0.0"
292-
package_provenance_resolution_issue:
293-
timestamp: "1970-01-01T00:00:00Z"
294-
source: "Scanner"
295-
message: "IOException: Could not resolve provenance for package 'Dummy::project:1.0.0'\
296-
\ for source code origins [VCS, ARTIFACT]."
297-
severity: "ERROR"
292+
package_provenance:
293+
vcs_info:
294+
type: "Git"
295+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
296+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
297+
path: ""
298+
resolved_revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
299+
sub_repositories:
300+
pkg3/subrepo:
301+
type: "Git"
302+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner-subrepo.git"
303+
revision: "a732695e03efcbd74539208af98c297ee86e49d5"
304+
path: ""
305+
pkg4/subrepo:
306+
type: "Git"
307+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner-subrepo2.git"
308+
revision: "6431fd85188db22b942deb66c7a8c1a53921fc35"
309+
path: ""
298310
scan_results:
299311
- provenance:
300312
vcs_info:

scanner/src/funTest/resources/scanner-integration-expected-file-lists.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,38 @@ Dummy::pkg4:1.0.0:
9090
sha1: "ae8044f7fce7ee914a853c30c3085895e9be8b9c"
9191
- path: "pkg4/subrepo/pkg-s2/pkg-s2.txt"
9292
sha1: "37996d13eceb6b29db43a381ce8df375b5eee8e9"
93+
Dummy::project:1.0.0:
94+
provenance:
95+
vcs_info:
96+
type: "Git"
97+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
98+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
99+
path: ""
100+
resolved_revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
101+
files:
102+
- path: ".gitmodules"
103+
sha1: "d7f070ddbe0b6dd8a173714d565a1240dd96eacd"
104+
- path: "LICENSE"
105+
sha1: "7df059597099bb7dcf25d2a9aedfaf4465f72d8d"
106+
- path: "README"
107+
sha1: "82cfc115138054ce5b5e6839f38687c9d7186710"
108+
- path: "pkg1/pkg1.txt"
109+
sha1: "22eb73bd30d47540a4e05781f0f6e07640857cae"
110+
- path: "pkg2/pkg2.txt"
111+
sha1: "cc8f97cebe1dc0ed889a31f504bcf491d5241aaa"
112+
- path: "pkg3/pkg3.txt"
113+
sha1: "859d66be2d153968cdaa8ec7265270c241eea024"
114+
- path: "pkg3/subrepo/LICENSE"
115+
sha1: "7df059597099bb7dcf25d2a9aedfaf4465f72d8d"
116+
- path: "pkg3/subrepo/README"
117+
sha1: "ae8044f7fce7ee914a853c30c3085895e9be8b9c"
118+
- path: "pkg3/subrepo/pkg-s1/pkg-s1.txt"
119+
sha1: "e5fb17f8f4f4ef0748bb5ba137fd0e091dd5a1f6"
120+
- path: "pkg4/pkg4.txt"
121+
sha1: "3cba29011be2b9d59f6204d6fa0a386b1b2dbd90"
122+
- path: "pkg4/subrepo/LICENSE"
123+
sha1: "7df059597099bb7dcf25d2a9aedfaf4465f72d8d"
124+
- path: "pkg4/subrepo/README"
125+
sha1: "ae8044f7fce7ee914a853c30c3085895e9be8b9c"
126+
- path: "pkg4/subrepo/pkg-s2/pkg-s2.txt"
127+
sha1: "37996d13eceb6b29db43a381ce8df375b5eee8e9"

scanner/src/funTest/resources/scanner-integration-expected-scan-results.yml

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -215,17 +215,83 @@ Dummy::pkg4:1.0.0:
215215
start_line: -1
216216
end_line: -1
217217
Dummy::project:1.0.0:
218-
- provenance: {}
218+
- provenance:
219+
vcs_info:
220+
type: "Git"
221+
url: "https://github.com/oss-review-toolkit/ort-test-data-scanner.git"
222+
revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
223+
path: ""
224+
resolved_revision: "97d57bb4795bc41f496e1a8e2c7751cefc7da7ec"
219225
scanner:
220-
name: "ProvenanceResolver"
221-
version: ""
226+
name: "Dummy"
227+
version: "1.0.0"
222228
configuration: ""
223229
summary:
224230
start_time: "1970-01-01T00:00:00Z"
225231
end_time: "1970-01-01T00:00:00Z"
226-
issues:
227-
- timestamp: "1970-01-01T00:00:00Z"
228-
source: "Scanner"
229-
message: "IOException: Could not resolve provenance for package 'Dummy::project:1.0.0'\
230-
\ for source code origins [VCS, ARTIFACT]."
231-
severity: "ERROR"
232+
licenses:
233+
- license: "NOASSERTION"
234+
location:
235+
path: ".gitmodules"
236+
start_line: -1
237+
end_line: -1
238+
- license: "NOASSERTION"
239+
location:
240+
path: "LICENSE"
241+
start_line: -1
242+
end_line: -1
243+
- license: "NOASSERTION"
244+
location:
245+
path: "README"
246+
start_line: -1
247+
end_line: -1
248+
- license: "NOASSERTION"
249+
location:
250+
path: "pkg1/pkg1.txt"
251+
start_line: -1
252+
end_line: -1
253+
- license: "NOASSERTION"
254+
location:
255+
path: "pkg2/pkg2.txt"
256+
start_line: -1
257+
end_line: -1
258+
- license: "NOASSERTION"
259+
location:
260+
path: "pkg3/pkg3.txt"
261+
start_line: -1
262+
end_line: -1
263+
- license: "NOASSERTION"
264+
location:
265+
path: "pkg3/subrepo/LICENSE"
266+
start_line: -1
267+
end_line: -1
268+
- license: "NOASSERTION"
269+
location:
270+
path: "pkg3/subrepo/README"
271+
start_line: -1
272+
end_line: -1
273+
- license: "NOASSERTION"
274+
location:
275+
path: "pkg3/subrepo/pkg-s1/pkg-s1.txt"
276+
start_line: -1
277+
end_line: -1
278+
- license: "NOASSERTION"
279+
location:
280+
path: "pkg4/pkg4.txt"
281+
start_line: -1
282+
end_line: -1
283+
- license: "NOASSERTION"
284+
location:
285+
path: "pkg4/subrepo/LICENSE"
286+
start_line: -1
287+
end_line: -1
288+
- license: "NOASSERTION"
289+
location:
290+
path: "pkg4/subrepo/README"
291+
start_line: -1
292+
end_line: -1
293+
- license: "NOASSERTION"
294+
location:
295+
path: "pkg4/subrepo/pkg-s2/pkg-s2.txt"
296+
start_line: -1
297+
end_line: -1

0 commit comments

Comments
 (0)