Skip to content

Commit ed1df5f

Browse files
committed
feat(scanner): Merge duplicate scan results that share a provenance
When the SpdxDocumentFile package manager is used, the *project* and all contained *packages* often resolve to the **same VCS provenance** (e.g. the root of the Git repository). Before this change, ORT stored two separate `ScanResult`s for such a provenance – one keyed to the project, one keyed to the package. That caused two follow-on problems:

* Both results appeared in the `OrtResult`, so evaluators saw **duplicate findings** for the *same* source tree.
* Because projects and packages are handled by different rules, the package result was additionally **padded with a `SpdxConstants.NONE` finding** whenever `includeFilesWithoutFindings` was enabled. The evaluator therefore compared *real* license findings from the project result with `NONE` from the package result and failed with a violation.

This patch

* groups scan results by the pair `(provenance, scanner)` and folds them into a single `ScanResult`,
* unions the inner finding sets to avoid duplicates, and
* performs the "pad with NONE" step only **after** deduplication, so every path is represented exactly once.

As a consequence, the evaluator now receives one consistent set of license findings per provenance / scanner, eliminating the false mismatch.

Signed-off-by: Jonatan Männchen <jonatan@maennchen.ch>
1 parent 2a5d430 commit ed1df5f

File tree

1 file changed

+60
-26
lines changed

1 file changed

+60
-26
lines changed

scanner/src/main/kotlin/Scanner.kt

Lines changed: 60 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -141,15 +141,72 @@ class Scanner(
141141
}
142142
}
143143

144+
val filesByProvenance = (projectResults.files + packageResults.files)
145+
.groupBy { it.provenance }
146+
.values
147+
.map { fileLists ->
148+
fileLists.reduce { acc, next ->
149+
acc.copy(
150+
files = acc.files + next.files
151+
)
152+
}
153+
}
154+
.toSet()
155+
156+
val deduplicatedScanResults = (projectResults.scanResults + packageResults.scanResults)
157+
.groupBy { it.provenance to it.scanner }
158+
.values
159+
.map { scanResults ->
160+
scanResults.reduce { acc, next ->
161+
acc.copy(
162+
summary = acc.summary.copy(
163+
startTime = minOf(acc.summary.startTime, next.summary.startTime),
164+
endTime = maxOf(acc.summary.endTime, next.summary.endTime),
165+
licenseFindings = acc.summary.licenseFindings.union(next.summary.licenseFindings),
166+
copyrightFindings = acc.summary.copyrightFindings.union(next.summary.copyrightFindings),
167+
snippetFindings = acc.summary.snippetFindings.union(next.summary.snippetFindings),
168+
issues = acc.summary.issues + next.summary.issues
169+
),
170+
additionalData = acc.additionalData + next.additionalData
171+
)
172+
}
173+
}
174+
.toSet()
175+
176+
val pathsByProvenance = filesByProvenance.associate {
177+
it.provenance to it.files.map { file -> file.path }.toSet()
178+
}
179+
180+
val checkedScanResults = if (scannerConfig.includeFilesWithoutFindings) {
181+
deduplicatedScanResults.mapTo(mutableSetOf()) { scanResult ->
182+
val allPaths = pathsByProvenance[scanResult.provenance].orEmpty()
183+
184+
val pathsWithFindings = scanResult.summary.licenseFindings.mapTo(mutableSetOf()) { it.location.path }
185+
val pathsWithoutFindings = allPaths - pathsWithFindings
186+
187+
val findingsThatAreNone = pathsWithoutFindings.mapTo(mutableSetOf()) {
188+
LicenseFinding(SpdxConstants.NONE, TextLocation(it, TextLocation.UNKNOWN_LINE))
189+
}
190+
191+
scanResult.copy(
192+
summary = scanResult.summary.copy(
193+
licenseFindings = scanResult.summary.licenseFindings + findingsThatAreNone
194+
)
195+
)
196+
}
197+
} else {
198+
deduplicatedScanResults
199+
}
200+
144201
val scannerRun = ScannerRun(
145202
startTime = startTime,
146203
endTime = endTime,
147204
environment = Environment(toolVersions = toolVersions),
148205
config = scannerConfig,
149206
provenances = projectResults.provenances + packageResults.provenances,
150-
scanResults = projectResults.scanResults + packageResults.scanResults,
207+
scanResults = checkedScanResults,
151208
issues = projectResults.issues + packageResults.issues,
152-
files = projectResults.files + packageResults.files,
209+
files = filesByProvenance,
153210
scanners = projectResults.scanners + packageResults.scanners
154211
)
155212

@@ -193,7 +250,7 @@ class Scanner(
193250

194251
val vcsPathsForProvenances = getVcsPathsForProvenances(provenances)
195252

196-
val filteredScanResults = filterScanResultsByVcsPaths(controller.getAllScanResults(), vcsPathsForProvenances)
253+
val scanResults = filterScanResultsByVcsPaths(controller.getAllScanResults(), vcsPathsForProvenances)
197254

198255
val files = controller.getAllFileLists().mapTo(mutableSetOf()) { (provenance, fileList) ->
199256
FileList(
@@ -208,29 +265,6 @@ class Scanner(
208265
}
209266
}
210267

211-
val scanResults = if (scannerConfig.includeFilesWithoutFindings) {
212-
filteredScanResults.mapTo(mutableSetOf()) { scanResult ->
213-
val allPaths = controller.getAllFileLists()[scanResult.provenance]?.files?.mapTo(mutableSetOf()) {
214-
it.path
215-
}.orEmpty()
216-
217-
val pathsWithFindings = scanResult.summary.licenseFindings.mapTo(mutableSetOf()) { it.location.path }
218-
val pathsWithoutFindings = allPaths - pathsWithFindings
219-
220-
val findingsThatAreNone = pathsWithoutFindings.mapTo(mutableSetOf()) {
221-
LicenseFinding(SpdxConstants.NONE, TextLocation(it, TextLocation.UNKNOWN_LINE))
222-
}
223-
224-
scanResult.copy(
225-
summary = scanResult.summary.copy(
226-
licenseFindings = scanResult.summary.licenseFindings + findingsThatAreNone
227-
)
228-
)
229-
}
230-
} else {
231-
filteredScanResults
232-
}
233-
234268
val scannerIds = scannerWrappers.mapTo(mutableSetOf()) { it.descriptor.id }
235269
val scanners = packages.associateBy({ it.id }) { scannerIds }
236270

0 commit comments

Comments
 (0)