Skip to content

Commit 610689c

Browse files
AyanSinhaMahapatraVarshaUN
authored and committed
Reference: #3954
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com> Signed-off-by: Jono Yang <jyang@nexb.com> Signed-off-by: Jono Yang <jyang@nexb.com> added support to parse labels in dockerfile Signed-off-by: Varsha U N <varshamaddur2006@gmail.com>
1 parent bc78721 commit 610689c

16 files changed

+297
-5
lines changed

requirements-linux.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
packagedcode-msitools==0.101.210706
22
regipy==3.1.0
33
rpm-inspector-rpm==4.16.1.3.210404
4-
go-inspector==0.3.1
4+
go-inspector==0.5.0

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jaraco.functools==4.1.0
3535
javaproperties==0.8.1
3636
Jinja2==3.1.3
3737
jsonstreams==0.6.0
38-
license-expression==30.3.0
38+
license-expression==30.4.0
3939
lxml==5.1.0
4040
MarkupSafe==2.1.5
4141
more-itertools==8.13.0

setup-mini.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ packages =
149149
rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux'
150150
regipy >= 3.1.0; platform_system == 'Linux'
151151
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'
152-
go-inspector >= 0.3.1; platform_system == 'Linux'
152+
go-inspector >= 0.5.0; platform_system == 'Linux'
153153

154154

155155
[options.entry_points]

setup.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ install_requires =
8484
javaproperties >= 0.5
8585
jinja2 >= 2.7.0
8686
jsonstreams >= 0.5.0
87-
license_expression >= 30.1.1
87+
license_expression >= 30.4.0
8888
lxml >= 4.9.2
8989
MarkupSafe >= 2.1.2
9090
packageurl_python >= 0.9.0
@@ -149,7 +149,7 @@ packages =
149149
rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux'
150150
regipy >= 3.1.0; platform_system == 'Linux'
151151
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'
152-
go-inspector >= 0.3.1; platform_system == 'Linux'
152+
go-inspector >= 0.5.0; platform_system == 'Linux'
153153

154154

155155
[options.entry_points]

src/licensedcode/tokenize.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ def select_ngrams(ngrams, with_pos=False):
336336
>>> list(select_ngrams(x for x in [(2, 1, 3), (1, 1, 3), (5, 1, 3), (2, 6, 1), (7, 3, 4)]))
337337
[(2, 1, 3), (1, 1, 3), (5, 1, 3), (2, 6, 1), (7, 3, 4)]
338338
"""
339+
ngram = None
339340
last = None
340341
for pos, ngram in enumerate(ngrams):
341342
# FIXME: use a proper hash

src/packagedcode/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from packagedcode import debian
2121
from packagedcode import debian_copyright
2222
from packagedcode import distro
23+
from packagedcode import dockerfile
2324
from packagedcode import conda
2425
from packagedcode import conan
2526
from packagedcode import cocoapods
@@ -97,6 +98,7 @@
9798
debian.DebianSourcePackageTarballHandler,
9899

99100
distro.EtcOsReleaseHandler,
101+
dockerfile.DockerfileHandler,
100102

101103
freebsd.CompactManifestHandler,
102104

src/packagedcode/dockerfile.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/scancode-toolkit for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
11+
12+
import io
13+
from pathlib import Path
14+
from dockerfile_parse import DockerfileParser
15+
from packagedcode import models
16+
from packagedcode import utils
17+
import fnmatch
18+
19+
20+
class DockerfileHandler(models.DatafileHandler):
    """
    Handle Dockerfile and Containerfile manifests and report the values of
    their LABEL instructions as package data.
    """
    datasource_id = 'dockerfile_oci_labels'

    # Lowercase filename globs recognized as Dockerfiles. They are matched
    # against the lowercased file name so that ``Dockerfile``,
    # ``Containerfile`` and ``*.dockerfile`` are all accepted regardless of
    # case and of the platform-specific behavior of ``fnmatch.fnmatch``.
    _DOCKERFILE_NAME_PATTERNS = (
        'dockerfile',
        'containerfile',
        '*.dockerfile',
        '*.containerfile',
    )

    # Namespace used by the OCI image specification for its pre-defined
    # annotation keys, e.g. ``org.opencontainers.image.licenses``.
    _OCI_LABEL_PREFIX = 'org.opencontainers.image.'

    @classmethod
    def is_datafile(cls, path):
        """
        Return True if the file name of ``path`` looks like a Dockerfile or a
        Containerfile, False otherwise. Only the name is inspected: the file
        is neither opened nor required to exist.
        """
        # Path is already imported by this module; the original code called
        # os.path.basename() without importing os and raised a NameError.
        filename = Path(path).name.lower()
        return any(
            fnmatch.fnmatchcase(filename, pattern)
            for pattern in cls._DOCKERFILE_NAME_PATTERNS
        )

    @classmethod
    def _first_label(cls, labels, *keys):
        """
        Return the first non-empty value found in the ``labels`` mapping for
        the ``keys`` tried in order, or None if none of them is set.
        """
        for key in keys:
            value = labels.get(key)
            if value:
                return value
        return None

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Parse the Dockerfile at ``location`` and yield one PackageData object
        built from its labels.

        The name, version and license are read from the plain ``name``,
        ``version`` and ``license`` labels first, then from the matching OCI
        annotation keys (``title``, ``version`` and ``licenses`` under
        ``org.opencontainers.image.``). A field without any matching label is
        None rather than the string ``'None'``.
        """
        labels = cls.extract_oci_labels_from_dockerfile(location)
        oci = cls._OCI_LABEL_PREFIX

        package_data = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': cls._first_label(labels, 'name', f'{oci}title'),
            'version': cls._first_label(labels, 'version', f'{oci}version'),
            'license_expression': cls._first_label(labels, 'license', f'{oci}licenses'),
            'labels': labels,
        }

        yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def extract_oci_labels_from_dockerfile(cls, dockerfile_path):
        """
        Return the labels declared in the Dockerfile at ``dockerfile_path``
        as returned by ``DockerfileParser.labels``.

        Raise the usual OSError subclasses if the file cannot be read.
        """
        with open(dockerfile_path, 'rb') as dockerfile:
            content = dockerfile.read()

        # Parse from an in-memory binary file object. A DockerfileParser
        # created without arguments is bound to ``./Dockerfile`` and assigning
        # its ``content`` writes that file, which would create or overwrite a
        # Dockerfile in the current working directory on every scan.
        parser = DockerfileParser(fileobj=io.BytesIO(content))
        return parser.labels

src/packagedcode/recognize.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,11 @@ def _parse(
113113

114114
if TRACE:
115115
raise
116+
117+
except Exception as e:
118+
# We should continue when an Exception has occurred when trying to
119+
# recognize a package
120+
if TRACE:
121+
logger_debug(f'_parse: Exception: {str(e)}')
122+
123+
continue
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[
2+
{
3+
"datasource_id": "dockerfile_oci_labels",
4+
"type": "default",
5+
"name": "Unknown",
6+
"version": "Unknown",
7+
"license_expression": "GPL-2.0-only AND BSD-2-Clause",
8+
"labels": {
9+
"source": "https://github.com/kubernetes-sigs/blixt",
10+
"licenses": "GPL-2.0-only,BSD-2-Clause"
11+
}
12+
}
13+
]
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[
2+
{
3+
"datasource_id": "dockerfile_oci_labels",
4+
"type": "default",
5+
"license_expression": "MIT",
6+
"labels": {
7+
"source": "https://github.com/kreneskyp/ix"
8+
}
9+
}
10+
]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
FROM postgres:15.3
2+
LABEL org.opencontainers.image.source https://github.com/kreneskyp/ix
3+
4+
RUN apt update -y && \
5+
apt install -y postgresql-15-pgvector \
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[
2+
{
3+
"datasource_id": "dockerfile_oci_labels",
4+
"type": "default",
5+
"name": "Kanboard",
6+
"version": "1.2.42",
7+
"license_expression": "MIT",
8+
"labels": {
9+
"source": "https://github.com/kanboard/kanboard",
10+
"title": "Kanboard",
11+
"description": "Kanboard is project management software that focuses on the Kanban methodology",
12+
"vendor": "Kanboard",
13+
"licenses": "MIT",
14+
"url": "https://kanboard.org",
15+
"documentation": "https://docs.kanboard.org"
16+
}
17+
}
18+
]
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#Copied from https://github.com/kubernetes-sigs/blixt/blob
2+
3+
4+
FROM rust:1.79-slim-bookworm as builder
5+
6+
ARG TARGETARCH
7+
ARG LLVM_VERSION=19
8+
9+
RUN apt-get update
10+
RUN apt-get install --yes \
11+
build-essential \
12+
protobuf-compiler \
13+
pkg-config \
14+
musl-tools \
15+
clang \
16+
wget
17+
18+
RUN apt install --yes lsb-release software-properties-common gnupg
19+
RUN wget -O /tmp/llvm.sh https://apt.llvm.org/llvm.sh
20+
RUN chmod +x /tmp/llvm.sh
21+
RUN /bin/sh -c "/tmp/llvm.sh ${LLVM_VERSION} all"
22+
23+
RUN rustup default stable
24+
RUN rustup install nightly
25+
RUN rustup component add rust-src --toolchain nightly
26+
RUN --mount=type=cache,target=/root/.cargo/registry \
27+
cargo install bpf-linker
28+
29+
WORKDIR /workspace
30+
# Docker uses the amd64/arm64 convention while Rust uses the x86_64/aarch64 convention.
31+
# Since Dockerfile doesn't support conditional variables (sigh), write the arch in Rust's
32+
# convention to a file for later usage.
33+
RUN if [ "$TARGETARCH" = "amd64" ]; \
34+
then echo "x86_64" >> arch; \
35+
else echo "aarch64" >> arch; \
36+
fi
37+
RUN rustup target add $(eval cat arch)-unknown-linux-musl
38+
39+
COPY dataplane dataplane
40+
COPY tools/udp-test-server tools/udp-test-server
41+
COPY xtask xtask
42+
COPY Cargo.toml Cargo.toml
43+
COPY Cargo.lock Cargo.lock
44+
COPY .cargo .cargo
45+
46+
# We need to tell bpf-linker where it can find LLVM's shared library file.
47+
# Ref: https://github.com/aya-rs/rustc-llvm-proxy/blob/cbcb3c6/src/lib.rs#L48
48+
ENV LD_LIBRARY_PATH="/usr/lib/llvm-$LLVM_VERSION/lib"
49+
ENV CC_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang"
50+
ENV AR_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar"
51+
ENV CC_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang"
52+
ENV AR_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar"
53+
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-Clink-self-contained=yes -Clinker=rust-lld"
54+
55+
RUN --mount=type=cache,target=/workspace/target/ \
56+
--mount=type=cache,target=/root/.cargo/registry \
57+
cargo xtask build-ebpf --release
58+
RUN --mount=type=cache,target=/workspace/target/ \
59+
--mount=type=cache,target=/root/.cargo/registry \
60+
RUSTFLAGS=-Ctarget-feature=+crt-static cargo build \
61+
--workspace \
62+
--exclude ebpf \
63+
--release \
64+
--target=$(eval cat arch)-unknown-linux-musl
65+
RUN --mount=type=cache,target=/workspace/target/ \
66+
cp /workspace/target/$(eval cat arch)-unknown-linux-musl/release/loader /workspace/dataplane-release
67+
68+
FROM alpine
69+
70+
LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt
71+
LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause
72+
73+
WORKDIR /opt/blixt/
74+
75+
COPY --from=builder /workspace/dataplane-release /opt/blixt/dataplane
76+
77+
COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0
78+
COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause
79+
80+
ENTRYPOINT ["/opt/blixt/dataplane"]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#Copied from https://github.com/kanboard/kanboard
2+
3+
FROM alpine:3.21
4+
5+
LABEL org.opencontainers.image.source https://github.com/kanboard/kanboard
6+
LABEL org.opencontainers.image.title=Kanboard
7+
LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology"
8+
LABEL org.opencontainers.image.vendor=Kanboard
9+
LABEL org.opencontainers.image.licenses=MIT
10+
LABEL org.opencontainers.image.url=https://kanboard.org
11+
LABEL org.opencontainers.image.documentation=https://docs.kanboard.org
12+
13+
VOLUME /var/www/app/data
14+
VOLUME /var/www/app/plugins
15+
VOLUME /etc/nginx/ssl
16+
17+
EXPOSE 80 443
18+
19+
ARG VERSION
20+
21+
RUN apk --no-cache --update add \
22+
tzdata openssl unzip nginx bash ca-certificates s6 curl ssmtp mailx php83 php83-phar php83-curl \
23+
php83-fpm php83-json php83-zlib php83-xml php83-dom php83-ctype php83-opcache php83-zip php83-iconv \
24+
php83-pdo php83-pdo_mysql php83-pdo_sqlite php83-pdo_pgsql php83-mbstring php83-session php83-bcmath \
25+
php83-gd php83-openssl php83-sockets php83-posix php83-ldap php83-simplexml php83-xmlwriter && \
26+
rm -rf /var/www/localhost && \
27+
rm -f /etc/php83/php-fpm.d/www.conf && \
28+
ln -sf /usr/bin/php83 /usr/bin/php
29+
30+
ADD . /var/www/app
31+
ADD docker/ /
32+
33+
RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt
34+
35+
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
36+
CMD []

tests/packagedcode/test_dockerfile.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/scancode-toolkit for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from packagedcode import dockerfile
11+
import pytest
12+
import os.path
13+
import json
14+
from pathlib import Path
15+
from packagedcode.dockerfile import DockerfileHandler
16+
17+
class TestDockerfileHandler:
    """
    Tests for the Dockerfile package data handler.
    """

    # (Dockerfile name, expected JSON name) pairs located under
    # ``data/docker/``. Every test iterates over this single table so that
    # they all agree on which expected file belongs to which Dockerfile.
    DOCKERFILES = (
        ('test.dockerfile', 'test-dockerfile-expected.json'),
        ('test.containerfile', 'containerfile-expected.json'),
        ('psql.dockerfile', 'psql-expected.json'),
    )

    def get_test_loc(self, path):
        """
        Return the location of the test file at ``path``, a path relative to
        the directory that contains this test module.
        """
        # The callers pass paths that already start with ``data/``, so the
        # base is the module directory itself.
        return Path(os.path.dirname(__file__)) / path

    def load_expected(self, expected_file):
        """
        Return the data loaded from the ``expected_file`` JSON file.
        """
        with open(expected_file, encoding='utf-8') as f:
            return json.load(f)

    def test_is_datafile(self):
        # ``test_name`` rather than ``dockerfile`` to avoid shadowing the
        # imported ``dockerfile`` module.
        for test_name, _expected_name in self.DOCKERFILES:
            test_file = self.get_test_loc(f'data/docker/{test_name}')
            assert DockerfileHandler.is_datafile(str(test_file))

    def test_parse_dockerfile(self):
        for test_name, expected_name in self.DOCKERFILES:
            test_file = self.get_test_loc(f'data/docker/{test_name}')
            expected_loc = self.get_test_loc(f'data/docker/{expected_name}')
            packages = list(DockerfileHandler.parse(str(test_file)))
            expected_packages = self.load_expected(expected_loc)
            # NOTE(review): parse() yields the objects built by
            # PackageData.from_data() while the expected file holds plain JSON
            # mappings; confirm that this equality can hold, or compare the
            # serialized form of the packages instead.
            assert packages == expected_packages

    def test_extract_oci_labels_from_dockerfile(self):
        for test_name, expected_name in self.DOCKERFILES:
            dockerfile_path = self.get_test_loc(f'data/docker/{test_name}')
            labels = DockerfileHandler.extract_oci_labels_from_dockerfile(str(dockerfile_path))
            expected_loc = self.get_test_loc(f'data/docker/{expected_name}')
            expected_labels = self.load_expected(expected_loc)[0]['labels']
            assert labels == expected_labels

0 commit comments

Comments
 (0)