Skip to content

Commit 74948f9

Browse files
committed
improve performance of package ingester
1 parent f1f1574 commit 74948f9

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

lunatrace/bsl/ingest-worker/pkg/metadata/ingester/sql.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ package ingester
1313
import (
1414
"context"
1515
"database/sql"
16+
"sync"
1617
"time"
1718

1819
"github.com/google/uuid"
@@ -33,6 +34,12 @@ type PackageSqlIngesterParams struct {
3334

3435
type packageSqlIngester struct {
3536
deps PackageSqlIngesterParams
37+
38+
maintainerCacheMutex sync.Mutex
39+
maintainerIDCache map[string]uuid.UUID
40+
41+
releaseDependencyCacheMutex sync.Mutex
42+
releaseDependencyIDCache map[string]uuid.UUID
3643
}
3744

3845
type PackageSqlIngester interface {
@@ -104,13 +111,24 @@ func (s *packageSqlIngester) mapReleases(ctx context.Context, packageId uuid.UUI
104111
return releaseIds, nil
105112
}
106113

114+
func releaseDependencyCacheKey(dep metadata.Dependency) string {
115+
return dep.Name + dep.Version
116+
}
117+
107118
func (s *packageSqlIngester) mapReleaseDependencies(
108119
ctx context.Context,
109120
releaseId uuid.UUID,
110121
ds []metadata.Dependency,
111122
) ([]uuid.UUID, error) {
112123
var releaseDependencyIds []uuid.UUID
113124
for _, dep := range ds {
125+
// there are a lot of maintainer updates for a given package, so we try to cache them
126+
cacheKey := releaseDependencyCacheKey(dep)
127+
if cachedReleaseDependencyID, ok := s.maintainerIDCache[cacheKey]; ok {
128+
releaseDependencyIds = append(releaseDependencyIds, cachedReleaseDependencyID)
129+
continue
130+
}
131+
114132
dependencyPackageId, err := upsertReleaseDependencyPackage(ctx, s.deps.DB, model.Package{
115133
Name: dep.Name,
116134
PackageManager: mapper.NpmV,
@@ -140,14 +158,29 @@ func (s *packageSqlIngester) mapReleaseDependencies(
140158
return releaseDependencyIds, nil
141159
}
142160

161+
func packageMaintainerCacheKey(packageId uuid.UUID, pm metadata.Maintainer) string {
162+
return packageId.String() + pm.Email + pm.Name
163+
}
164+
143165
func (s *packageSqlIngester) mapMaintainers(ctx context.Context, packageId uuid.UUID, p []metadata.Maintainer) ([]uuid.UUID, error) {
144166
var maintainerIds []uuid.UUID
145167
for _, pm := range p {
168+
// there are a lot of maintainer updates for a given package, so we try to cache them
169+
cacheKey := packageMaintainerCacheKey(packageId, pm)
170+
if cachedMaintainerID, ok := s.maintainerIDCache[cacheKey]; ok {
171+
maintainerIds = append(maintainerIds, cachedMaintainerID)
172+
continue
173+
}
174+
146175
insertedId, err := s.mapMaintainer(ctx, pm)
147176
if err != nil {
148177
return maintainerIds, err
149178
}
150179

180+
s.maintainerCacheMutex.Lock()
181+
s.maintainerIDCache[cacheKey] = insertedId
182+
s.maintainerCacheMutex.Unlock()
183+
151184
err = upsertPackageMaintainer(ctx, s.deps.DB, model.PackageMaintainer{
152185
PackageID: packageId,
153186
MaintainerID: insertedId,
@@ -185,6 +218,7 @@ func (s *packageSqlIngester) Ingest(ctx context.Context, pkg *metadata.PackageMe
185218

186219
func NewPackageSqlIngester(deps PackageSqlIngesterParams) PackageSqlIngester {
187220
return &packageSqlIngester{
188-
deps: deps,
221+
deps: deps,
222+
maintainerIDCache: map[string]uuid.UUID{},
189223
}
190224
}

lunatrace/bsl/ingest-worker/pkg/metadata/ingester/sqlstms.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ func upsertRelease(ctx context.Context, db *sql.DB, r model.Release) (id uuid.UU
128128

129129
func upsertReleaseDependencyPackage(ctx context.Context, db *sql.DB, p model.Package) (id uuid.UUID, err error) {
130130
selectReleaseDependencyPackage := Package.SELECT(
131+
Package.ID,
131132
Package.Name,
132133
Package.PackageManager,
133134
Package.CustomRegistry,
@@ -182,7 +183,8 @@ func upsertReleaseDependency(ctx context.Context, db *sql.DB, r model.ReleaseDep
182183
if err == nil {
183184
// If the release dependency already exists, we don't need to do anything
184185
if releaseDependency.ReleaseID == r.ReleaseID &&
185-
releaseDependency.IsDev == r.IsDev {
186+
releaseDependency.IsDev == r.IsDev &&
187+
releaseDependency.DependencyPackageID == r.DependencyPackageID {
186188
return releaseDependency.ID, nil
187189
}
188190
}
@@ -200,6 +202,7 @@ func upsertReleaseDependency(ctx context.Context, db *sql.DB, r model.ReleaseDep
200202
postgres.SET(
201203
ReleaseDependency.ReleaseID.SET(ReleaseDependency.EXCLUDED.ReleaseID),
202204
ReleaseDependency.IsDev.SET(ReleaseDependency.EXCLUDED.IsDev),
205+
ReleaseDependency.DependencyPackageID.SET(ReleaseDependency.EXCLUDED.DependencyPackageID),
203206
),
204207
).
205208
RETURNING(ReleaseDependency.ID)

0 commit comments

Comments
 (0)