@@ -13,6 +13,7 @@ package ingester
13
13
import (
14
14
"context"
15
15
"database/sql"
16
+ "sync"
16
17
"time"
17
18
18
19
"github.com/google/uuid"
@@ -33,6 +34,12 @@ type PackageSqlIngesterParams struct {
33
34
34
35
type packageSqlIngester struct {
35
36
deps PackageSqlIngesterParams
37
+
38
+ maintainerCacheMutex sync.Mutex
39
+ maintainerIDCache map [string ]uuid.UUID
40
+
41
+ releaseDependencyCacheMutex sync.Mutex
42
+ releaseDependencyIDCache map [string ]uuid.UUID
36
43
}
37
44
38
45
type PackageSqlIngester interface {
@@ -104,13 +111,24 @@ func (s *packageSqlIngester) mapReleases(ctx context.Context, packageId uuid.UUI
104
111
return releaseIds , nil
105
112
}
106
113
114
+ func releaseDependencyCacheKey (dep metadata.Dependency ) string {
115
+ return dep .Name + dep .Version
116
+ }
117
+
107
118
func (s * packageSqlIngester ) mapReleaseDependencies (
108
119
ctx context.Context ,
109
120
releaseId uuid.UUID ,
110
121
ds []metadata.Dependency ,
111
122
) ([]uuid.UUID , error ) {
112
123
var releaseDependencyIds []uuid.UUID
113
124
for _ , dep := range ds {
125
+ // check the cache first so a dependency whose name+version key was already resolved skips the upsert below
126
+ cacheKey := releaseDependencyCacheKey (dep )
127
+ if cachedReleaseDependencyID , ok := s .maintainerIDCache [cacheKey ]; ok {
128
+ releaseDependencyIds = append (releaseDependencyIds , cachedReleaseDependencyID )
129
+ continue
130
+ }
131
+
114
132
dependencyPackageId , err := upsertReleaseDependencyPackage (ctx , s .deps .DB , model.Package {
115
133
Name : dep .Name ,
116
134
PackageManager : mapper .NpmV ,
@@ -140,14 +158,29 @@ func (s *packageSqlIngester) mapReleaseDependencies(
140
158
return releaseDependencyIds , nil
141
159
}
142
160
161
+ func packageMaintainerCacheKey (packageId uuid.UUID , pm metadata.Maintainer ) string {
162
+ return packageId .String () + pm .Email + pm .Name
163
+ }
164
+
143
165
func (s * packageSqlIngester ) mapMaintainers (ctx context.Context , packageId uuid.UUID , p []metadata.Maintainer ) ([]uuid.UUID , error ) {
144
166
var maintainerIds []uuid.UUID
145
167
for _ , pm := range p {
168
+ // there are a lot of maintainer updates for a given package, so we try to cache them
169
+ cacheKey := packageMaintainerCacheKey (packageId , pm )
170
+ if cachedMaintainerID , ok := s .maintainerIDCache [cacheKey ]; ok {
171
+ maintainerIds = append (maintainerIds , cachedMaintainerID )
172
+ continue
173
+ }
174
+
146
175
insertedId , err := s .mapMaintainer (ctx , pm )
147
176
if err != nil {
148
177
return maintainerIds , err
149
178
}
150
179
180
+ s .maintainerCacheMutex .Lock ()
181
+ s .maintainerIDCache [cacheKey ] = insertedId
182
+ s .maintainerCacheMutex .Unlock ()
183
+
151
184
err = upsertPackageMaintainer (ctx , s .deps .DB , model.PackageMaintainer {
152
185
PackageID : packageId ,
153
186
MaintainerID : insertedId ,
@@ -185,6 +218,7 @@ func (s *packageSqlIngester) Ingest(ctx context.Context, pkg *metadata.PackageMe
185
218
186
219
func NewPackageSqlIngester (deps PackageSqlIngesterParams ) PackageSqlIngester {
187
220
return & packageSqlIngester {
188
- deps : deps ,
221
+ deps : deps ,
222
+ maintainerIDCache : map [string ]uuid.UUID {},
189
223
}
190
224
}
0 commit comments