Skip to content

Commit 1afe79e

Browse files
authored
feat: add metrics for grpc api of the cache task (#3539)
Signed-off-by: Gaius <gaius.qi@gmail.com>
1 parent 61c3cf4 commit 1afe79e

File tree

5 files changed

+238
-25
lines changed

5 files changed

+238
-25
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module d7y.io/dragonfly/v2
33
go 1.21
44

55
require (
6-
d7y.io/api/v2 v2.0.158
6+
d7y.io/api/v2 v2.0.159
77
github.com/MysteriousPotato/go-lockable v1.0.0
88
github.com/RichardKnop/machinery v1.10.8
99
github.com/Showmax/go-fqdn v1.0.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo
5353
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
5454
cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
5555
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
56-
d7y.io/api/v2 v2.0.158 h1:uX5Lg8AWX2Pc69bBka3AErH4vtHk6xbntTj4jhCABgo=
57-
d7y.io/api/v2 v2.0.158/go.mod h1:VOnTWgLrGtivgyyofZCfiSDTAKDJ9ohVqM6l3S8EPCE=
56+
d7y.io/api/v2 v2.0.159 h1:xSLq0GjqV0F8TgfZ13EDJa+eqaWcqhrEepybAoT9OnI=
57+
d7y.io/api/v2 v2.0.159/go.mod h1:VOnTWgLrGtivgyyofZCfiSDTAKDJ9ohVqM6l3S8EPCE=
5858
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
5959
github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
6060
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.0.0/go.mod h1:uGG2W01BaETf0Ozp+QxxKJdMBNRWPdstHG0Fmdwn1/U=

scheduler/metrics/metrics.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,118 @@ var (
289289
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
290290
})
291291

292+
AnnounceCachePeerCount = promauto.NewCounter(prometheus.CounterOpts{
293+
Namespace: types.MetricsNamespace,
294+
Subsystem: types.SchedulerMetricsName,
295+
Name: "announce_cache_peer_total",
296+
Help: "Counter of the number of the announcing cache peer.",
297+
})
298+
299+
AnnounceCachePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
300+
Namespace: types.MetricsNamespace,
301+
Subsystem: types.SchedulerMetricsName,
302+
Name: "announce_cache_peer_failure_total",
303+
Help: "Counter of the number of failed of the announcing cache peer.",
304+
})
305+
306+
StatCachePeerCount = promauto.NewCounter(prometheus.CounterOpts{
307+
Namespace: types.MetricsNamespace,
308+
Subsystem: types.SchedulerMetricsName,
309+
Name: "stat_cache_peer_total",
310+
Help: "Counter of the number of the stat cache peer.",
311+
})
312+
313+
StatCachePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
314+
Namespace: types.MetricsNamespace,
315+
Subsystem: types.SchedulerMetricsName,
316+
Name: "stat_cache_peer_failure_total",
317+
Help: "Counter of the number of failed of the stat cache peer.",
318+
})
319+
320+
DeleteCachePeerCount = promauto.NewCounter(prometheus.CounterOpts{
321+
Namespace: types.MetricsNamespace,
322+
Subsystem: types.SchedulerMetricsName,
323+
Name: "delete_cache_peer_total",
324+
Help: "Counter of the number of the deleting cache peer.",
325+
})
326+
327+
DeleteCachePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
328+
Namespace: types.MetricsNamespace,
329+
Subsystem: types.SchedulerMetricsName,
330+
Name: "delete_cache_peer_failure_total",
331+
Help: "Counter of the number of failed of the deleting cache peer.",
332+
})
333+
334+
UploadCacheTaskStartedCount = promauto.NewCounter(prometheus.CounterOpts{
335+
Namespace: types.MetricsNamespace,
336+
Subsystem: types.SchedulerMetricsName,
337+
Name: "upload_cache_peer_started_total",
338+
Help: "Counter of the number of the started uploading cache peer.",
339+
})
340+
341+
UploadCacheTaskStartedFailureCount = promauto.NewCounter(prometheus.CounterOpts{
342+
Namespace: types.MetricsNamespace,
343+
Subsystem: types.SchedulerMetricsName,
344+
Name: "upload_cache_peer_started_failure_total",
345+
Help: "Counter of the number of failed of the started uploading cache peer.",
346+
})
347+
348+
UploadCacheTaskFinishedCount = promauto.NewCounter(prometheus.CounterOpts{
349+
Namespace: types.MetricsNamespace,
350+
Subsystem: types.SchedulerMetricsName,
351+
Name: "upload_cache_peer_finished_total",
352+
Help: "Counter of the number of the finished uploading cache peer.",
353+
})
354+
355+
UploadCacheTaskFinishedFailureCount = promauto.NewCounter(prometheus.CounterOpts{
356+
Namespace: types.MetricsNamespace,
357+
Subsystem: types.SchedulerMetricsName,
358+
Name: "upload_cache_peer_finished_failure_total",
359+
Help: "Counter of the number of failed of the finished uploading cache peer.",
360+
})
361+
362+
UploadCacheTaskFailedCount = promauto.NewCounter(prometheus.CounterOpts{
363+
Namespace: types.MetricsNamespace,
364+
Subsystem: types.SchedulerMetricsName,
365+
Name: "upload_cache_peer_failed_total",
366+
Help: "Counter of the number of the failed uploading cache peer.",
367+
})
368+
369+
UploadCacheTaskFailedFailureCount = promauto.NewCounter(prometheus.CounterOpts{
370+
Namespace: types.MetricsNamespace,
371+
Subsystem: types.SchedulerMetricsName,
372+
Name: "upload_cache_peer_failed_failure_total",
373+
Help: "Counter of the number of failed of the failed uploading cache peer.",
374+
})
375+
376+
StatCacheTaskCount = promauto.NewCounter(prometheus.CounterOpts{
377+
Namespace: types.MetricsNamespace,
378+
Subsystem: types.SchedulerMetricsName,
379+
Name: "stat_cache_task_total",
380+
Help: "Counter of the number of the stat cache task.",
381+
})
382+
383+
StatCacheTaskFailureCount = promauto.NewCounter(prometheus.CounterOpts{
384+
Namespace: types.MetricsNamespace,
385+
Subsystem: types.SchedulerMetricsName,
386+
Name: "stat_cache_task_failure_total",
387+
Help: "Counter of the number of failed of the stat cache task.",
388+
})
389+
390+
DeleteCacheTaskCount = promauto.NewCounter(prometheus.CounterOpts{
391+
Namespace: types.MetricsNamespace,
392+
Subsystem: types.SchedulerMetricsName,
393+
Name: "delete_cache_task_total",
394+
Help: "Counter of the number of the delete cache task.",
395+
})
396+
397+
DeleteCacheTaskFailureCount = promauto.NewCounter(prometheus.CounterOpts{
398+
Namespace: types.MetricsNamespace,
399+
Subsystem: types.SchedulerMetricsName,
400+
Name: "delete_cache_task_failure_total",
401+
Help: "Counter of the number of failed of the delete cache task.",
402+
})
403+
292404
VersionGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
293405
Namespace: types.MetricsNamespace,
294406
Subsystem: types.SchedulerMetricsName,

scheduler/rpcserver/scheduler_server_v2.go

Lines changed: 75 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -170,56 +170,109 @@ func (s *schedulerServerV2) SyncProbes(stream schedulerv2.Scheduler_SyncProbesSe
170170
return nil
171171
}
172172

173-
// TODO Implement the following methods.
174-
// AnnouncePeers announces peers to scheduler.
175-
func (s *schedulerServerV2) AnnouncePeers(stream schedulerv2.Scheduler_AnnouncePeersServer) error {
176-
return nil
177-
}
178-
179-
// TODO Implement the following methods.
180173
// AnnounceCachePeer announces cache peer to scheduler.
181174
func (s *schedulerServerV2) AnnounceCachePeer(stream schedulerv2.Scheduler_AnnounceCachePeerServer) error {
175+
// Collect AnnounceCachePeerCount metrics.
176+
metrics.AnnounceCachePeerCount.Inc()
177+
if err := s.service.AnnounceCachePeer(stream); err != nil {
178+
// Collect AnnounceCachePeerFailureCount metrics.
179+
metrics.AnnounceCachePeerFailureCount.Inc()
180+
return err
181+
}
182+
182183
return nil
183184
}
184185

185-
// TODO Implement the following methods.
186186
// StatCachePeer checks information of cache peer.
187187
func (s *schedulerServerV2) StatCachePeer(ctx context.Context, req *schedulerv2.StatCachePeerRequest) (*commonv2.CachePeer, error) {
188-
return nil, nil
188+
// Collect StatCachePeerCount metrics.
189+
metrics.StatCachePeerCount.Inc()
190+
resp, err := s.service.StatCachePeer(ctx, req)
191+
if err != nil {
192+
// Collect StatCachePeerFailureCount metrics.
193+
metrics.StatCachePeerFailureCount.Inc()
194+
return nil, err
195+
}
196+
197+
return resp, nil
189198
}
190199

191-
// TODO Implement the following methods.
192200
// DeleteCachePeer releases cache peer in scheduler.
193201
func (s *schedulerServerV2) DeleteCachePeer(ctx context.Context, req *schedulerv2.DeleteCachePeerRequest) (*emptypb.Empty, error) {
202+
// Collect DeleteCachePeerCount metrics.
203+
metrics.DeleteCachePeerCount.Inc()
204+
if err := s.service.DeleteCachePeer(ctx, req); err != nil {
205+
// Collect DeleteCachePeerFailureCount metrics.
206+
metrics.DeleteCachePeerFailureCount.Inc()
207+
return nil, err
208+
}
209+
194210
return new(emptypb.Empty), nil
195211
}
196212

197-
// TODO Implement the following methods.
198213
// UploadCacheTaskStarted uploads the metadata of the cache task started.
199-
func (s *schedulerServerV2) UploadCacheTaskStarted(ctx context.Context, request *schedulerv2.UploadCacheTaskStartedRequest) (*emptypb.Empty, error) {
200-
return nil, nil
214+
func (s *schedulerServerV2) UploadCacheTaskStarted(ctx context.Context, req *schedulerv2.UploadCacheTaskStartedRequest) (*emptypb.Empty, error) {
215+
// Collect UploadCacheTaskStartedCount metrics.
216+
metrics.UploadCacheTaskStartedCount.Inc()
217+
if err := s.service.UploadCacheTaskStarted(ctx, req); err != nil {
218+
// Collect UploadCacheTaskStartedFailureCount metrics.
219+
metrics.UploadCacheTaskStartedFailureCount.Inc()
220+
return nil, err
221+
}
222+
223+
return new(emptypb.Empty), nil
201224
}
202225

203-
// TODO Implement the following methods.
204226
// UploadCacheTaskFinished uploads the metadata of the cache task finished.
205-
func (s *schedulerServerV2) UploadCacheTaskFinished(ctx context.Context, request *schedulerv2.UploadCacheTaskFinishedRequest) (*commonv2.CacheTask, error) {
206-
return nil, nil
227+
func (s *schedulerServerV2) UploadCacheTaskFinished(ctx context.Context, req *schedulerv2.UploadCacheTaskFinishedRequest) (*commonv2.CacheTask, error) {
228+
// Collect UploadCacheTaskFinishedCount metrics.
229+
metrics.UploadCacheTaskFinishedCount.Inc()
230+
resp, err := s.service.UploadCacheTaskFinished(ctx, req)
231+
if err != nil {
232+
// Collect UploadCacheTaskFinishedFailureCount metrics.
233+
metrics.UploadCacheTaskFinishedFailureCount.Inc()
234+
return nil, err
235+
}
236+
237+
return resp, nil
207238
}
208239

209-
// TODO Implement the following methods.
210240
// UploadCacheTaskFailed uploads the metadata of the cache task failed.
211-
func (s *schedulerServerV2) UploadCacheTaskFailed(ctx context.Context, request *schedulerv2.UploadCacheTaskFailedRequest) (*emptypb.Empty, error) {
212-
return nil, nil
241+
func (s *schedulerServerV2) UploadCacheTaskFailed(ctx context.Context, req *schedulerv2.UploadCacheTaskFailedRequest) (*emptypb.Empty, error) {
242+
// Collect UploadCacheTaskFailedCount metrics.
243+
metrics.UploadCacheTaskFailedCount.Inc()
244+
if err := s.service.UploadCacheTaskFailed(ctx, req); err != nil {
245+
// Collect UploadCacheTaskFailedFailureCount metrics.
246+
metrics.UploadCacheTaskFailedFailureCount.Inc()
247+
return nil, err
248+
}
249+
250+
return new(emptypb.Empty), nil
213251
}
214252

215-
// TODO Implement the following methods.
216253
// StatCacheTask checks information of cache task.
217254
func (s *schedulerServerV2) StatCacheTask(ctx context.Context, req *schedulerv2.StatCacheTaskRequest) (*commonv2.CacheTask, error) {
218-
return nil, nil
255+
// Collect StatCacheTaskCount metrics.
256+
metrics.StatCacheTaskCount.Inc()
257+
resp, err := s.service.StatCacheTask(ctx, req)
258+
if err != nil {
259+
// Collect StatCacheTaskFailureCount metrics.
260+
metrics.StatCacheTaskFailureCount.Inc()
261+
return nil, err
262+
}
263+
264+
return resp, nil
219265
}
220266

221-
// TODO Implement the following methods.
222267
// DeleteCacheTask releases cache task in scheduler.
223268
func (s *schedulerServerV2) DeleteCacheTask(ctx context.Context, req *schedulerv2.DeleteCacheTaskRequest) (*emptypb.Empty, error) {
269+
// Collect DeleteCacheTaskCount metrics.
270+
metrics.DeleteCacheTaskCount.Inc()
271+
if err := s.service.DeleteCacheTask(ctx, req); err != nil {
272+
// Collect DeleteCacheTaskFailureCount metrics.
273+
metrics.DeleteCacheTaskFailureCount.Inc()
274+
return nil, err
275+
}
276+
224277
return new(emptypb.Empty), nil
225278
}

scheduler/service/service_v2.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,3 +1451,51 @@ func (v *V2) downloadTaskBySeedPeer(ctx context.Context, taskID string, download
14511451

14521452
return nil
14531453
}
1454+
1455+
// TODO Implement the following methods.
1456+
// AnnounceCachePeer announces cache peer to scheduler.
1457+
func (v *V2) AnnounceCachePeer(stream schedulerv2.Scheduler_AnnounceCachePeerServer) error {
1458+
return nil
1459+
}
1460+
1461+
// TODO Implement the following methods.
1462+
// StatCachePeer checks information of cache peer.
1463+
func (v *V2) StatCachePeer(ctx context.Context, req *schedulerv2.StatCachePeerRequest) (*commonv2.CachePeer, error) {
1464+
return nil, nil
1465+
}
1466+
1467+
// TODO Implement the following methods.
1468+
// DeleteCachePeer releases cache peer in scheduler.
1469+
func (v *V2) DeleteCachePeer(ctx context.Context, req *schedulerv2.DeleteCachePeerRequest) error {
1470+
return nil
1471+
}
1472+
1473+
// TODO Implement the following methods.
1474+
// UploadCacheTaskStarted uploads the metadata of the cache task started.
1475+
func (v *V2) UploadCacheTaskStarted(ctx context.Context, req *schedulerv2.UploadCacheTaskStartedRequest) error {
1476+
return nil
1477+
}
1478+
1479+
// TODO Implement the following methods.
1480+
// UploadCacheTaskFinished uploads the metadata of the cache task finished.
1481+
func (v *V2) UploadCacheTaskFinished(ctx context.Context, req *schedulerv2.UploadCacheTaskFinishedRequest) (*commonv2.CacheTask, error) {
1482+
return nil, nil
1483+
}
1484+
1485+
// TODO Implement the following methods.
1486+
// UploadCacheTaskFailed uploads the metadata of the cache task failed.
1487+
func (v *V2) UploadCacheTaskFailed(ctx context.Context, req *schedulerv2.UploadCacheTaskFailedRequest) error {
1488+
return nil
1489+
}
1490+
1491+
// TODO Implement the following methods.
1492+
// StatCacheTask checks information of cache task.
1493+
func (v *V2) StatCacheTask(ctx context.Context, req *schedulerv2.StatCacheTaskRequest) (*commonv2.CacheTask, error) {
1494+
return nil, nil
1495+
}
1496+
1497+
// TODO Implement the following methods.
1498+
// DeleteCacheTask releases cache task in scheduler.
1499+
func (v *V2) DeleteCacheTask(ctx context.Context, req *schedulerv2.DeleteCacheTaskRequest) error {
1500+
return nil
1501+
}

0 commit comments

Comments
 (0)