@@ -23,25 +23,19 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"math"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/RichardKnop/machinery/v1"
 	"github.com/go-playground/validator/v10"
-	grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
-	grpc_zap "github.com/grpc-ecosystem/go-grpc-middleware/logging/zap"
-	grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
-	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
-	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 
 	cdnsystemv1 "d7y.io/api/v2/pkg/apis/cdnsystem/v1"
 	commonv1 "d7y.io/api/v2/pkg/apis/common/v1"
 	commonv2 "d7y.io/api/v2/pkg/apis/common/v2"
 	dfdaemonv2 "d7y.io/api/v2/pkg/apis/dfdaemon/v2"
+	dfdaemonclient "d7y.io/dragonfly/v2/pkg/rpc/dfdaemon/client"
 
 	logger "d7y.io/dragonfly/v2/internal/dflog"
 	internaljob "d7y.io/dragonfly/v2/internal/job"
@@ -55,12 +49,6 @@ const (
 	preheatTimeout = 20 * time.Minute
 	// deleteTaskTimeout is timeout of deleting task.
 	deleteTaskTimeout = 20 * time.Minute
-	// deleteTaskConcurrency is the number of concurrent delete tasks.
-	deleteTaskConcurrency = 10
-	// deleteTaskMaxRetries is the maximum number of retries for delete tasks.
-	deleteTaskMaxRetries = 3
-	// deleteTaskBackoffWaitBetween is waiting for a fixed period of time between calls in backoff linear.
-	deleteTaskBackoffWaitBetween = 500 * time.Millisecond
 )
 
 // Job is an interface for job.
@@ -329,7 +317,7 @@ func (j *job) listTasks(ctx context.Context, data string) (string, error) {
 	}
 
 	// Get all peers by task id
-	peers, err := j.getValidPeers(req.TaskID)
+	peers, err := j.getFinishedPeers(req.TaskID)
 	if err != nil {
 		logger.Errorf("get peers by task id %s failed: %s", req.TaskID, err.Error())
 		return "", err
@@ -359,7 +347,7 @@ func (j *job) deleteTask(ctx context.Context, data string) (string, error) {
 	}
 
 	// Get all peers by task id
-	peers, err := j.getValidPeers(req.TaskID)
+	peers, err := j.getFinishedPeers(req.TaskID)
 	if err != nil {
 		logger.Errorf("get peers by task id %s failed: %s", req.TaskID, err.Error())
 		return "", err
@@ -369,71 +357,40 @@ func (j *job) deleteTask(ctx context.Context, data string) (string, error) {
 	successTasks := make([]*internaljob.Task, 0)
 	failureTasks := make([]*internaljob.Task, 0)
 
-	// Create a wait group to limit delete rpc concurrency
+	// TODO: Create a limiter to limit delete rpc concurrency
 	// and avoid too many rpc requests to the host.
-	wg := sync.WaitGroup{}
-	deleteTaskLimit := make(chan struct{}, deleteTaskConcurrency)
 	for _, peer := range peers {
-		wg.Add(1)
-		deleteTaskLimit <- struct{}{}
-		go func(peer *resource.Peer) {
-			defer func() {
-				wg.Done()
-				<-deleteTaskLimit
-			}()
-
-			// Get dfdaemon client from host
-			target := fmt.Sprintf("%s:%d", peer.Host.IP, peer.Host.Port)
-			conn, err := grpc.DialContext(
-				ctx,
-				target,
-				grpc.WithIdleTimeout(0),
-				grpc.WithDefaultCallOptions(
-					grpc.MaxCallRecvMsgSize(math.MaxInt32),
-					grpc.MaxCallSendMsgSize(math.MaxInt32),
-				),
-				grpc.WithUnaryInterceptor(grpc_middleware.ChainUnaryClient(
-					grpc_prometheus.UnaryClientInterceptor,
-					grpc_zap.UnaryClientInterceptor(logger.GrpcLogger.Desugar()),
-					grpc_retry.UnaryClientInterceptor(
-						grpc_retry.WithMax(deleteTaskMaxRetries),
-						grpc_retry.WithBackoff(grpc_retry.BackoffLinear(deleteTaskBackoffWaitBetween)),
-					),
-				)),
-			)
-			if err != nil {
-				logger.Errorf("create grpc client to %s failed: %s", target, err.Error())
-				failureTasks = append(failureTasks, &internaljob.Task{
-					Task:        peer.Task,
-					Peer:        peer,
-					Description: err.Error(),
-				})
-				return
-			}
-
-			dfdaemonUploadClient := dfdaemonv2.NewDfdaemonUploadClient(conn)
-			_, err = dfdaemonUploadClient.DeleteCacheTask(ctx, &dfdaemonv2.DeleteCacheTaskRequest{
-				TaskId: req.TaskID,
+		// Get dfdaemon client from host
+		target := fmt.Sprintf("%s:%d", peer.Host.IP, peer.Host.Port)
+		dfdaemonUploadClient, err := dfdaemonclient.GetV2ByAddr(ctx, target)
+		if err != nil {
+			logger.Errorf("get dfdaemon client from %s failed: %s", target, err.Error())
+			failureTasks = append(failureTasks, &internaljob.Task{
+				Task:        peer.Task,
+				Peer:        peer,
+				Description: err.Error(),
 			})
-			if err != nil {
-				logger.Errorf("delete task %s from %s failed: %s", req.TaskID, target, err.Error())
-				failureTasks = append(failureTasks, &internaljob.Task{
-					Task:        peer.Task,
-					Peer:        peer,
-					Description: err.Error(),
-				})
-				return
-			}
-
-			successTasks = append(successTasks, &internaljob.Task{
+			continue
+		}
+		err = dfdaemonUploadClient.DeleteCacheTask(ctx, &dfdaemonv2.DeleteCacheTaskRequest{
+			TaskId: req.TaskID,
+		})
+		if err != nil {
+			logger.Errorf("delete task %s from %s failed: %s", req.TaskID, target, err.Error())
+			failureTasks = append(failureTasks, &internaljob.Task{
 				Task:        peer.Task,
 				Peer:        peer,
-				Description: fmt.Sprintf("delete task %s from %s success", req.TaskID, target),
+				Description: err.Error(),
 			})
-		}(peer)
-	}
+			continue
+		}
 
-	wg.Wait()
+		successTasks = append(successTasks, &internaljob.Task{
+			Task:        peer.Task,
+			Peer:        peer,
+			Description: fmt.Sprintf("delete task %s from %s success", req.TaskID, target),
+		})
+	}
 
 	deleteTaskResponse := &internaljob.DeleteTaskResponse{
 		SuccessTasks: successTasks,
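The TODO above drops the bounded concurrency that the removed WaitGroup plus buffered-channel pattern provided; note that the removed version also appended to `successTasks` and `failureTasks` from multiple goroutines without a lock, which is a data race. Below is a minimal sketch of one way to restore the limit using `golang.org/x/sync/errgroup` with a mutex around the shared slices. The `Peer` type and `deleteFromPeer` helper are simplified stand-ins for illustration, not Dragonfly APIs.

```go
// Sketch only: bounded-concurrency deletes via golang.org/x/sync/errgroup.
// Peer and deleteFromPeer are hypothetical stand-ins, not Dragonfly APIs.
package main

import (
	"context"
	"fmt"
	"sync"

	"golang.org/x/sync/errgroup"
)

type Peer struct{ Addr string }

// deleteFromPeer is a hypothetical wrapper around the per-peer delete RPC
// (dfdaemonclient.GetV2ByAddr + DeleteCacheTask in the real code).
func deleteFromPeer(ctx context.Context, p *Peer) error {
	return nil
}

func deleteFromPeers(ctx context.Context, peers []*Peer, limit int) (succeeded, failed []*Peer) {
	var mu sync.Mutex // guards succeeded/failed; the removed goroutine version raced on these
	g, ctx := errgroup.WithContext(ctx)
	g.SetLimit(limit) // at most `limit` RPCs in flight, like the old buffered-channel semaphore

	for _, peer := range peers {
		peer := peer // capture loop variable (pre-Go 1.22 semantics)
		g.Go(func() error {
			err := deleteFromPeer(ctx, peer)
			mu.Lock()
			defer mu.Unlock()
			if err != nil {
				failed = append(failed, peer)
			} else {
				succeeded = append(succeeded, peer)
			}
			return nil // record per-peer failures instead of cancelling the group
		})
	}
	_ = g.Wait()
	return succeeded, failed
}

func main() {
	s, f := deleteFromPeers(context.Background(), []*Peer{{Addr: "127.0.0.1:4000"}}, 10)
	fmt.Printf("success: %d, failure: %d\n", len(s), len(f))
}
```

Here `SetLimit` plays the role of the removed `deleteTaskConcurrency` buffered channel, and returning `nil` from each worker keeps the group collecting per-peer failures rather than aborting on the first error.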
@@ -443,34 +400,14 @@ func (j *job) deleteTask(ctx context.Context, data string) (string, error) {
 	return internaljob.MarshalResponse(deleteTaskResponse)
 }
 
-// getValidPeers try to get valid peers by task id
-func (j *job) getValidPeers(taskID string) ([]*resource.Peer, error) {
+// getFinishedPeers tries to get finished peers by task id.
+func (j *job) getFinishedPeers(taskID string) ([]*resource.Peer, error) {
 	// get task info by task id
 	task, ok := j.resource.TaskManager().Load(taskID)
 	if !ok {
 		logger.Errorf("task %s not found", taskID)
 		return nil, fmt.Errorf("task %s not found", taskID)
 	}
 
-	// get peer info by task info
-	peers := make([]*resource.Peer, 0)
-	for _, vertex := range task.DAG.GetVertices() {
-		peer := vertex.Value
-		if peer == nil {
-			continue
-		}
-
-		peers = append(peers, peer)
-	}
-
-	// Choose finished peers as list tasks result
-	finishedPeers := make([]*resource.Peer, len(peers))
-	for _, peer := range peers {
-		currentState := peer.FSM.Current()
-		if currentState == resource.PeerStateSucceeded || currentState == resource.PeerStateFailed {
-			finishedPeers = append(finishedPeers, peer)
-		}
-	}
-
-	return finishedPeers, nil
+	return task.LoadFinishedPeers(), nil
}
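For reference, the removed inline filtering is the logic that `task.LoadFinishedPeers()` presumably now encapsulates in the resource package. The removed version also carried a latent bug: `make([]*resource.Peer, len(peers))` pre-fills the result with `len(peers)` nil entries that the subsequent appends never overwrite. A self-contained sketch of the equivalent logic with that fixed follows; the types are simplified stand-ins for the scheduler's resource types, not the actual implementation.

```go
// Sketch of the filtering the removed code performed, roughly what a
// LoadFinishedPeers helper does. Types are simplified stand-ins.
package main

import "fmt"

const (
	PeerStateSucceeded = "Succeeded"
	PeerStateFailed    = "Failed"
)

type Peer struct {
	ID    string
	State string // stands in for peer.FSM.Current()
}

type Vertex struct{ Value *Peer }

type Task struct{ vertices []*Vertex } // stands in for task.DAG.GetVertices()

// LoadFinishedPeers returns the peers that reached a terminal state.
func (t *Task) LoadFinishedPeers() []*Peer {
	// The removed inline code used make([]*Peer, len(peers)), which seeds
	// the result with nil entries before appending; use capacity instead.
	finished := make([]*Peer, 0, len(t.vertices))
	for _, v := range t.vertices {
		peer := v.Value
		if peer == nil {
			continue
		}
		if peer.State == PeerStateSucceeded || peer.State == PeerStateFailed {
			finished = append(finished, peer)
		}
	}
	return finished
}

func main() {
	task := &Task{vertices: []*Vertex{
		{Value: &Peer{ID: "a", State: PeerStateSucceeded}},
		{Value: &Peer{ID: "b", State: "Running"}},
		{Value: nil},
	}}
	fmt.Println(len(task.LoadFinishedPeers())) // 1
}
```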