Skip to content

Commit 08e6d6b

Browse files
committed
feat: add the storage pkg for stat task
Signed-off-by: chlins <chlins.zhang@gmail.com>
1 parent b2664ab commit 08e6d6b

File tree

4 files changed

+550
-0
lines changed

4 files changed

+550
-0
lines changed

pkg/storage/piece.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Copyright 2025 The Dragonfly Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package storage
18+
19+
import (
20+
"math/bits"
21+
)
22+
23+
const (
24+
// MAX_PIECE_COUNT is the maximum piece count. If the piece count is upper
25+
// than MAX_PIECE_COUNT, the piece length will be optimized by the file length.
26+
// When piece length became the MAX_PIECE_LENGTH, the piece count
27+
// probably will be upper than MAX_PIECE_COUNT.
28+
MAX_PIECE_COUNT uint64 = 500
29+
30+
// MIN_PIECE_LENGTH is the minimum piece length.
31+
MIN_PIECE_LENGTH uint64 = 4 * 1024 * 1024
32+
33+
// MAX_PIECE_LENGTH is the maximum piece length.
34+
MAX_PIECE_LENGTH uint64 = 64 * 1024 * 1024
35+
)
36+
37+
// nextPowerOfTwo returns the smallest power of two greater than or equal to n.
38+
func nextPowerOfTwo(n uint64) uint64 {
39+
if n == 0 {
40+
return 1
41+
}
42+
// If n is already a power of two, return n.
43+
if (n > 0) && (n&(n-1) == 0) {
44+
return n
45+
}
46+
// Otherwise, find the next power of two
47+
// bits.Len64(n) returns the smallest k such that n < 2^k.
48+
// So 1 << bits.Len64(n) is the smallest power of two strictly greater than n,
49+
// if n is not itself a power of two.
50+
return uint64(1) << bits.Len64(n)
51+
}
52+
53+
// CalculatePieceLength calculates the piece size based on the given content length.
54+
func CalculatePieceLength(contentLength uint64) uint64 {
55+
// If content length is 0, return the minimum piece length.
56+
if contentLength == 0 {
57+
return MIN_PIECE_LENGTH
58+
}
59+
60+
// Calculate initial piece length: (content_length / MAX_PIECE_COUNT)
61+
// This performs float division and truncates.
62+
pieceLength := uint64(float64(contentLength) / float64(MAX_PIECE_COUNT))
63+
64+
// Find the next power of two.
65+
actualPieceLength := nextPowerOfTwo(pieceLength)
66+
67+
if actualPieceLength < MIN_PIECE_LENGTH {
68+
return MIN_PIECE_LENGTH
69+
}
70+
71+
if actualPieceLength > MAX_PIECE_LENGTH {
72+
return MAX_PIECE_LENGTH
73+
}
74+
75+
return actualPieceLength
76+
}

pkg/storage/piece_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright 2025 The Dragonfly Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package storage
18+
19+
import (
20+
"testing"
21+
)
22+
23+
func TestCalculatePieceLength(t *testing.T) {
24+
mb := uint64(1024 * 1024)
25+
26+
tests := []struct {
27+
name string
28+
contentLength uint64
29+
want uint64
30+
}{
31+
{
32+
name: "contentLength is 0",
33+
contentLength: 0,
34+
want: MIN_PIECE_LENGTH,
35+
},
36+
{
37+
name: "contentLength results in pieceLength less than MIN_PIECE_LENGTH (after nextPowerOfTwo)",
38+
contentLength: 100 * mb, // pieceLength = 100MB/500 = 0.2MB = 209715. nextPowerOfTwo(209715) = 262144. 262144 < MIN_PIECE_LENGTH.
39+
want: MIN_PIECE_LENGTH,
40+
},
41+
{
42+
name: "contentLength results in pieceLength greater than MAX_PIECE_LENGTH (after nextPowerOfTwo)",
43+
contentLength: 40000 * mb, // pieceLength = 40000MB/500 = 80MB = 83886080. nextPowerOfTwo(83886080) = 128MB. 128MB > MAX_PIECE_LENGTH.
44+
want: MAX_PIECE_LENGTH,
45+
},
46+
{
47+
name: "contentLength where initial pieceLength becomes MIN_PIECE_LENGTH",
48+
contentLength: MIN_PIECE_LENGTH * MAX_PIECE_COUNT, // pieceLength = MIN_PIECE_LENGTH. nextPowerOfTwo is MIN_PIECE_LENGTH.
49+
want: MIN_PIECE_LENGTH,
50+
},
51+
{
52+
name: "contentLength where initial pieceLength becomes MAX_PIECE_LENGTH",
53+
contentLength: MAX_PIECE_LENGTH * MAX_PIECE_COUNT, // pieceLength = MAX_PIECE_LENGTH. nextPowerOfTwo is MAX_PIECE_LENGTH.
54+
want: MAX_PIECE_LENGTH,
55+
},
56+
{
57+
name: "contentLength results in pieceLength that's a power of two (8MB) and in range",
58+
contentLength: (8 * mb) * MAX_PIECE_COUNT, // pieceLength = 8MB. nextPowerOfTwo is 8MB.
59+
want: 8 * mb,
60+
},
61+
{
62+
name: "contentLength results in pieceLength rounded up to next power of two (8MB) and in range",
63+
contentLength: (6 * mb) * MAX_PIECE_COUNT, // pieceLength = 6MB. nextPowerOfTwo is 8MB.
64+
want: 8 * mb,
65+
},
66+
{
67+
name: "contentLength results in MIN_PIECE_LENGTH after rounding up from pieceLength=(MIN_PIECE_LENGTH/2)+1",
68+
contentLength: ((MIN_PIECE_LENGTH / 2) + 1) * MAX_PIECE_COUNT,
69+
want: MIN_PIECE_LENGTH,
70+
},
71+
{
72+
name: "contentLength results in MAX_PIECE_LENGTH after rounding up from pieceLength=(MAX_PIECE_LENGTH/2)+1",
73+
contentLength: ((MAX_PIECE_LENGTH / 2) + 1) * MAX_PIECE_COUNT,
74+
want: MAX_PIECE_LENGTH,
75+
},
76+
{
77+
name: "calculated pieceLength (e.g. 256 bytes) is power of two but less than MIN_PIECE_LENGTH",
78+
contentLength: 256 * MAX_PIECE_COUNT, // pieceLength = 256. nextPowerOfTwo(256) = 256. 256 < MIN_PIECE_LENGTH.
79+
want: MIN_PIECE_LENGTH,
80+
},
81+
{
82+
name: "calculated pieceLength (e.g. 128MB) is power of two but greater than MAX_PIECE_LENGTH",
83+
contentLength: (128 * mb) * MAX_PIECE_COUNT, // pieceLength = 128MB. nextPowerOfTwo(128MB) = 128MB. 128MB > MAX_PIECE_LENGTH.
84+
want: MAX_PIECE_LENGTH,
85+
},
86+
{
87+
name: "contentLength 1GB", // 1024MB. pieceLength = 1024MB/500 = 2.048MB = 2147483. nextPowerOfTwo(2147483) = 4MB (MIN_PIECE_LENGTH).
88+
contentLength: 1024 * mb,
89+
want: MIN_PIECE_LENGTH,
90+
},
91+
{
92+
name: "contentLength 10GB", // 10240MB. pieceLength = 10240MB/500 = 20.48MB = 21474836. nextPowerOfTwo(21474836) = 32MB.
93+
contentLength: 10240 * mb,
94+
want: 32 * mb,
95+
},
96+
{
97+
// Test with contentLength that makes pieceLength just under a power of two.
98+
// Target actualPieceLength = 8MB (8388608).
99+
// pieceLength should be > 4MB and <= 8MB.
100+
// Let pieceLength = 8MB - 1 = 8388607.
101+
// contentLength = 8388607 * MAX_PIECE_COUNT = 8388607 * 500 = 4194303500
102+
name: "contentLength makes pieceLength just under a power of two (results in 8MB)",
103+
contentLength: ( (8 * mb) -1 ) * MAX_PIECE_COUNT,
104+
want: 8 * mb,
105+
},
106+
}
107+
108+
for _, tt := range tests {
109+
t.Run(tt.name, func(t *testing.T) {
110+
if got := CalculatePieceLength(tt.contentLength); got != tt.want {
111+
t.Errorf("CalculatePieceLength(%d) = %v, want %v", tt.contentLength, got, tt.want)
112+
}
113+
})
114+
}
115+
}

pkg/storage/stat_task.go

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/*
2+
* Copyright 2025 The Dragonfly Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package storage
18+
19+
import (
20+
"errors"
21+
"os"
22+
"path/filepath"
23+
24+
"d7y.io/dragonfly/v2/pkg/idgen"
25+
)
26+
27+
// defaultFilteredQueryParams is the default filtered query params to generate the task id.
28+
var defaultFilteredQueryParams []string
29+
30+
var (
31+
// s3FilteredQueryParams is the default filtered query params with s3 protocol to generate the task id.
32+
s3FilteredQueryParams = []string{
33+
"X-Amz-Algorithm",
34+
"X-Amz-Credential",
35+
"X-Amz-Date",
36+
"X-Amz-Expires",
37+
"X-Amz-SignedHeaders",
38+
"X-Amz-Signature",
39+
"X-Amz-Security-Token",
40+
"X-Amz-User-Agent",
41+
}
42+
43+
// gcsFilteredQueryParams is the filtered query params with gcs protocol to generate the task id.
44+
gcsFilteredQueryParams = []string{
45+
"X-Goog-Algorithm",
46+
"X-Goog-Credential",
47+
"X-Goog-Date",
48+
"X-Goog-Expires",
49+
"X-Goog-SignedHeaders",
50+
"X-Goog-Signature",
51+
}
52+
53+
// ossFilteredQueryParams is the default filtered query params with oss protocol to generate the task id.
54+
ossFilteredQueryParams = []string{
55+
"OSSAccessKeyId",
56+
"Expires",
57+
"Signature",
58+
"SecurityToken",
59+
}
60+
61+
// obsFilteredQueryParams is the default filtered query params with obs protocol to generate the task id.
62+
obsFilteredQueryParams = []string{
63+
"AccessKeyId",
64+
"Signature",
65+
"Expires",
66+
"X-Obs-Date",
67+
"X-Obs-Security-Token",
68+
}
69+
70+
// cosFilteredQueryParams is the default filtered query params with cos protocol to generate the task id.
71+
cosFilteredQueryParams = []string{
72+
"q-sign-algorithm",
73+
"q-ak",
74+
"q-sign-time",
75+
"q-key-time",
76+
"q-header-list",
77+
"q-url-param-list",
78+
"q-signature",
79+
"x-cos-security-token",
80+
}
81+
82+
// containerdFilteredQueryParams is the default filtered query params with containerd to generate the task id.
83+
containerdFilteredQueryParams = []string{
84+
"ns",
85+
}
86+
)
87+
88+
func init() {
89+
defaultFilteredQueryParams = append(defaultFilteredQueryParams, s3FilteredQueryParams...)
90+
defaultFilteredQueryParams = append(defaultFilteredQueryParams, gcsFilteredQueryParams...)
91+
defaultFilteredQueryParams = append(defaultFilteredQueryParams, ossFilteredQueryParams...)
92+
defaultFilteredQueryParams = append(defaultFilteredQueryParams, obsFilteredQueryParams...)
93+
defaultFilteredQueryParams = append(defaultFilteredQueryParams, cosFilteredQueryParams...)
94+
defaultFilteredQueryParams = append(defaultFilteredQueryParams, containerdFilteredQueryParams...)
95+
}
96+
97+
// statTask is the options for stat task.
98+
type statTask struct {
99+
// path is the base path of the storage.
100+
path string
101+
// url is the url of the task.
102+
url string
103+
// contentLength is the content length of the task.(optional)
104+
contentLength *uint64
105+
// pieceLength is the piece length of the task.(optional)
106+
pieceLength *uint64
107+
// tag is the tag of the task.(optional)
108+
tag string
109+
// application is the application of the task.(optional)
110+
application string
111+
// filteredQueryParams is the filtered query params of the task.(optional)
112+
filteredQueryParams []string
113+
}
114+
115+
type StatTaskOption func(*statTask)
116+
117+
func WithStatTaskContentLength(contentLength *uint64) StatTaskOption {
118+
return func(task *statTask) {
119+
task.contentLength = contentLength
120+
}
121+
}
122+
123+
func WithStatTaskPieceLength(pieceLength *uint64) StatTaskOption {
124+
return func(task *statTask) {
125+
task.pieceLength = pieceLength
126+
}
127+
}
128+
129+
func WithStatTaskTag(tag string) StatTaskOption {
130+
return func(task *statTask) {
131+
task.tag = tag
132+
}
133+
}
134+
135+
func WithStatTaskApplication(application string) StatTaskOption {
136+
return func(task *statTask) {
137+
task.application = application
138+
}
139+
}
140+
141+
func WithStatTaskFilteredQueryParams(filteredQueryParams []string) StatTaskOption {
142+
return func(task *statTask) {
143+
task.filteredQueryParams = filteredQueryParams
144+
}
145+
}
146+
147+
// StatTask stats the task by the given parameters.
148+
func StatTask(path, url string, opts ...StatTaskOption) (os.FileInfo, error) {
149+
st := &statTask{
150+
path: path,
151+
url: url,
152+
}
153+
for _, opt := range opts {
154+
opt(st)
155+
}
156+
157+
// Validate and mutate the options for stat task.
158+
if st.path == "" || st.url == "" {
159+
return nil, errors.New("path and url are required")
160+
}
161+
162+
if st.contentLength == nil && st.pieceLength == nil {
163+
return nil, errors.New("either contentLength or pieceLength must be specified")
164+
}
165+
166+
if st.pieceLength == nil {
167+
// Calculate pieceLength from contentLength if not specified.
168+
pieceLength := CalculatePieceLength(*st.contentLength)
169+
st.pieceLength = &pieceLength
170+
}
171+
172+
if len(st.filteredQueryParams) == 0 {
173+
st.filteredQueryParams = defaultFilteredQueryParams
174+
}
175+
176+
taskID := idgen.TaskIDV2ByURLBased(st.url, st.pieceLength, st.tag, st.application, st.filteredQueryParams)
177+
178+
// Construct the file path.
179+
filePath := filepath.Join(path, "content/tasks", taskID[0:3], taskID)
180+
return os.Stat(filePath)
181+
}

0 commit comments

Comments
 (0)