We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
ZeroDivisionError
1 parent 192022f, commit a5c2229 (Copy full SHA for a5c2229)
megatron/data/data_utils.py
@@ -249,7 +249,7 @@ def build_weighted_datasets(
249
return train_datasets, valid_datasets, test_datasets
250
251
252
-def weights_by_num_docs(l, alpha=0.3):
+def weights_by_num_docs(l: list, alpha=0.3):
253
"""
254
Builds weights from a multinomial distribution over groups of data according to the number of
255
samples in each group.
@@ -263,6 +263,9 @@ def weights_by_num_docs(l, alpha=0.3):
263
264
See https://arxiv.org/abs/1911.02116 for more details
265
266
+ if len(l) == 1:
267
+ return [1.0]
268
+
269
total_n_docs = sum(l)
270
unbiased_sample_probs = [i / total_n_docs for i in l]
271
0 commit comments