Commit f692f4b

subclass nn.Parameters

1 parent cc08195

421 files changed: +3419 -2798 lines changed


Makefile

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ repo-consistency:
 	python utils/check_modular_conversion.py
 	python utils/check_dummies.py
 	python utils/check_repo.py
+	python utils/check_init_weights_data.py
 	python utils/check_inits.py
 	python utils/check_pipeline_typing.py
 	python utils/check_config_docstrings.py
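
The diff adds `utils/check_init_weights_data.py` to the `repo-consistency` target but does not show the script itself. A minimal sketch of what such a check could look like, assuming it simply flags `.data` usage inside `_init_weights` methods (an assumption for illustration, not the actual script):

```python
# Hypothetical sketch: scan modeling files for `.data` usage inside `_init_weights`.
# The real utils/check_init_weights_data.py may work differently.
import re
import sys
from pathlib import Path

PATTERN = re.compile(r"\.data\.(normal_|zero_|fill_|copy_)\(")


def check_file(path: Path) -> list[str]:
    errors = []
    in_init_weights = False
    for lineno, line in enumerate(path.read_text().splitlines(), start=1):
        if line.lstrip().startswith("def "):
            # Track whether we are currently inside an `_init_weights` definition.
            in_init_weights = line.lstrip().startswith("def _init_weights")
        if in_init_weights and PATTERN.search(line):
            errors.append(f"{path}:{lineno}: call in-place ops on the parameter itself, not on `.data`")
    return errors


if __name__ == "__main__":
    failures = []
    for file in Path("src/transformers/models").rglob("modeling_*.py"):
        failures.extend(check_file(file))
    if failures:
        print("\n".join(failures))
        sys.exit(1)
```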

docs/source/de/add_new_model.md

Lines changed: 7 additions & 7 deletions
@@ -508,16 +508,16 @@ BERT `_init_weights` Methode:
 def _init_weights(self, module):
     """Initialize the weights"""
     if isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
     elif isinstance(module, nn.Embedding):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.padding_idx is not None:
             module.weight.data[module.padding_idx].zero_()
     elif isinstance(module, nn.LayerNorm):
-        module.bias.data.zero_()
-        module.weight.data.fill_(1.0)
+        module.bias.zero_()
+        module.weight.fill_(1.0)
 ```

 Sie können weitere benutzerdefinierte Schemata verwenden, wenn Sie eine spezielle Initialisierung für einige Module benötigen. Zum Beispiel in

@@ -533,9 +533,9 @@ def _init_weights(self, module):
         module.project_hid._is_hf_initialized = True
         module.project_q._is_hf_initialized = True
     elif isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
 ```

 Das Flag `_is_hf_initialized` wird intern verwendet, um sicherzustellen, dass wir ein Submodul nur einmal initialisieren. Wenn Sie es auf

docs/source/en/add_new_model.md

Lines changed: 7 additions & 7 deletions
@@ -314,16 +314,16 @@ Random initialization occurs in the `_init_weights` method of `BrandNewLlamaPreT
 def _init_weights(self, module):
     """Initialize the weights"""
     if isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
     elif isinstance(module, nn.Embedding):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.padding_idx is not None:
             module.weight.data[module.padding_idx].zero_()
     elif isinstance(module, nn.LayerNorm):
-        module.bias.data.zero_()
-        module.weight.data.fill_(1.0)
+        module.bias.zero_()
+        module.weight.fill_(1.0)
 ```

 The initialization scheme can look different if you need to adapt it to your model. For example, [`Wav2Vec2ForPreTraining`] initializes [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) in its last two linear layers.

@@ -339,9 +339,9 @@ def _init_weights(self, module):
         module.project_hid._is_hf_initialized = True
         module.project_q._is_hf_initialized = True
     elif isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
 ```

 ### Convert checkpoints to Transformers
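
The pattern being rolled out across the docs and modeling files drops the `.data` indirection and calls the in-place initializers directly on the parameters. A minimal, self-contained sketch of the new style (not the actual Transformers code; `TinyConfig` and `TinyModel` are made-up names, and `torch.no_grad()` is one way to make the in-place calls on leaf parameters legal in plain PyTorch; how Transformers itself arranges this, e.g. via the subclassed `nn.Parameter` named in the commit title, is not visible in this diff):

```python
# Sketch of the updated initialization style: in-place ops on the parameters
# themselves instead of on their `.data` attribute.
import torch
from torch import nn


class TinyConfig:
    initializer_range = 0.02


class TinyModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.dense = nn.Linear(8, 8)
        self.norm = nn.LayerNorm(8)

    @torch.no_grad()  # in-place ops on leaf tensors requiring grad are allowed under no_grad
    def _init_weights(self, module):
        """Initialize the weights (new style: no `.data`)."""
        if isinstance(module, nn.Linear):
            module.weight.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.zero_()
            module.weight.fill_(1.0)


model = TinyModel(TinyConfig())
model.apply(model._init_weights)
print(model.norm.weight[:4])  # ones, as set by fill_(1.0)
```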

docs/source/ja/add_new_model.md

Lines changed: 7 additions & 7 deletions
@@ -406,16 +406,16 @@ model = BrandNewBertModel(BrandNewBertConfig())
 def _init_weights(self, module):
     """Initialize the weights"""
     if isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
     elif isinstance(module, nn.Embedding):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.padding_idx is not None:
             module.weight.data[module.padding_idx].zero_()
     elif isinstance(module, nn.LayerNorm):
-        module.bias.data.zero_()
-        module.weight.data.fill_(1.0)
+        module.bias.zero_()
+        module.weight.fill_(1.0)
 ```

 特定のモジュールに特別な初期化が必要な場合、カスタムスキームをさらに持つことができます。たとえば、

@@ -431,9 +431,9 @@ def _init_weights(self, module):
         module.project_hid._is_hf_initialized = True
         module.project_q._is_hf_initialized = True
     elif isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
 ```

 `_is_hf_initialized`フラグは、サブモジュールを一度だけ初期化することを確実にするために内部で使用されます。

docs/source/ko/add_new_model.md

Lines changed: 7 additions & 7 deletions
@@ -348,16 +348,16 @@ model = BrandNewBertModel(BrandNewBertConfig())
 def _init_weights(self, module):
     """Initialize the weights"""
     if isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
     elif isinstance(module, nn.Embedding):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.padding_idx is not None:
             module.weight.data[module.padding_idx].zero_()
     elif isinstance(module, nn.LayerNorm):
-        module.bias.data.zero_()
-        module.weight.data.fill_(1.0)
+        module.bias.zero_()
+        module.weight.fill_(1.0)
 ```

 몇 가지 모듈에 대해 특별한 초기화가 필요한 경우 사용자 정의 방식을 사용할 수도 있습니다. 예를 들어, `Wav2Vec2ForPreTraining`에서 마지막 두 개의 선형 레이어는 일반적인 PyTorch `nn.Linear`의 초기화를 가져야 하지만, 다른 모든 레이어는 위와 같은 초기화를 사용해야 합니다. 이는 다음과 같이 코드화됩니다:

@@ -371,9 +371,9 @@ def _init_weights(self, module):
         module.project_hid._is_hf_initialized = True
         module.project_q._is_hf_initialized = True
     elif isinstance(module, nn.Linear):
-        module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        module.weight.normal_(mean=0.0, std=self.config.initializer_range)
         if module.bias is not None:
-            module.bias.data.zero_()
+            module.bias.zero_()
 ```

 `_is_hf_initialized` 플래그는 서브모듈을 한 번만 초기화하도록 내부적으로 사용됩니다. `module.project_q`와 `module.project_hid`에 대해 `True`로 설정함으로써, 우리가 수행한 사용자 정의 초기화가 이후에 덮어쓰이지 않도록 합니다. 즉, `_init_weights` 함수가 이들에게 적용되지 않습니다.

examples/modular-transformers/modeling_dummy_bert.py

Lines changed: 6 additions & 6 deletions
@@ -533,18 +533,18 @@ class DummyBertPreTrainedModel(PreTrainedModel):
     def _init_weights(self, module):
         """Initialize the weights"""
         if isinstance(module, nn.Linear):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            module.weight.normal_(mean=0.0, std=self.config.initializer_range)
             if module.bias is not None:
-                module.bias.data.zero_()
+                module.bias.zero_()
         elif isinstance(module, nn.Embedding):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            module.weight.normal_(mean=0.0, std=self.config.initializer_range)
             if module.padding_idx is not None:
                 module.weight.data[module.padding_idx].zero_()
         elif isinstance(module, nn.LayerNorm):
-            module.bias.data.zero_()
-            module.weight.data.fill_(1.0)
+            module.bias.zero_()
+            module.weight.fill_(1.0)
         elif isinstance(module, DummyBertLMPredictionHead):
-            module.bias.data.zero_()
+            module.bias.zero_()


 @auto_docstring(

examples/modular-transformers/modeling_my_new_model2.py

Lines changed: 1 addition & 1 deletion
@@ -265,7 +265,7 @@ def _init_weights(self, module):

         # We initialize with 0s to be 1 centered as the RMSNorm here does (1 + weight)
         if "RMSNorm" in module.__class__.__name__:
-            module.weight.data.zero_()
+            module.weight.zero_()


 class MyNewModel2ForSequenceClassification(GenericForSequenceClassification, MyNewModel2PreTrainedModel):
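
The context comment above explains the zero initialization: this RMSNorm variant scales by `(1 + weight)`, so a zero weight is the identity scale. A short sketch of that formulation (a common Gemma-style RMSNorm written for illustration; the exact MyNewModel2 implementation is not shown in this diff):

```python
# Why zero-init works here: forward scales by (1 + weight), so weight == 0 is identity.
import torch
from torch import nn


class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.zeros(dim))  # zero-init => (1 + 0) = identity scale

    def forward(self, x):
        variance = x.pow(2).mean(-1, keepdim=True)
        x = x * torch.rsqrt(variance + self.eps)
        return x * (1.0 + self.weight)
```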

examples/modular-transformers/modeling_new_task_model.py

Lines changed: 2 additions & 2 deletions
@@ -104,9 +104,9 @@ def _init_weights(self, module):
         std = getattr(self.config, "initializer_range", self.config.get_text_config().initializer_range)

         if isinstance(module, nn.Linear):
-            module.weight.data.normal_(mean=0.0, std=std)
+            module.weight.normal_(mean=0.0, std=std)
             if module.bias is not None:
-                module.bias.data.zero_()
+                module.bias.zero_()


 def token_type_ids_mask_function(
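
The `std` context line above falls back to the nested text config when the top-level config has no `initializer_range`. A small sketch of that lookup pattern, with made-up config classes for illustration:

```python
# Illustration of the fallback used above: prefer the top-level initializer_range,
# otherwise take it from the nested text config. Config classes here are made up.
class TextConfig:
    initializer_range = 0.02


class MultimodalConfig:
    # no top-level initializer_range on purpose
    def get_text_config(self):
        return TextConfig()


config = MultimodalConfig()
std = getattr(config, "initializer_range", config.get_text_config().initializer_range)
print(std)  # 0.02
```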

examples/modular-transformers/modeling_roberta.py

Lines changed: 6 additions & 6 deletions
@@ -536,18 +536,18 @@ class RobertaPreTrainedModel(PreTrainedModel):
     def _init_weights(self, module):
         """Initialize the weights"""
         if isinstance(module, nn.Linear):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            module.weight.normal_(mean=0.0, std=self.config.initializer_range)
             if module.bias is not None:
-                module.bias.data.zero_()
+                module.bias.zero_()
         elif isinstance(module, nn.Embedding):
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            module.weight.normal_(mean=0.0, std=self.config.initializer_range)
             if module.padding_idx is not None:
                 module.weight.data[module.padding_idx].zero_()
         elif isinstance(module, nn.LayerNorm):
-            module.bias.data.zero_()
-            module.weight.data.fill_(1.0)
+            module.bias.zero_()
+            module.weight.fill_(1.0)
         elif isinstance(module, RobertaLMPredictionHead):
-            module.bias.data.zero_()
+            module.bias.zero_()


 @auto_docstring(

examples/modular-transformers/modeling_test_detr.py

Lines changed: 3 additions & 3 deletions
@@ -846,11 +846,11 @@ def _init_weights(self, module):
             nn.init.xavier_uniform_(module.output_proj.weight.data)
             nn.init.constant_(module.output_proj.bias.data, 0.0)
         elif isinstance(module, (nn.Linear, nn.Conv2d, nn.BatchNorm2d)):
-            module.weight.data.normal_(mean=0.0, std=std)
+            module.weight.normal_(mean=0.0, std=std)
             if module.bias is not None:
-                module.bias.data.zero_()
+                module.bias.zero_()
         elif isinstance(module, nn.Embedding):
-            module.weight.data.normal_(mean=0.0, std=std)
+            module.weight.normal_(mean=0.0, std=std)
             if module.padding_idx is not None:
                 module.weight.data[module.padding_idx].zero_()
         if hasattr(module, "reference_points") and not self.config.two_stage:
