+base_model.embedding.weight,base_model.enc_norm.bias,base_model.enc_norm.weight,base_model.layers.0.attn.dense.bias,base_model.layers.0.attn.dense.weight,base_model.layers.0.attn.in_proj.key.bias,base_model.layers.0.attn.in_proj.key.weight,base_model.layers.0.attn.in_proj.query.bias,base_model.layers.0.attn.in_proj.query.weight,base_model.layers.0.attn.in_proj.value.bias,base_model.layers.0.attn.in_proj.value.weight,base_model.layers.0.ff_ln.bias,base_model.layers.0.ff_ln.weight,base_model.layers.0.ff_sub_layer.w1.bias,base_model.layers.0.ff_sub_layer.w1.weight,base_model.layers.0.ff_sub_layer.w2.bias,base_model.layers.0.ff_sub_layer.w2.weight,base_model.layers.0.ln.bias,base_model.layers.0.ln.weight,base_model.layers.1.attn.dense.bias,base_model.layers.1.attn.dense.weight,base_model.layers.1.attn.in_proj.key.bias,base_model.layers.1.attn.in_proj.key.weight,base_model.layers.1.attn.in_proj.query.bias,base_model.layers.1.attn.in_proj.query.weight,base_model.layers.1.attn.in_proj.value.bias,base_model.layers.1.attn.in_proj.value.weight,base_model.layers.1.ff_ln.bias,base_model.layers.1.ff_ln.weight,base_model.layers.1.ff_sub_layer.w1.bias,base_model.layers.1.ff_sub_layer.w1.weight,base_model.layers.1.ff_sub_layer.w2.bias,base_model.layers.1.ff_sub_layer.w2.weight,base_model.layers.1.ln.bias,base_model.layers.1.ln.weight,base_model.layers.10.attn.dense.bias,base_model.layers.10.attn.dense.weight,base_model.layers.10.attn.in_proj.key.bias,base_model.layers.10.attn.in_proj.key.weight,base_model.layers.10.attn.in_proj.query.bias,base_model.layers.10.attn.in_proj.query.weight,base_model.layers.10.attn.in_proj.value.bias,base_model.layers.10.attn.in_proj.value.weight,base_model.layers.10.ff_ln.bias,base_model.layers.10.ff_ln.weight,base_model.layers.10.ff_sub_layer.w1.bias,base_model.layers.10.ff_sub_layer.w1.weight,base_model.layers.10.ff_sub_layer.w2.bias,base_model.layers.10.ff_sub_layer.w2.weight,base_model.layers.10.ln.bias,base_model.layers.10.ln.weight,base_model.layers.11.attn.dense.bias,base_model.layers.11.attn.dense.weight,base_model.layers.11.attn.in_proj.key.bias,base_model.layers.11.attn.in_proj.key.weight,base_model.layers.11.attn.in_proj.query.bias,base_model.layers.11.attn.in_proj.query.weight,base_model.layers.11.attn.in_proj.value.bias,base_model.layers.11.attn.in_proj.value.weight,base_model.layers.11.ff_ln.bias,base_model.layers.11.ff_ln.weight,base_model.layers.11.ff_sub_layer.w1.bias,base_model.layers.11.ff_sub_layer.w1.weight,base_model.layers.11.ff_sub_layer.w2.bias,base_model.layers.11.ff_sub_layer.w2.weight,base_model.layers.11.ln.bias,base_model.layers.11.ln.weight,base_model.layers.2.attn.dense.bias,base_model.layers.2.attn.dense.weight,base_model.layers.2.attn.in_proj.key.bias,base_model.layers.2.attn.in_proj.key.weight,base_model.layers.2.attn.in_proj.query.bias,base_model.layers.2.attn.in_proj.query.weight,base_model.layers.2.attn.in_proj.value.bias,base_model.layers.2.attn.in_proj.value.weight,base_model.layers.2.ff_ln.bias,base_model.layers.2.ff_ln.weight,base_model.layers.2.ff_sub_layer.w1.bias,base_model.layers.2.ff_sub_layer.w1.weight,base_model.layers.2.ff_sub_layer.w2.bias,base_model.layers.2.ff_sub_layer.w2.weight,base_model.layers.2.ln.bias,base_model.layers.2.ln.weight,base_model.layers.3.attn.dense.bias,base_model.layers.3.attn.dense.weight,base_model.layers.3.attn.in_proj.key.bias,base_model.layers.3.attn.in_proj.key.weight,base_model.layers.3.attn.in_proj.query.bias,base_model.layers.3.attn.in_proj.query.weight,base_model.layers.3.attn.in_proj.value.bias,base_model.layers.3.attn.in_proj.value.weight,base_model.layers.3.ff_ln.bias,base_model.layers.3.ff_ln.weight,base_model.layers.3.ff_sub_layer.w1.bias,base_model.layers.3.ff_sub_layer.w1.weight,base_model.layers.3.ff_sub_layer.w2.bias,base_model.layers.3.ff_sub_layer.w2.weight,base_model.layers.3.ln.bias,base_model.layers.3.ln.weight,base_model.layers.4.attn.dense.bias,base_model.layers.4.attn.dense.weight,base_model.layers.4.attn.in_proj.key.bias,base_model.layers.4.attn.in_proj.key.weight,base_model.layers.4.attn.in_proj.query.bias,base_model.layers.4.attn.in_proj.query.weight,base_model.layers.4.attn.in_proj.value.bias,base_model.layers.4.attn.in_proj.value.weight,base_model.layers.4.ff_ln.bias,base_model.layers.4.ff_ln.weight,base_model.layers.4.ff_sub_layer.w1.bias,base_model.layers.4.ff_sub_layer.w1.weight,base_model.layers.4.ff_sub_layer.w2.bias,base_model.layers.4.ff_sub_layer.w2.weight,base_model.layers.4.ln.bias,base_model.layers.4.ln.weight,base_model.layers.5.attn.dense.bias,base_model.layers.5.attn.dense.weight,base_model.layers.5.attn.in_proj.key.bias,base_model.layers.5.attn.in_proj.key.weight,base_model.layers.5.attn.in_proj.query.bias,base_model.layers.5.attn.in_proj.query.weight,base_model.layers.5.attn.in_proj.value.bias,base_model.layers.5.attn.in_proj.value.weight,base_model.layers.5.ff_ln.bias,base_model.layers.5.ff_ln.weight,base_model.layers.5.ff_sub_layer.w1.bias,base_model.layers.5.ff_sub_layer.w1.weight,base_model.layers.5.ff_sub_layer.w2.bias,base_model.layers.5.ff_sub_layer.w2.weight,base_model.layers.5.ln.bias,base_model.layers.5.ln.weight,base_model.layers.6.attn.dense.bias,base_model.layers.6.attn.dense.weight,base_model.layers.6.attn.in_proj.key.bias,base_model.layers.6.attn.in_proj.key.weight,base_model.layers.6.attn.in_proj.query.bias,base_model.layers.6.attn.in_proj.query.weight,base_model.layers.6.attn.in_proj.value.bias,base_model.layers.6.attn.in_proj.value.weight,base_model.layers.6.ff_ln.bias,base_model.layers.6.ff_ln.weight,base_model.layers.6.ff_sub_layer.w1.bias,base_model.layers.6.ff_sub_layer.w1.weight,base_model.layers.6.ff_sub_layer.w2.bias,base_model.layers.6.ff_sub_layer.w2.weight,base_model.layers.6.ln.bias,base_model.layers.6.ln.weight,base_model.layers.7.attn.dense.bias,base_model.layers.7.attn.dense.weight,base_model.layers.7.attn.in_proj.key.bias,base_model.layers.7.attn.in_proj.key.weight,base_model.layers.7.attn.in_proj.query.bias,base_model.layers.7.attn.in_proj.query.weight,base_model.layers.7.attn.in_proj.value.bias,base_model.layers.7.attn.in_proj.value.weight,base_model.layers.7.ff_ln.bias,base_model.layers.7.ff_ln.weight,base_model.layers.7.ff_sub_layer.w1.bias,base_model.layers.7.ff_sub_layer.w1.weight,base_model.layers.7.ff_sub_layer.w2.bias,base_model.layers.7.ff_sub_layer.w2.weight,base_model.layers.7.ln.bias,base_model.layers.7.ln.weight,base_model.layers.8.attn.dense.bias,base_model.layers.8.attn.dense.weight,base_model.layers.8.attn.in_proj.key.bias,base_model.layers.8.attn.in_proj.key.weight,base_model.layers.8.attn.in_proj.query.bias,base_model.layers.8.attn.in_proj.query.weight,base_model.layers.8.attn.in_proj.value.bias,base_model.layers.8.attn.in_proj.value.weight,base_model.layers.8.ff_ln.bias,base_model.layers.8.ff_ln.weight,base_model.layers.8.ff_sub_layer.w1.bias,base_model.layers.8.ff_sub_layer.w1.weight,base_model.layers.8.ff_sub_layer.w2.bias,base_model.layers.8.ff_sub_layer.w2.weight,base_model.layers.8.ln.bias,base_model.layers.8.ln.weight,base_model.layers.9.attn.dense.bias,base_model.layers.9.attn.dense.weight,base_model.layers.9.attn.in_proj.key.bias,base_model.layers.9.attn.in_proj.key.weight,base_model.layers.9.attn.in_proj.query.bias,base_model.layers.9.attn.in_proj.query.weight,base_model.layers.9.attn.in_proj.value.bias,base_model.layers.9.attn.in_proj.value.weight,base_model.layers.9.ff_ln.bias,base_model.layers.9.ff_ln.weight,base_model.layers.9.ff_sub_layer.w1.bias,base_model.layers.9.ff_sub_layer.w1.weight,base_model.layers.9.ff_sub_layer.w2.bias,base_model.layers.9.ff_sub_layer.w2.weight,base_model.layers.9.ln.bias,base_model.layers.9.ln.weight,base_model.position_embedding.weight,base_model.token_type_embeddings.weight,qa_head.bias,qa_head.weight
0 commit comments