I ran fine-tuning on my modified CNT dataset, and it failed with the following error. Full console output:
D:\ProgramData\anaconda3\envs\labram\python.exe E:\lab\DL\LaBraM-main\run_class_finetuning.py
Not using distributed mode
Namespace(batch_size=64, epochs=30, update_freq=1, save_ckpt_freq=5, robust_test=None, model='labram_base_patch200_200', qkv_bias=True, rel_pos_bias=True, abs_pos_emb=True, layer_scale_init_value=0.1, input_size=200, drop=0.0, attn_drop_rate=0.0, drop_path=0.1, disable_eval_during_finetuning=False, model_ema=False, model_ema_decay=0.9999, model_ema_force_cpu=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.05, weight_decay_end=None, lr=0.0005, layer_decay=0.9, warmup_lr=1e-06, min_lr=1e-06, warmup_epochs=5, warmup_steps=-1, smoothing=0.1, reprob=0.25, remode='pixel', recount=1, resplit=False, finetune='', model_key='model|module', model_prefix='', model_filter_name='gzp', init_scale=0.001, use_mean_pooling=True, disable_weight_decay_on_rel_pos_bias=False, nb_classes=4, output_dir='E:/lab/DL/LaBraM-main/checkpoints/finetune_MI_base', log_dir='E:/lab/DL/LaBraM-main/log/finetune_MI_base', device='cuda', seed=0, resume='', auto_resume=True, save_ckpt=True, start_epoch=0, eval=False, dist_eval=False, num_workers=10, pin_mem=True, world_size=1, local_rank=-1, dist_on_itp=False, dist_url='env://', enable_deepspeed=False, dataset='MI', distributed=False)
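For reference, a launch command consistent with the Namespace above would be roughly the following (the explicit flags are the non-default values, everything else falls back to the script's defaults; this is a reconstruction, not the exact command I typed):

    python run_class_finetuning.py \
        --model labram_base_patch200_200 \
        --dataset MI \
        --nb_classes 4 \
        --batch_size 64 \
        --epochs 30 \
        --lr 5e-4 \
        --layer_decay 0.9 \
        --output_dir E:/lab/DL/LaBraM-main/checkpoints/finetune_MI_base \
        --log_dir E:/lab/DL/LaBraM-main/log/finetune_MI_base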
2199 399 680
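(These appear to be the train / validation / test sample counts; the 2199 matches the "Number of training examples = 2199" line further down.)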
Sampler_train = <torch.utils.data.distributed.DistributedSampler object at 0x000001F93E230D50>
Patch size = 200
Model = NeuralTransformer(
  (patch_embed): TemporalConv(
    (conv1): Conv2d(1, 8, kernel_size=(1, 15), stride=(1, 8), padding=(0, 7))
    (gelu1): GELU(approximate='none')
    (norm1): GroupNorm(4, 8, eps=1e-05, affine=True)
    (conv2): Conv2d(8, 8, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (gelu2): GELU(approximate='none')
    (norm2): GroupNorm(4, 8, eps=1e-05, affine=True)
    (conv3): Conv2d(8, 8, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (norm3): GroupNorm(4, 8, eps=1e-05, affine=True)
    (gelu3): GELU(approximate='none')
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0): Block(
      (norm1): LayerNorm((200,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=200, out_features=600, bias=False)
        (q_norm): LayerNorm((20,), eps=1e-06, elementwise_affine=True)
        (k_norm): LayerNorm((20,), eps=1e-06, elementwise_affine=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=200, out_features=200, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((200,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=200, out_features=800, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=800, out_features=200, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    [(1)-(11): eleven more identical Blocks elided; only drop_path differs, increasing linearly (p ≈ 0.0091, 0.0182, 0.0273, 0.0364, 0.0455, 0.0545, 0.0636, 0.0727, 0.0818, 0.0909, 0.1)]
  )
  (norm): Identity()
  (fc_norm): LayerNorm((200,), eps=1e-06, elementwise_affine=True)
  (head): Linear(in_features=200, out_features=4, bias=True)
)
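The DropPath probabilities in the printout follow the usual stochastic-depth schedule: the drop_path=0.1 argument is spread linearly across the 12 blocks (the timm/BEiT convention), which is why block (0) gets Identity() and block (11) gets p=0.1. A minimal reproduction:

    import torch

    # Stochastic-depth rates spaced linearly over the 12 blocks (timm/BEiT style).
    dpr = [x.item() for x in torch.linspace(0, 0.1, 12)]
    print(dpr)  # [0.0, 0.00909..., 0.01818..., ..., 0.09090..., 0.1] -- matches the printout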
number of params: 5825540
LR = 0.00050000
Batch size = 64
Update frequent = 1
Number of training examples = 2199
Number of training training per epoch = 34
Assigned values = [0.2541865828329001, 0.2824295364810001, 0.31381059609000006, 0.3486784401000001, 0.3874204890000001, 0.4304672100000001, 0.4782969000000001, 0.531441, 0.5904900000000001, 0.6561, 0.7290000000000001, 0.81, 0.9, 1.0]
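These "Assigned values" are the layer-wise learning-rate scales produced by layer_decay=0.9: with 12 blocks there are 14 layer ids (embedding layer, 12 blocks, head), and each gets 0.9 raised to (13 - layer_id). A quick check:

    # Layer-wise lr-decay scales for layer_decay = 0.9 over 12 transformer blocks.
    num_layers, layer_decay = 12, 0.9
    scales = [layer_decay ** (num_layers + 1 - i) for i in range(num_layers + 2)]
    print(scales)  # [0.2541865828329001, ..., 0.81, 0.9, 1.0] -- matches the log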
Skip weight decay name marked in model: {'time_embed', 'cls_token', 'pos_embed'}
Param groups = {
  "layer_0_no_decay": {
    "weight_decay": 0.0,
    "params": ["cls_token", "pos_embed", "patch_embed.conv1.bias", "patch_embed.norm1.weight", "patch_embed.norm1.bias", "patch_embed.conv2.bias", "patch_embed.norm2.weight", "patch_embed.norm2.bias", "patch_embed.conv3.bias", "patch_embed.norm3.weight", "patch_embed.norm3.bias"],
    "lr_scale": 0.2541865828329001
  },
  "layer_0_decay": {
    "weight_decay": 0.05,
    "params": ["patch_embed.conv1.weight", "patch_embed.conv2.weight", "patch_embed.conv3.weight"],
    "lr_scale": 0.2541865828329001
  },
  "layer_1_no_decay": {
    "weight_decay": 0.0,
    "params": ["blocks.0.gamma_1", "blocks.0.gamma_2", "blocks.0.norm1.weight", "blocks.0.norm1.bias", "blocks.0.attn.q_bias", "blocks.0.attn.v_bias", "blocks.0.attn.q_norm.weight", "blocks.0.attn.q_norm.bias", "blocks.0.attn.k_norm.weight", "blocks.0.attn.k_norm.bias", "blocks.0.attn.proj.bias", "blocks.0.norm2.weight", "blocks.0.norm2.bias", "blocks.0.mlp.fc1.bias", "blocks.0.mlp.fc2.bias"],
    "lr_scale": 0.2824295364810001
  },
  "layer_1_decay": {
    "weight_decay": 0.05,
    "params": ["blocks.0.attn.qkv.weight", "blocks.0.attn.proj.weight", "blocks.0.mlp.fc1.weight", "blocks.0.mlp.fc2.weight"],
    "lr_scale": 0.2824295364810001
  },
  [layer_2 through layer_12 elided: the same no_decay/decay split repeats for blocks.1 through blocks.11, with lr_scale = 0.9^(13 - layer), i.e. 0.3138, 0.3487, 0.3874, 0.4305, 0.4783, 0.5314, 0.5905, 0.6561, 0.729, 0.81, 0.9]
  "layer_13_no_decay": {
    "weight_decay": 0.0,
    "params": ["time_embed", "fc_norm.weight", "fc_norm.bias", "head.bias"],
    "lr_scale": 1.0
  },
  "layer_13_decay": {
    "weight_decay": 0.05,
    "params": ["head.weight"],
    "lr_scale": 1.0
  }
}
Optimizer config: {'lr': 0.0005, 'weight_decay': 0.0, 'eps': 1e-08}
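(The top-level weight_decay here shows 0.0 only because decay is assigned per parameter group above; likewise, each group's effective lr is the base 5e-4 multiplied by its lr_scale.)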
Use step level LR scheduler!
Set warmup steps = 170
Set warmup steps = 0
Max WD = 0.0500000, Min WD = 0.0500000
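(The 170 warmup steps are simply warmup_epochs x iterations per epoch, 5 x 34; and since weight_decay_end is unset, WD stays constant at 0.05.)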
criterion = LabelSmoothingCrossEntropy()
Auto resume checkpoint:
Start training for 30 epochs
Traceback (most recent call last):
File "E:\lab\DL\LaBraM-main\run_class_finetuning.py", line 582, in
main(opts, ds_init)
File "E:\lab\DL\LaBraM-main\run_class_finetuning.py", line 496, in main
train_stats = train_one_epoch(
^^^^^^^^^^^^^^^^
File "E:\lab\DL\LaBraM-main\engine_for_finetuning.py", line 77, in train_one_epoch
loss, output = train_class_batch(
^^^^^^^^^^^^^^^^^^
File "E:\lab\DL\LaBraM-main\engine_for_finetuning.py", line 19, in train_class_batch
outputs = model(samples, ch_names)
^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\labram\Lib\site-packages\torch\nn\modules\module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramData\anaconda3\envs\labram\Lib\site-packages\torch\nn\modules\module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "E:\lab\DL\LaBraM-main\modeling_finetune.py", line 395, in forward
x = self.forward_features(x, input_chans=input_chans, return_patch_tokens=return_patch_tokens, return_all_tokens=return_all_tokens, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "E:\lab\DL\LaBraM-main\modeling_finetune.py", line 362, in forward_features
x = x + pos_embed
~~^~~~~~~~~~~
RuntimeError: The size of tensor a (341) must match the size of tensor b (286) at non-singleton dimension 1
Process finished with exit code 1
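The crash itself is a positional-embedding length mismatch in forward_features: the input batch is unfolded into 341 tokens (the cls token plus one 200-sample patch per channel per time window), but the pos_embed gathered for the given ch_names/input_chans only spans 286 positions. That points at the dataset side: either the channel list passed to the model or the segment length of my modified CNT data differs from what the pos_embed expansion expects. A minimal sanity check of the token arithmetic (n_channels and n_seconds are hypothetical placeholders; substitute the actual values from the dataset pipeline):

    # Hypothetical numbers that happen to reproduce the 341 -- substitute your own.
    # labram_base_patch200_200 patches EEG into 200-sample (1 s @ 200 Hz) windows.
    n_channels, n_seconds = 17, 20
    n_tokens = 1 + n_channels * n_seconds  # cls token + one patch per channel-second
    print(n_tokens)  # 341, the size of tensor a; pos_embed here only covers 286 positions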