将run.py修改如下:
search_space = {
'learning_rate': tune.loguniform(1e-5, 1e-2),
'num_epochs': tune.randint(2, 3),
'dropout': tune.uniform(0, 0.5),
'hidden_size': tune.randint(32, 257),
'num_layers': tune.randint(1,3)
}
if name == 'main':
dataset = 'THUCNews' # 数据集
model_grouping = {
'bert':2,
'TextRCNN':1,
'TextCNN':1,
'TextRNN':1,
'FastText':1,
}
# model_group = model_grouping[args.model]
model_group = model_grouping['bert'] ###此处直接选用bert作为训练模型
# 搜狗新闻:embedding_SougouNews.npz, 腾讯:embedding_Tencent.npz, 随机初始化:random
embedding = 'embedding_SougouNews.npz'
model_name = 'bert'
if model_group == 2:
from utils_bert import build_dataset, build_iterator, get_time_dif
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True # 保证每次结果一样
def experiment(tune_config):
x = import_module('models.' + model_name)
if model_group == 1:
config = x.Config(dataset, embedding)
elif model_group == 2:
config = x.Config(dataset)
if tune_config:
for param in tune_config:
setattr(config, param, tune_config[param])
start_time = time.time()
print("Loading data...")
if model_group == 1:
vocab, train_data, dev_data, test_data = build_dataset(config, args.word)
elif model_group == 2:
train_data, dev_data, test_data = build_dataset(config)
train_iter = build_iterator(train_data, config)
dev_iter = build_iterator(dev_data, config)
test_iter = build_iterator(test_data, config)
time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)
# train
if model_group == 1:
config.n_vocab = len(vocab)
model = x.Model(config).to(config.device)
if model_name != 'Transformer':
init_network(model)
print(model.parameters)
elif model_group == 2:
model = x.Model(config).to(config.device)
if tune_config:
res = train(config, model, train_iter, dev_iter, test_iter, model_group=model_group, tune_param=True)
tune.report(metric=res)
else:
train(config, model, train_iter, dev_iter, test_iter, model_group=model_group, tune_param=False)
print('tune param: ', True)
# if tune parameters
if True:
scheduler = ASHAScheduler(metric='metric', mode="max") if args.tune_asha else None
analysis = tune.run(experiment, num_samples=50, config=search_space, resources_per_trial={'gpu':int(True)},
scheduler=scheduler,
verbose=3)
analysis.results_df.to_csv('tune_results_'+args.tune_file+'.csv')
# if not tune parameters
else:
experiment(tune_config=None)
然后运行的过程中出现了如下错误:
ray.exceptions.RayTaskError(RuntimeError): ray::ImplicitFunc.train() (pid=22632, ip=127.0.0.1, repr=experiment)
File "python\ray_raylet.pyx", line 877, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 881, in ray._raylet.execute_task
File "python\ray_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\ray_private\function_manager.py", line 670, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
return method(self, *_args, **_kwargs)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\ray\tune\trainable\trainable.py", line 384, in train
raise skipped from exception_cause(skipped)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\ray\tune\trainable\function_trainable.py", line 339, in entrypoint
self._status_reporter.get_checkpoint(),
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
return method(self, *_args, **_kwargs)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\ray\tune\trainable\function_trainable.py", line 653, in _trainable_func
output = fn()
File "E:/w_learning/bertCTP-F/run.py", line 112, in experiment
res = train(config, model, train_iter, dev_iter, test_iter, model_group=model_group, tune_param=True)
File "E:\w_learning\bertCTP-F\train_eval.py", line 60, in train
outputs = model(trains)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\w_learning\bertCTP-F\models\bert.py", line 47, in forward
out = self.fc(pooled)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "G:\anaconda\envs\bertCTP-F\lib\site-packages\torch\nn\modules\linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x768 and 208x10)
想问下大佬这是什么地方出现了问题呀