from typing import List
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, f1_score
from sklearn.metrics import recall_score, confusion_matrix
Define the ClassEvaluator class
class ClassEvaluator(object):
    def __init__(self):
        self.goldens = []
        self.predictions = []

    def add_batch(self, pred_batch: List[List], gold_batch: List[List]):
        """
        Add one batch of predictions and gold labels, to be aggregated later.

        Args:
            pred_batch (list): predicted labels, e.g. -> [0, 0, 1, 2, 0, ...] or [['体', '育'], ['财', '经'], ...]
            gold_batch (list): ground-truth labels, e.g. -> [1, 0, 1, 2, 0, ...] or [['体', '育'], ['财', '经'], ...]
        """
        assert len(pred_batch) == len(gold_batch)
        # if one label is made up of several sub-labels
        if type(gold_batch[0]) in [list, tuple]:
            # join all sub-labels into one full label: ['体', '育'] -> '体,育'
            pred_batch = [','.join([str(e) for e in ele]) for ele in pred_batch]
            gold_batch = [','.join([str(e) for e in ele]) for ele in gold_batch]
        self.goldens.extend(gold_batch)
        self.predictions.extend(pred_batch)

    def compute(self, round_num=2) -> dict:
        """
        Compute accuracy, P, R and F1 from the values accumulated so far.

        Args:
            round_num (int): number of decimal places to keep, 2 by default.

        Returns:
            dict -> {
                'accuracy': overall accuracy,
                'precision': overall precision,
                'recall': overall recall,
                'f1': overall f1,
                'class_metrics': {
                    '0': {
                        'precision': precision of this class,
                        'recall': recall of this class,
                        'f1': f1 of this class
                    },
                    ...
                }
            }
        """
        classes, class_metrics, res = sorted(list(set(self.goldens) | set(self.predictions))), {}, {}
        # global metrics
        res['accuracy'] = round(accuracy_score(self.goldens, self.predictions), round_num)
        # average='weighted' weights every class by its support, which accounts for
        # class imbalance; for a binary task use average='binary' instead.
        res['precision'] = round(precision_score(self.goldens, self.predictions, average='weighted'), round_num)
        res['recall'] = round(recall_score(self.goldens, self.predictions, average='weighted'), round_num)
        res['f1'] = round(f1_score(self.goldens, self.predictions, average='weighted'), round_num)
        try:
            conf_matrix = np.array(confusion_matrix(self.goldens, self.predictions))  # (n_class, n_class)
            assert conf_matrix.shape[0] == len(classes)
            for i in range(conf_matrix.shape[0]):  # per-class metrics
                precision = 0 if sum(conf_matrix[:, i]) == 0 else conf_matrix[i, i] / sum(conf_matrix[:, i])
                recall = 0 if sum(conf_matrix[i, :]) == 0 else conf_matrix[i, i] / sum(conf_matrix[i, :])
                f1 = 0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
                class_metrics[classes[i]] = {
                    'precision': round(precision, round_num),
                    'recall': round(recall, round_num),
                    'f1': round(f1, round_num)
                }
            res['class_metrics'] = class_metrics
        except Exception as e:
            print(f'[Warning] Something went wrong when computing class_metrics: {e}')
            print(f'-> goldens: {set(self.goldens)}')
            print(f'-> predictions: {set(self.predictions)}')
            print(f'-> diff elements: {set(self.predictions) - set(self.goldens)}')
            res['class_metrics'] = {}
        return res

    def reset(self):
        """
        Reset the accumulated values.
        """
        self.goldens = []
        self.predictions = []
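A minimal usage sketch of ClassEvaluator follows; the two batches and their label values are made up purely for illustration, and the output shown in the comment is roughly what the class above would return for them (rounded to two decimals).

# Hypothetical usage of ClassEvaluator; the labels below are illustrative only.
evaluator = ClassEvaluator()
evaluator.add_batch(pred_batch=[['体', '育'], ['财', '经']],
                    gold_batch=[['体', '育'], ['体', '育']])
evaluator.add_batch(pred_batch=[['财', '经']],
                    gold_batch=[['财', '经']])
print(evaluator.compute())
# roughly:
# {'accuracy': 0.67, 'precision': 0.83, 'recall': 0.67, 'f1': 0.67,
#  'class_metrics': {'体,育': {'precision': 1.0, 'recall': 0.5, 'f1': 0.67},
#                    '财,经': {'precision': 0.5, 'recall': 1.0, 'f1': 0.67}}}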
import os
import time
import sys
import torch  # needed below for the optimizer, loss and no_grad
from transformers import AutoModelForMaskedLM, AutoTokenizer, get_scheduler

sys.path.append('/Users/ligang/PycharmProjects/llm/prompt_tasks/PET/data_handle')
sys.path.append('/Users/ligang/PycharmProjects/llm/prompt_tasks/PET/utils')
from utils.metirc_utils import ClassEvaluator
from utils.common_utils import *
from data_handle.data_loader import *
from utils.verbalizer import Verbalizer
from pet_config import *

pc = ProjectConfig()
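All hyperparameters are read from ProjectConfig in pet_config.py. Purely as a reading aid, the sketch below lists the fields that this chapter's code accesses; the attribute names come from the code above and below, but every value (paths included) is a placeholder rather than the project's real setting.

# Hypothetical sketch of the ProjectConfig fields used in this chapter.
# Values are placeholders, not the actual project configuration.
import torch

class ProjectConfig(object):
    def __init__(self):
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        self.pre_model = 'bert-base-chinese'      # pretrained MLM checkpoint
        self.verbalizer = 'data/verbalizer.txt'   # label -> sub-label mapping file
        self.max_label_len = 2                    # max sub-label length
        self.learning_rate = 5e-5
        self.weight_decay = 0.0
        self.epochs = 10
        self.warmup_ratio = 0.06
        self.logging_steps = 10                   # log every N steps
        self.valid_steps = 100                    # evaluate and save every N steps
        self.save_dir = 'checkpoints'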
Define the model2train() function
def model2train():
    model = AutoModelForMaskedLM.from_pretrained(pc.pre_model)
    tokenizer = AutoTokenizer.from_pretrained(pc.pre_model)
    verbalizer = Verbalizer(verbalizer_file=pc.verbalizer,
                            tokenizer=tokenizer,
                            max_label_len=pc.max_label_len)

    # Weight decay is applied to smooth the objective, but bias and LayerNorm
    # weights only scale and shift activations and do not affect smoothness,
    # so they are excluded from weight decay.
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": pc.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=pc.learning_rate)
    model.to(pc.device)

    train_dataloader, dev_dataloader = get_data()
    # Derive the total number of training steps from the number of epochs; the
    # scheduler uses it to adjust the learning rate over the whole run.
    num_update_steps_per_epoch = len(train_dataloader)
    max_train_steps = pc.epochs * num_update_steps_per_epoch
    warm_steps = int(pc.warmup_ratio * max_train_steps)  # number of warmup steps
    lr_scheduler = get_scheduler(
        name='linear',
        optimizer=optimizer,
        num_warmup_steps=warm_steps,
        num_training_steps=max_train_steps,
    )
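    # Illustrative arithmetic (hypothetical numbers): with 500 batches per epoch,
    # epochs = 10 and warmup_ratio = 0.06, max_train_steps = 10 * 500 = 5000 and
    # warm_steps = int(0.06 * 5000) = 300, so the learning rate rises linearly for
    # the first 300 steps and then decays linearly towards 0 for the rest of training.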
    loss_list = []
    tic_train = time.time()
    metric = ClassEvaluator()
    criterion = torch.nn.CrossEntropyLoss()
    global_step, best_f1 = 0, 0
    print('Start training:')
    for epoch in range(pc.epochs):
        for batch in train_dataloader:
            logits = model(input_ids=batch['input_ids'].to(pc.device),
                           token_type_ids=batch['token_type_ids'].to(pc.device),
                           attention_mask=batch['attention_mask'].to(pc.device)).logits
            # print(f'logits from the model -> {logits.size()}')

            # ground-truth labels
            mask_labels = batch['mask_labels'].numpy().tolist()
            sub_labels = verbalizer.batch_find_sub_labels(mask_labels)
            sub_labels = [ele['token_ids'] for ele in sub_labels]
            # print(f'sub_labels ---> {sub_labels}')

            loss = mlm_loss(logits,
                            batch['mask_positions'].to(pc.device),
                            sub_labels,
                            criterion,
                            pc.device,
                            1.0)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            loss_list.append(float(loss.cpu().detach()))

            global_step += 1
            if global_step % pc.logging_steps == 0:
                time_diff = time.time() - tic_train
                loss_avg = sum(loss_list) / len(loss_list)
                print("global step %d, epoch: %d, loss: %.5f, speed: %.2f step/s"
                      % (global_step, epoch, loss_avg, pc.logging_steps / time_diff))
                tic_train = time.time()

            if global_step % pc.valid_steps == 0:
                cur_save_dir = os.path.join(pc.save_dir, "model_%d" % global_step)
                if not os.path.exists(cur_save_dir):
                    os.makedirs(cur_save_dir)
                model.save_pretrained(cur_save_dir)
                tokenizer.save_pretrained(cur_save_dir)

                acc, precision, recall, f1, class_metrics = evaluate_model(model,
                                                                           metric,
                                                                           dev_dataloader,
                                                                           tokenizer,
                                                                           verbalizer)
                print("Evaluation precision: %.5f, recall: %.5f, F1: %.5f" % (precision, recall, f1))
                if f1 > best_f1:
                    print(f"best F1 performance has been updated: {best_f1:.5f} --> {f1:.5f}")
                    print(f'Each Class Metrics are: {class_metrics}')
                    best_f1 = f1
                    cur_save_dir = os.path.join(pc.save_dir, "model_best")
                    if not os.path.exists(cur_save_dir):
                        os.makedirs(cur_save_dir)
                    model.save_pretrained(cur_save_dir)
                    tokenizer.save_pretrained(cur_save_dir)
                tic_train = time.time()

    print('Training finished.')
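The mlm_loss helper used above is imported from utils.common_utils and is not reproduced in this chapter. Purely as a reading aid, the sketch below shows one way such a loss can be computed, under the simplifying assumption that each sample contributes exactly one sub-label token sequence; it is not the project's actual implementation, and the function name is hypothetical.

# Hypothetical sketch of an MLM loss over [MASK] positions (NOT the project's mlm_loss).
import torch

def mlm_loss_sketch(logits, mask_positions, sub_mask_labels, criterion, device, scale=1.0):
    """
    logits:          (batch, seq_len, vocab_size) MLM scores
    mask_positions:  (batch, label_num) indices of the [MASK] tokens
    sub_mask_labels: one token-id list per sample, each of length label_num
    """
    batch_size = logits.size(0)
    loss = 0.0
    for i in range(batch_size):
        mask_logits = logits[i, mask_positions[i], :]              # (label_num, vocab_size)
        target = torch.LongTensor(sub_mask_labels[i]).to(device)   # (label_num,)
        loss = loss + criterion(mask_logits, target)               # cross-entropy per sample
    return scale * loss / batch_size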
Define the evaluate_model() function
def evaluate_model(model, metric, data_loader, tokenizer, verbalizer):
    """
    Evaluate the current model on the dev set.

    Args:
        model: the model being trained
        metric: the metric helper (ClassEvaluator)
        data_loader: dataloader of the dev set
        tokenizer: tokenizer used to decode label token ids
        verbalizer: maps predicted sub-labels back to main labels
    """
    model.eval()
    metric.reset()

    with torch.no_grad():
        for step, batch in enumerate(data_loader):
            logits = model(input_ids=batch['input_ids'].to(pc.device),
                           token_type_ids=batch['token_type_ids'].to(pc.device),
                           attention_mask=batch['attention_mask'].to(pc.device)).logits
            mask_labels = batch['mask_labels'].numpy().tolist()  # (batch, label_num)

            for i in range(len(mask_labels)):  # drop [PAD] tokens from the labels
                while tokenizer.pad_token_id in mask_labels[i]:
                    mask_labels[i].remove(tokenizer.pad_token_id)

            # convert token ids back to text
            mask_labels = [''.join(tokenizer.convert_ids_to_tokens(t)) for t in mask_labels]

            # (batch, label_num)
            predictions = convert_logits_to_ids(logits,
                                                batch['mask_positions']).cpu().numpy().tolist()
            # map each predicted sub-label to its main label
            predictions = verbalizer.batch_find_main_label(predictions)
            predictions = [ele['label'] for ele in predictions]
            metric.add_batch(pred_batch=predictions, gold_batch=mask_labels)

    eval_metric = metric.compute()
    model.train()

    return eval_metric['accuracy'], eval_metric['precision'], \
           eval_metric['recall'], eval_metric['f1'], \
           eval_metric['class_metrics']
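convert_logits_to_ids is imported from utils.common_utils and is not shown in this chapter; judging from how its output is used, it presumably picks the most likely token at every [MASK] position. A simplified, hypothetical version might look like this (the function name is made up):

# Hypothetical sketch: argmax over the vocabulary at each [MASK] position.
import torch

def convert_logits_to_ids_sketch(logits, mask_positions):
    # logits: (batch, seq_len, vocab_size); mask_positions: (batch, label_num)
    preds = []
    for i in range(logits.size(0)):
        mask_logits = logits[i, mask_positions[i], :]  # (label_num, vocab_size)
        preds.append(mask_logits.argmax(dim=-1))       # (label_num,)
    return torch.stack(preds)                          # (batch, label_num)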
Run the training script:

cd /Users/**/PycharmProjects/llm/prompt_tasks/PET
# run model training
python train.py
Imports for the inference script

import time
from typing import List
import torch
from rich import print
from transformers import AutoTokenizer, AutoModelForMaskedLM
import sys
sys.path.append('/Users/**/PycharmProjects/llm/prompt_tasks/PET/data_handle')
sys.path.append('/Users/**/PycharmProjects/llm/prompt_tasks/PET/utils')
from utils.verbalizer import Verbalizer
from data_handle.template import HardTemplate
from data_handle.data_preprocess import convert_example
from utils.common_utils import convert_logits_to_ids