# model_new代表新的模型 # model_saved代表其他模型,比如用torch.load导入的已保存的模型 model_new_dict = model_new.state_dict() model_common_dict = {k:v for k, v in model_saved.items() if k in model_new_dict.keys()} model_new_dict.update(model_common_dict) model_new.load_state_dict(model_new_dict)
# Loss and optimizer criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model total_step = len(train_loader) for epoch inrange(num_epochs): for i ,(images, labels) inenumerate(train_loader): images = images.to(device) labels = labels.to(device)
# Forward pass outputs = model(images) loss = criterion(outputs, labels)
# Backward and optimizer optimizer.zero_grad() loss.backward() optimizer.step()
def_smooth_label(self, target, length, smooth_factor): """convert targets to one-hot format, and smooth them. Args: target: target in form with [label1, label2, label_batchsize] length: length of one-hot format(number of classes) smooth_factor: smooth factor for label smooth
Returns: smoothed labels in one hot format """ one_hot = self._one_hot(target, length, value=1 - smooth_factor) one_hot += smooth_factor / (length - 1)
return one_hot.to(target.device)
defforward(self, x, target):
if x.size(0) != target.size(0): raise ValueError('Expected input batchsize ({}) to match target batch_size({})' .format(x.size(0), target.size(0)))
if x.dim() < 2: raise ValueError('Expected input tensor to have least 2 dimensions(got {})' .format(x.size(0)))
if x.dim() != 2: raise ValueError('Only 2 dimension tensor are implemented, (got {})' .format(x.size()))
smoothed_target = self._smooth_label(target, x.size(1), self.e) x = self.log_softmax(x) loss = torch.sum(- x * smoothed_target, dim=1)
else: raise ValueError('unrecognized option, expect reduction to be one of none, mean, sum')
或者直接在训练文件里做 label smoothing
1 2 3 4 5 6 7 8 9 10 11 12 13
for images, labels in train_loader: images, labels = images.cuda(), labels.cuda() N = labels.size(0) # C is the number of classes. smoothed_labels = torch.full(size=(N, C), fill_value=0.1 / (C - 1)).cuda() smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=0.9)
score = model(images) log_prob = torch.nn.functional.log_softmax(score, dim=1) loss = -torch.sum(log_prob * smoothed_labels) / N optimizer.zero_grad() loss.backward() optimizer.step()
Mixup 训练
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
beta_distribution = torch.distributions.beta.Beta(alpha, alpha) for images, labels in train_loader: images, labels = images.cuda(), labels.cuda()
defforward(self, x): with torch.no_grad(): conv_representation = [] for name, layer inself._model.named_children(): x = layer(x) if name inself._layers_to_extract: conv_representation.append(x) return conv_representation
微调全连接层
1 2 3 4 5 6 7 8 9 10 11 12
model = torchvision.models.resnet18(pretrained=True) for param in model.parameters(): param.requires_grad = False model.fc = nn.Linear(512, 100) # Replace the last fc layer optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4) 以较大学习率微调全连接层,较小学习率微调卷积层 model = torchvision.models.resnet18(pretrained=True) finetuned_parameters = list(map(id, model.fc.parameters())) conv_parameters = (p for p in model.parameters() ifid(p) notin finetuned_parameters) parameters = [{'params': conv_parameters, 'lr': 1e-3}, {'params': model.fc.parameters()}] optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)
以较大学习率微调全连接层,较小学习率微调卷积层
1 2 3 4 5 6
model = torchvision.models.resnet18(pretrained=True) finetuned_parameters = list(map(id, model.fc.parameters())) conv_parameters = (p for p in model.parameters() ifid(p) notin finetuned_parameters) parameters = [{'params': conv_parameters, 'lr': 1e-3}, {'params': model.fc.parameters()}] optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9, weight_decay=1e-4)