PyTorch (4) A Simple ConvNet
This post is really just a digest of [1] and [2]; if you'd rather skip the English originals, stick around and listen to me ramble.
1. lib dependencies:
The usual libs:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
import matplotlib.pyplot as plt
import numpy as np
from time import time
2. Defining the network:
Our network is kept simple. It inherits from nn.Module and contains the following methods:
- __init__: initializes the attributes
- forward: defines how data propagates through the layers
- load_data: returns DataLoaders for the training and test sets, plus the class names
class Net(nn.Module):
    def __init__(self, dset="MNIST"):
        super(Net, self).__init__()
        self.dset = dset
        if self.dset == "CIFAR10":  # input image size [3, 32, 32]
            self.features = nn.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2, 2),
            )
            self.classifier = nn.Sequential(
                nn.Linear(16 * 5 * 5, 120),
                nn.ReLU(),
                nn.Linear(120, 84),
                nn.ReLU(),
                nn.Linear(84, 10)
            )
        elif self.dset == "MNIST":  # input image size [1, 28, 28]
            self.features = nn.Sequential(
                nn.Conv2d(1, 6, 5),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2, 2),
            )
            self.classifier = nn.Sequential(
                nn.Linear(16 * 4 * 4, 120),
                nn.ReLU(),
                nn.Linear(120, 84),
                nn.ReLU(),
                nn.Linear(84, 10)
            )
We test on two datasets, CIFAR10 and MNIST, whose images have different dimensions (CIFAR10 is [3, 32, 32], MNIST is [1, 28, 28]). Following [1], the network for the CIFAR10 dataset is composed of a feature-extraction stage (features) and a classification stage (classifier), all built from nn modules.
A quick rundown of the layer parameters; readers unfamiliar with convolution and max pooling can refer to my earlier post at http://cooperbear2.blogspot.com/2018/04/tensorflow-4-1-cnn-basics.html (a shape check follows the list):
- nn.Conv2d(3, 6, 5): 3, 6, 5 are the input channels, output channels, and kernel size
- nn.MaxPool2d(2, 2): 2, 2 are the kernel size and stride
- nn.Linear(16 * 5 * 5, 120): the final [16, 5, 5] feature map is flattened and mapped to 120 outputs
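If you want to convince yourself that 16 * 5 * 5 is the right number, push a dummy tensor through the feature stack; a minimal self-contained sketch using the same layers as above:

import torch
from torch import nn

# Spatial arithmetic for CIFAR10: 32 -> conv5 -> 28 -> pool2 -> 14
# -> conv5 -> 10 -> pool2 -> 5, which is where 16 * 5 * 5 comes from.
features = nn.Sequential(
    nn.Conv2d(3, 6, 5),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(6, 16, 5),
    nn.MaxPool2d(2, 2),
)
x = torch.randn(1, 3, 32, 32)  # one fake CIFAR10 image
print(features(x).shape)       # torch.Size([1, 16, 5, 5])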
forward defines what happens when the network receives a tensor: extract the features first, then classify them. view() flattens the feature map into one vector per sample:
    def forward(self, x):
        x = self.features(x)
        if self.dset == "CIFAR10":
            x = x.view(-1, 16 * 5 * 5)
        elif self.dset == "MNIST":
            x = x.view(-1, 16 * 4 * 4)
        x = self.classifier(x)
        return x
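As an aside (not what the code above does), the per-dataset branch can be avoided by flattening everything except the batch dimension, which works for both input sizes; a minimal sketch:

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten all but the batch dimension
        return self.classifier(x)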
Next we prepare the training and test data. transforms.Compose() chains a series of operations on PIL images: we convert each PIL image to a tensor, then normalize and center it (values end up in [-1, 1], centered at the origin). The data come from the datasets bundled with torchvision, and torch.utils.data.DataLoader() returns an iterator over them. Note that MNIST images have a single channel, so its Normalize needs one-element mean/std tuples:
    def load_data(self, batch_size=10):
        if self.dset == "CIFAR10":
            transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            classes = ('plane', 'car', 'bird', 'cat',
                       'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
            trainset = torchvision.datasets.CIFAR10(root='../../data/CIFAR10', train=True,
                                                    download=True, transform=transform)
            trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                                      shuffle=True, num_workers=2)
            testset = torchvision.datasets.CIFAR10(root='../../data/CIFAR10', train=False,
                                                   download=True, transform=transform)
            testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                                     shuffle=True, num_workers=2)
        elif self.dset == "MNIST":
            transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.5,), (0.5,))])  # single-channel stats
            classes = ('0', '1', '2', '3', '4',
                       '5', '6', '7', '8', '9')
            trainset = torchvision.datasets.MNIST(root='../../data/MNIST', train=True,
                                                  download=True, transform=transform)
            trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                                      shuffle=True, num_workers=2)
            testset = torchvision.datasets.MNIST(root='../../data/MNIST', train=False,
                                                 download=True, transform=transform)
            testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                                     shuffle=True, num_workers=2)
        return trainloader, testloader, classes
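A quick way to sanity-check the loaders before training (a throwaway sketch reusing the class above):

net = Net("MNIST")
trainloader, testloader, classes = net.load_data(batch_size=4)
images, labels = next(iter(trainloader))
print(images.shape)                        # torch.Size([4, 1, 28, 28])
print([classes[int(l)] for l in labels])   # e.g. ['7', '2', '1', '0']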
3. Training in the main program:
All that's left is to run the training; we use optim to drive the parameter updates. optim.Adam can take a list of dicts, which lets each parameter group have its own learning rate; in practice different layers often shouldn't share a single one.
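To see which learning rate each group actually ends up with, you can inspect optimizer.param_groups; a minimal sketch (assuming the Net class above):

net = Net("MNIST")
optimizer = optim.Adam([
    {"params": net.features.parameters(), "lr": 1e-2},
    {"params": net.classifier.parameters()}], lr=1e-3)
for i, group in enumerate(optimizer.param_groups):
    print("group %d: lr = %g" % (i, group["lr"]))
# group 0: lr = 0.01   (features, explicit lr)
# group 1: lr = 0.001  (classifier falls back to the default)

The full main program: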
if __name__ == "__main__":
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    net = Net("MNIST").to(device)
    # net = Net("CIFAR10").to(device)
    trainloader, testloader, classes = net.load_data()
    criterion = nn.CrossEntropyLoss()
    # [start-20180905-cooper-mod]#
    # optimizer = optim.Adam(net.parameters(), lr=0.001)
    optimizer = optim.Adam([
        {"params": net.features.parameters(), "lr": 1e-2},
        {"params": net.classifier.parameters()}], lr=1e-3)
    # [end-20180905-cooper-mod]#
    EPOCHS = 2
    LOSS = []
    start = time()
    for epoch in range(EPOCHS):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            if i % 1000 == 999:  # print every 1000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                LOSS.append(running_loss)
                running_loss = 0.0
    end = time()
    print("time consumption: ", end - start)
    plt.plot(LOSS)
    plt.show()
4. A quick test:
Finally, a simple but unorthodox test: we grab a few images and check whether the network's answers match what we expect:
def imshow(img):
    img = img / 2 + 0.5  # unnormalize back from [-1, 1] to [0, 1]
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

dataiter = iter(testloader)
images, labels = next(dataiter)  # dataiter.next() no longer works in newer PyTorch
del dataiter
# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(10)))
images = images.to(device)
labels = labels.to(device)
outputs = net(images)
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(10)))
plt.show()
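The visual check above is anecdotal; the more conventional measure is accuracy over the whole test set. A minimal sketch, reusing net, testloader, and device from above:

correct, total = 0, 0
with torch.no_grad():  # no gradients needed for evaluation
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy on the test set: %.2f %%' % (100 * correct / total))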
Reference: