Example 1: A shallow machine-learning example in PyTorch (approximating scikit-learn's MLPClassifier), using AT&T face-image recognition as the case study.
Note: when you are new to PyTorch, you must learn to work through example programs to understand its details. This includes carefully tracing every line of code, printing out intermediate contents or testing pieces in isolation when necessary; you can also use debug mode.
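For example, while tracing you can print the type, shape, and dtype of any intermediate object. A minimal sketch (the helper name peek is ours, purely illustrative):
def peek(obj, name='obj'):
    # works for NumPy arrays and PyTorch tensors; shape/dtype print as None for other objects
    print(f"{name}: type={type(obj).__name__}, "
          f"shape={getattr(obj, 'shape', None)}, dtype={getattr(obj, 'dtype', None)}")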
Load data and prepare for Torch
import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
df = pd.read_csv('data/face_data.csv')
n_persons = df['target'].nunique()
X = np.array(df.drop('target', axis=1)) # 400 x 4096
y = np.array(df['target'])
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size) # default test_size=0.25
# prepare data for PyTorch Tensor
X_train = torch.from_numpy(X_train).float() # convert to float tensor
y_train = torch.from_numpy(y_train).float() # labels; cast to long later where needed
train_dataset = TensorDataset(X_train, y_train) # create your dataset
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).float()
test_dataset = TensorDataset(X_test, y_test) # create your dataset
# create dataloader for PyTorch
batch_size = 64 # 32, 64, 128, 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) # convert to dataloader
test_loader = DataLoader(test_dataset, batch_size=len(X_test), shuffle=False)
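To confirm the loaders yield what the model expects, you can peek at one batch (a quick sanity check, not part of the original flow):
xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)  # expect torch.Size([64, 4096]) and torch.Size([64])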
Set up NN Model
import torch.nn as nn
import torch.nn.functional as F
# select device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = "cpu" # run faster than cuda in some cases
print("Using {} device".format(device))
# Create a neural network
class MLP(nn.Module):
def __init__(self):
super(MLP, self).__init__()
self.mlp = nn.Sequential(
nn.Linear(64*64, 512), # image length 64x64=4096, fully connected layer
nn.ReLU(), # try removing this ReLU to see what happens
# nn.Linear(512, 512), # second hidden layer
# nn.ReLU(),
nn.Linear(512, 40) # 40 output classes (one per person), fully connected layer
# nn.Softmax()
)
# Specify how data will pass through this model
def forward(self, x):
# out = self.mlp(x)
# apply log-softmax to x here
x = self.mlp(x)
out = F.log_softmax(x, dim=1) # faster and has better numerical properties than softmax
# out = F.softmax(x, dim=1)
return out
# define model, optimizer, loss function
model = MLP().to(device) # start an instance
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # default learning rate = 1e-3
loss_fun = nn.NLLLoss() # pairs with the log_softmax in forward(); log_softmax + NLLLoss is equivalent to CrossEntropyLoss on raw logits
print(model)
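Side note on the loss function: since forward() already applies log_softmax, using nn.CrossEntropyLoss here would take the log-softmax twice, because CrossEntropyLoss internally combines LogSoftmax and NLLLoss. A quick standalone check of the equivalence (sketch):
logits = torch.randn(3, 40)
target = torch.randint(0, 40, (3,))
print(torch.allclose(F.cross_entropy(logits, target),
                     F.nll_loss(F.log_softmax(logits, dim=1), target)))  # True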
Start training
from tqdm import tqdm
epochs = 50 # iterate over the whole dataset this many times
model.train() # Set the module to training mode; this affects layers such as Dropout and BatchNorm (it does not by itself enable weight updates)
for epoch in range(epochs):
# for epoch in tqdm(range(epochs)):
trainAcc = 0
samples = 0
losses = []
for batch_num, input_data in enumerate(train_loader):
# for batch_num, input_data in tqdm(enumerate(train_loader), total=len(train_loader)):
x, y = input_data
x = x.to(device).float()
y = y.to(device)
# perform training based on the backpropagation
y_pre = model(x) # predict y
loss = loss_fun(y_pre, y.long()) # NLLLoss expects class indices as a LongTensor
losses.append(loss.item())
optimizer.zero_grad() # Zeros the gradients accumulated from the previous batch/step of the model
loss.backward() # Performs backpropagation and calculates the gradients
optimizer.step() # Updates the weights in our neural network based on the results of backpropagation
# Record the training accuracy for each batch
trainAcc += (y_pre.argmax(dim=1) == y).sum().item() # comparison
samples += y.size(0)
if batch_num % 4 == 0:
print('\tEpoch %d | Batch %d | Loss %6.2f' % (epoch, batch_num, loss.item()))
print('Epoch %d | Loss %6.2f | train accuracy %.4f' % (epoch, sum(losses)/len(losses), trainAcc/samples))
print('Finished ... Loss %7.4f | train accuracy %.4f' % (sum(losses)/len(losses), trainAcc/samples))
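To see how training progressed, you can collect the per-epoch average loss and plot it afterwards. A minimal sketch assuming matplotlib and that you appended sum(losses)/len(losses) to a list named epoch_losses inside the epoch loop (epoch_losses is our addition, not in the code above):
import matplotlib.pyplot as plt
plt.plot(range(1, len(epoch_losses) + 1), epoch_losses)  # epoch_losses: assumed list of per-epoch mean losses
plt.xlabel('epoch')
plt.ylabel('average training loss')
plt.show()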
Testing (1): Compute test accuracy by batch
model.eval()
testAcc = 0
samples = 0
with torch.no_grad():
for x, y_truth in test_loader:
x = x.to(device).float()
y_truth = y_truth.to(device)
y_pre = model(x).argmax(dim=1) # the predictions for the batch
testAcc += (y_pre == y_truth).sum().item() # comparison
samples += y_truth.size(0)
print('Test Accuracy:{:.3f}'.format(testAcc/samples))
Testing (2): Compute the test accuracy and record the result for each test sample
import csv
# use eval() together with a torch.no_grad() context:
# eval() switches layer behaviour, while no_grad() turns off gradient computation
model.eval()
testAcc = 0
samples = 0
with open('mlp_att.csv', 'w') as f:
fieldnames = ['ImageId', 'Label', 'Ground_Truth']
writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator = '\n')
writer.writeheader()
image_id = 1
with torch.no_grad():
for x, y_truth in test_loader:
x = x.to(device).float()
y_truth = y_truth.to(device).long()
yIdx = 0
y_pre = model(x).argmax(dim=1) # the predictions for the batch
testAcc += (y_pre == y_truth).sum().item() # comparison
samples += y_truth.size(0)
for y in y_pre:
writer.writerow({fieldnames[0]: image_id,fieldnames[1]: y.item(), fieldnames[2]: y_truth[yIdx].item()})
image_id += 1
yIdx += 1
print('Test Accuracy:{:.3f}'.format(testAcc/samples))
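The CSV file written above can be read back to double-check the accuracy, e.g. with pandas (a small verification sketch based on the columns written above):
import pandas as pd
res = pd.read_csv('mlp_att.csv')
print('Accuracy from CSV: {:.3f}'.format((res['Label'] == res['Ground_Truth']).mean()))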
Exercise: adjust the NN model settings in the example above, including the number of layers, the number of neurons per layer, the choice of activation function, the choice of optimizer, and so on.
Exercise: same as the example above, but with the face-image data changed to the Yale Face dataset.
Exercise: same as the example above, but with the face-image data changed to handwritten digits.
Example 2: A deep-learning CNN example in PyTorch, using handwritten-digit recognition as the case study. The code is based in part on the official tutorial:
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
Load and prepare data: pay attention to how the loaded data are converted to Tensor form.
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
mnist = loadmat("data/mnist-original.mat")
X = mnist["data"].T # 70000 x 784
y = mnist["label"][0]
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size) # default test_size=0.25
# standardize (may not be necessary)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test) # reuse the scaler fitted on the training data; do not refit on the test set
# convert to tensors
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long()
X_train = X_train.reshape(-1, 1, 28, 28) # convert to N x 1 x 28 x 28 for CNN
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()
X_test = X_test.reshape(-1, 1, 28, 28) # N x 1 x 28 x 28
# check shapes of data
print("X_train.shape:", X_train.shape)
print("y_train.shape:", y_train.shape)
print("X_test.shape:", X_test.shape)
print("y_test.shape:", y_test.shape)
# create dataloaders
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
Define the network: note that the convolutions use zero-padding=0, i.e. no zero-padding, so when connecting to the linear layers you must work out the image size after each convolution and max-pooling layer.
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5) # 1 input channel, 6 output channels, 5x5 square convolution
self.conv2 = nn.Conv2d(6, 16, 5) # 6 input channels, 16 output channels, 5x5 square convolution
# an affine operation: y = Wx + b
self.fc1 = nn.Linear(16 * 4 * 4, 120) # 4*4 from image dimension
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10) # 10 output classes
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) # Max pooling over a (2, 2) window
# If the size is a square, you can specify with a single number
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))
net = Net().to(device)
print(net)
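To see why fc1 takes 16 * 4 * 4 inputs, track the spatial size with out = (in - kernel + 2*padding) / stride + 1 for each convolution and halve it after each 2x2 max-pooling: 28 -> 24 -> 12 -> 8 -> 4. A dummy forward pass through the two stages confirms this (a quick sanity check, not in the original code):
with torch.no_grad():
    t = torch.zeros(1, 1, 28, 28).to(device)
    t = F.max_pool2d(F.relu(net.conv1(t)), 2)  # conv 5x5, pad 0: 28 -> 24; pool: -> 12
    print(t.shape)  # torch.Size([1, 6, 12, 12])
    t = F.max_pool2d(F.relu(net.conv2(t)), 2)  # 12 -> 8; pool: -> 4
    print(t.shape)  # torch.Size([1, 16, 4, 4]), i.e. 16*4*4 = 256 features for fc1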
Inspect the image data about to enter training, and run a trial forward pass to check whether the network design is correct
import matplotlib.pyplot as plt
import numpy as np
import torchvision
input = train_dataset[0][0].reshape(1, 1, 28, 28)
# inspect the network output
out = net(input.to(device))
print(out.to("cpu").detach().numpy().shape)
print(out.data)
print(out.data.max(1, keepdim=True)[1])
print(out.to("cpu").detach().numpy())
# get some random training images
dataiter = iter(train_loader) # an iterable object
images, labels = next(dataiter) # get the next batch
print(images.shape)
# show a single image
a_image = images[0]
a_image_ = a_image.squeeze() # (1, 28, 28) -> (28, 28); imshow needs a 2-D array for grayscale
plt.imshow(a_image_, cmap='gray')
plt.show()
# show a grid of images
montage = torchvision.utils.make_grid(images, nrow=16) # make a grid of images with 16 images per row
plt.imshow(np.transpose(montage, (1, 2, 0)), cmap='gray')
plt.show()
Define a Loss function and optimizer: use a Classification Cross-Entropy loss and SGD with momentum
import torch.optim as optim # needed below for optim.SGD / optim.Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# optimizer = optim.Adam(net.parameters(), lr=0.01)
Train the network
epochs = 15
for epoch in range(epochs): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data[0].to(device), data[1].to(device) # move to GPU if available
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs) # forward pass
loss = criterion(outputs, labels) # compute loss
# writer.add_scalar("Loss/train", loss, epoch)
loss.backward() # compute gradients
optimizer.step() # update weights
# print statistics
running_loss += loss.item()
if i % 200 == 199: # print every 200 mini-batches
print(f'[epoch : {epoch + 1}, batch: {i + 1:5d}] loss: {running_loss / 200:.3f}')
running_loss = 0.0
print('Finished Training')
Save the trained model for later use: as a starting point for retraining, or as the final state after training is complete
PATH = './digit10_net.pt' # use .pt; the .pth extension can collide with Python's path-configuration (.pth) files
torch.save(net.state_dict(), PATH)
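To use the checkpoint later, recreate the model and load the saved weights (standard state_dict loading):
net = Net().to(device)
net.load_state_dict(torch.load(PATH))
net.eval()  # switch to evaluation mode for inference; call net.train() instead to continue training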
Test the network on the test data
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
for data in test_loader:
images, labels = data[0].to(device), data[1].to(device)
# calculate outputs by running images through the network
outputs = net(images)
# the class with the highest energy is what we choose as prediction
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')
Test on a random batch: examine the actual predictions on part of the test data
import torchvision
import matplotlib.pyplot as plt
import numpy as np
sample_idx = torch.randint(len(test_loader), size=(1,)).item()
dataiter = iter(test_loader)
for i in range(sample_idx + 1): # advance to the randomly selected batch; at least one next() so images is always defined
images, labels = next(dataiter)
# print test images
nrow = 16 # number of images per row
montage = torchvision.utils.make_grid(images, nrow=nrow)
plt.imshow(np.transpose(montage, (1, 2, 0)), cmap='gray')
plt.show()
# predict the labels
outputs = net(images.to(device))
_, predicted = torch.max(outputs, 1)
print("The predictions:\n", {predicted.reshape(batch_size // nrow, nrow)})
total = labels.size(0)
correct_rate = (predicted == labels.to(device)).sum().item() / total
print(f'Accuracy of the network on these {total} test images: {100 * correct_rate:.1f} %')
Use a second network structure, Net2: note that the convolutions use zero-padding=2, which changes how the image size evolves through the layers. Also, the network returns two outputs; the second is kept for other uses (here, visualization).
class Net2(nn.Module):
def __init__(self):
super(Net2, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
# fully connected layer, output 10 classes
self.out = nn.Linear(32 * 7 * 7, 10) # 32 * 7 * 7 is the size of the output of conv2
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
# flatten the output of conv2 to (batch_size, 32 * 7 * 7)
x = x.view(x.size(0), -1) # another way to flatten
output = self.out(x)
return output, x # return x for visualization
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))
net2 = Net2().to(device)
print(net2)
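With padding=2, each 5x5 convolution preserves the spatial size and each 2x2 max-pooling halves it: 28 -> 14 -> 7, hence the 32 * 7 * 7 input size of self.out. A dummy forward pass also shows the two return values (a quick check):
out, feats = net2(torch.zeros(1, 1, 28, 28).to(device))
print(out.shape, feats.shape)  # torch.Size([1, 10]) torch.Size([1, 1568]); 1568 = 32*7*7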
Set up Loss function and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(net2.parameters(), lr = 0.01)
Train the network
num_epochs = 15
def train(num_epochs, cnn, loaders):
cnn.train()
# Train the model
total_step = len(loaders)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(loaders):
b_x = images.to(device) # batch x
b_y = labels.to(device) # batch y
output = cnn(b_x)[0] # cnn output: get the first element of the returned tuple
loss = loss_func(output, b_y)
# clear gradients for this training step
optimizer.zero_grad()
# backpropagation, compute gradients
loss.backward()
# apply gradients
optimizer.step()
if (i + 1) % 200 == 0:
    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
          .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
train(num_epochs, net2, train_loader)
Evaluate the model on test data
def test():
# Test the model
net2.eval()
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
test_output, last_layer = net2(images.to(device))
pred_y = torch.max(test_output, 1)[1]
correct += (pred_y == labels.to(device)).sum().item()
total += labels.size(0)
# accuracy = (pred_y == labels.to(device)).sum().item() / float(labels.size(0))
accuracy = correct / total
print('Test Accuracy of the model on the %d test images: %.4f' % (total, accuracy))
test()
Print 10 predictions from test data
sample = next(iter(test_loader))
imgs, lbls = sample
ground_truth = lbls[:10].numpy()
print("Ground truth:\n", ground_truth)
test_output, last_layer = net2(imgs[:10].to(device))
pred_y = torch.max(test_output, 1)[1]
print("Prediction:\n", pred_y.cpu().numpy())
Exercise: same as the example above, but with the image data changed to Yale Faces; design your own CNN network structure.
Exercise: same as the example above, but with the data changed to handwritten English letters: 26 letters, about 370,000 images, each 28×28. Download from:
https://www.kaggle.com/datasets/ashishguptajiit/handwritten-az