山内セミナーⅠ(2020/07/29)work

Loading and processing data

Loading a dataset

from torchvision import datasets
from torchvision import utils
import torch
import numpy as np
import matplotlib.pyplot as plt
  
def show(img):
    """Display an image tensor with matplotlib.

    Args:
        img: torch tensor whose axes are ordered C*H*W (the axes are
            permuted to H*W*C before plotting).
    """
    # detach() and cpu() make the conversion work for tensors that track
    # gradients or live on another device; Tensor.numpy() raises for those
    npimg = img.detach().cpu().numpy()
    # Convert to H*W*C shape
    npimg_tr = np.transpose(npimg, (1, 2, 0))
    plt.imshow(npimg_tr, interpolation='nearest')
    plt.show()
  
# directory used to store the dataset and/or load it from
path2data = './data'

# training split: build the dataset object, then pull out its tensors
train_data = datasets.MNIST(path2data, train=True, download=True)
x_train = train_data.data
y_train = train_data.targets
print(x_train.shape)
print(y_train.shape)

# validation split: same steps with train=False
val_data = datasets.MNIST(path2data, train=False, download=True)
x_val = val_data.data
y_val = val_data.targets
print(x_val.shape)
print(y_val.shape)

# insert an axis at position 1 so 3-D image tensors become B*C*H*W;
# tensors of any other rank are left untouched
x_train = x_train.unsqueeze(1) if x_train.ndim == 3 else x_train
print(x_train.shape)

x_val = x_val.unsqueeze(1) if x_val.ndim == 3 else x_val
print(x_val.shape)

# tile the first 40 training images into a grid, 8 images per row
first_images = x_train[:40]
x_grid = utils.make_grid(first_images, nrow=8, padding=2)
print(x_grid.shape)

# display the grid with the helper function
show(x_grid)
    

Data transformation

from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
   
# directory used to store the dataset and/or load it from
path2data = './data'

# MNIST training dataset
train_data = datasets.MNIST(path2data, train=True, download=True)

# transformation pipeline: both flips use p=1, so they are always applied,
# and the result is then converted to a tensor
transform_steps = [
    transforms.RandomHorizontalFlip(p=1),
    transforms.RandomVerticalFlip(p=1),
    transforms.ToTensor(),
]
data_transform = transforms.Compose(transform_steps)

# first training sample; element 0 of the sample is the image
sample = train_data[0]
img = sample[0]

# run the image through the pipeline and convert the result to numpy
img_tr = data_transform(img)
img_tr_np = img_tr.numpy()

# plot original and transformed images side by side;
# img_tr_np[0] takes the first slice along the leading axis
panels = ((img, "original"), (img_tr_np[0], "transformed"))
for slot, (picture, caption) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.imshow(picture, cmap="gray")
    plt.title(caption)
plt.show()
    

Wrapping tensors into a dataset

from torch.utils.data import TensorDataset
from torchvision import datasets
   
# directory used to store the dataset and/or load it from
path2data = './data'

# training split: dataset object first, then its data and target tensors
train_data = datasets.MNIST(path2data, train=True, download=True)
x_train = train_data.data
y_train = train_data.targets

# validation split: same steps with train=False
val_data = datasets.MNIST(path2data, train=False, download=True)
x_val = val_data.data
y_val = val_data.targets

# pair each data tensor with its targets in a TensorDataset
train_ds = TensorDataset(x_train, y_train)
val_ds = TensorDataset(x_val, y_val)

# look at the first (x, y) pair only; nothing is printed if there is none
first_sample = next(iter(train_ds), None)
if first_sample is not None:
    x, y = first_sample
    print(x.shape, y.item())
    

Creating data loaders

from torch.utils.data import TensorDataset
from torchvision import datasets
from torch.utils.data import DataLoader
   
# directory used to store the dataset and/or load it from
path2data = './data'

# training split: dataset object first, then its data and target tensors
train_data = datasets.MNIST(path2data, train=True, download=True)
x_train = train_data.data
y_train = train_data.targets

# validation split: same steps with train=False
val_data = datasets.MNIST(path2data, train=False, download=True)
x_val = val_data.data
y_val = val_data.targets

# pair each data tensor with its targets in a TensorDataset
train_ds = TensorDataset(x_train, y_train)
val_ds = TensorDataset(x_val, y_val)

# data loaders that serve the datasets in batches of 8
train_dl = DataLoader(train_ds, batch_size=8)
val_dl = DataLoader(val_ds, batch_size=8)

# fetch only the first training batch and report the batch shapes;
# nothing is printed if the loader yields no batch
first_batch = next(iter(train_dl), None)
if first_batch is not None:
    xb, yb = first_batch
    print(xb.shape)
    print(yb.shape)