専門ユニット2/山内研セミナー(2020/11/03)

関連サイトと資料

使用する線形代数ライブラリ

linear_algebra.py
import math
   
def add(v, w):
    """Returns the element-wise sum of two vectors.

    Both vectors must have the same length; otherwise the assertion
    fails. The result is a new list; neither input is modified.
    """
    same_length = len(v) == len(w)
    assert same_length, "vectors must be the same length"

    total = []
    for left, right in zip(v, w):
        total.append(left + right)
    return total
   
def subtract(v, w):
    """Returns the element-wise difference v - w of two vectors.

    Both vectors must have the same length; otherwise the assertion
    fails. The result is a new list; neither input is modified.
    """
    same_length = len(v) == len(w)
    assert same_length, "vectors must be the same length"

    difference = []
    for left, right in zip(v, w):
        difference.append(left - right)
    return difference
  
def vector_sum(vectors):
    """Adds up a list of vectors component by component.

    `vectors` must be non-empty and every vector must have the same
    length as the first one; otherwise an assertion fails.
    Returns a new list whose i-th entry is the sum of the i-th entries
    of all the vectors.
    """
    assert vectors, "no vectors provided!"

    # Every vector is compared against the length of the first one.
    width = len(vectors[0])
    sizes_match = all(len(vec) == width for vec in vectors)
    assert sizes_match, "different sizes!"

    # zip(*vectors) groups the i-th entries of every vector together.
    return [sum(column) for column in zip(*vectors)]
  
def scalar_multiply(c, v):
    """Returns a new list holding each element of v multiplied by c."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled
  
def vector_mean(vectors):
    """Returns the element-wise average of a list of vectors.

    The component-wise sum of the vectors is scaled by 1 / (number of
    vectors), so the result always contains floats.
    """
    # The weight is computed first, so an empty list fails here with
    # ZeroDivisionError before vector_sum is ever called.
    weight = 1 / len(vectors)
    totals = vector_sum(vectors)
    return scalar_multiply(weight, totals)
  
def dot(v, w):
    """Returns the dot product v_1 * w_1 + ... + v_n * w_n.

    Both vectors must have the same length; otherwise the assertion
    fails. Two empty vectors give 0.
    """
    assert len(v) == len(w), "vectors must be same length"

    total = 0
    for left, right in zip(v, w):
        total = total + left * right
    return total
  
def sum_of_squares(v):
    """Returns the sum of the squares of v's elements (v dotted with itself)."""
    squared_total = dot(v, v)
    return squared_total
  
def magnitude(v):
    """Returns the length of v: the square root of its sum of squares."""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)
   
def squared_distance(v, w):
    """Returns (v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2."""
    difference = subtract(v, w)
    return sum_of_squares(difference)
   
def distance(v, w):
    """Returns the distance between v and w (square root of the squared distance)."""
    squared = squared_distance(v, w)
    return math.sqrt(squared)
   
def shape(A):
    """Returns the pair (number of rows of A, number of columns of A).

    The column count is taken from the first row only; an empty matrix
    has shape (0, 0).
    """
    row_count = len(A)
    if not A:
        return row_count, 0
    return row_count, len(A[0])
  
def get_row(A, i):
    """Returns row i of A.

    The row object stored in A is returned itself, not a copy.
    """
    row = A[i]
    return row
  
def get_column(A, j):
    """Returns column j of A as a new list (the j-th entry of every row)."""
    column = []
    for row in A:
        column.append(row[j])
    return column
   
def make_matrix(num_rows, num_cols, entry_fn):
    """
    Builds a num_rows x num_cols matrix as a list of row lists.

    The entry in row i, column j is entry_fn(i, j); entries are
    generated row by row, left to right.
    """
    matrix = []
    for i in range(num_rows):
        row = []
        for j in range(num_cols):
            row.append(entry_fn(i, j))
        matrix.append(row)
    return matrix
   
def identity_matrix(n):
    """Returns the n x n identity matrix (1 on the diagonal, 0 elsewhere)."""
    def diagonal_indicator(i, j):
        # 1 exactly where the row index equals the column index
        return 1 if i == j else 0

    return make_matrix(n, n, diagonal_indicator)
    

関数addの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import add

# Element-wise sum of two 3-element vectors; prints [5, 7, 9].
print(add([1, 2, 3], [4, 5, 6]))
    

関数subtractの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import subtract

# Element-wise difference of two 3-element vectors; prints [1, 2, 3].
print(subtract([5, 7, 9], [4, 5, 6]))
    

関数vector_sumの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import vector_sum

# Component-wise sum of four 2-element vectors; prints [16, 20].
print(vector_sum([[1, 2], [3, 4], [5, 6], [7, 8]]))
    

関数scalar_multiplyの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import scalar_multiply

# Every element multiplied by 2; prints [2, 4, 6].
print(scalar_multiply(2, [1, 2, 3]))
    

関数vector_meanの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import vector_mean

# Component-wise average of three 2-element vectors; prints [3.0, 4.0].
print(vector_mean([[1, 2], [3, 4], [5, 6]]))
    

関数dotの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import dot

# 1*4 + 2*5 + 3*6; prints 32.
print(dot([1, 2, 3], [4, 5, 6]))
    

関数sum_of_squaresの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import sum_of_squares

# 1*1 + 2*2 + 3*3; prints 14.
print(sum_of_squares([1, 2, 3]))
    

関数magnitudeの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import magnitude

# Square root of 3*3 + 4*4 = 25; prints 5.0.
print(magnitude([3, 4]))
    

関数shapeの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import shape

# A matrix with 2 rows and 3 columns; prints (2, 3).
print(shape([[1, 2, 3], [4, 5, 6]]))
    

関数identity_matrixの使用例
# Import only the function this example uses instead of a wildcard import.
from linear_algebra import identity_matrix

# Prints the 5 x 5 identity matrix as a list of five rows.
print(identity_matrix(5))
    

8.2 勾配の評価

微分を差分で近似
%matplotlib inline
from linear_algebra import dot
import matplotlib.pyplot as plt
   
def sum_of_squares(v):
    """Returns the sum of the squared elements of v (v dotted with itself)."""
    squared_total = dot(v, v)
    return squared_total
   
def square(x):
    """Returns x multiplied by itself."""
    squared = x * x
    return squared
   
def derivative(x):
    """Returns 2 * x, the exact derivative of x * x at x."""
    slope = 2 * x
    return slope
   
def difference_quotient(f, x, h):
    """Returns the forward difference quotient (f(x + h) - f(x)) / h.

    f is called twice: first at x + h, then at x.
    """
    ahead = f(x + h)
    here = f(x)
    return (ahead - here) / h
   
# Evaluate the exact derivative of square and its forward-difference
# estimate (h = 0.001) at every integer from -10 to 10.
xs = range(-10, 11)
actuals = list(map(derivative, xs))
estimates = [difference_quotient(square, point, h=0.001) for point in xs]

# Overlay both series: red x markers for the exact values, blue + markers
# for the estimates.
plt.title("Actual Derivatives vs. Estimates")
plt.plot(xs, actuals, 'rx', label='Actual')
plt.plot(xs, estimates, 'b+', label='Estimate')
plt.legend(loc=9)  # 9 is matplotlib's location code for 'upper center'
    

8.3 勾配を利用する

関数sum_of_squaresの最小値を探索
import random
from linear_algebra import add, scalar_multiply
  
def gradient_step(v, gradient, step_size):
    """Returns the point reached from `v` by adding `step_size` * `gradient`.

    `v` and `gradient` must have the same length; otherwise the
    assertion fails. A negative `step_size` moves against the gradient.
    """
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))
  
def sum_of_squares_gradient(v):
    """Returns the gradient of the sum of squares at v: 2 * v_i for each element."""
    gradient = []
    for component in v:
        gradient.append(2 * component)
    return gradient
   
# Start from a random point whose three coordinates lie in [-10, 10].
v = [random.uniform(-10, 10) for _ in range(3)]

# Take 1000 steps, printing the current point after every step.
for epoch in range(1000):
    # gradient of the sum of squares at the current point
    grad = sum_of_squares_gradient(v)
    # the negative step size moves v against the gradient
    v = gradient_step(v, grad, -0.01)
    print(epoch, v)
    

8.5 勾配降下法を使用してモデルを適合させる

線形モデルy = 20x + 5のパラメータを推定する
import random
from linear_algebra import add, scalar_multiply, vector_mean
   
# Training data: (x, y) pairs lying exactly on y = 20x + 5 for x = -50 .. 49.
inputs = [(x_i, 20 * x_i + 5) for x_i in range(-50, 50)]
# Initial [slope, intercept] guess, each drawn uniformly from [-1, 1].
theta = [random.uniform(-1, 1) for _ in range(2)]
   
def gradient_step(v, gradient, step_size):
    """Returns the point reached from `v` by adding `step_size` * `gradient`.

    `v` and `gradient` must have the same length; otherwise the
    assertion fails. A negative `step_size` moves against the gradient.
    """
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))
   
def linear_gradient(x, y, theta):
    """Returns the gradient of the squared error at one data point.

    `theta` is unpacked as (slope, intercept) and the model predicts
    slope * x + intercept. With error = predicted - y, the squared
    error is error ** 2.

    Returns a two-element list: the partial derivatives of the squared
    error with respect to the slope and the intercept.
    """
    slope, intercept = theta
    predicted = slope * x + intercept
    error = predicted - y
    # The squared error itself is deliberately not evaluated: only its
    # derivative is needed, and `error ** 2` would raise OverflowError
    # for very large float errors.
    return [2 * error * x, 2 * error]
   
learning_rate = 0.001

# 5000 passes; each pass takes one step using the whole data set.
for epoch in range(5000):
    # average the per-point gradients over every (x, y) pair
    grad = vector_mean(
        [linear_gradient(x_i, y_i, theta) for x_i, y_i in inputs]
    )
    # the negative step size moves theta against the gradient
    theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)
    

8.6 ミニバッチと確率的勾配降下法

ミニバッチを用いて、線形モデルy = 20x + 5のパラメータを推定する
import random
from linear_algebra import add, scalar_multiply, vector_mean
   
# Training data: (x, y) pairs lying exactly on y = 20x + 5 for x = -50 .. 49.
inputs = [(x_i, 20 * x_i + 5) for x_i in range(-50, 50)]
# Initial [slope, intercept] guess, each drawn uniformly from [-1, 1].
theta = [random.uniform(-1, 1) for _ in range(2)]
   
def gradient_step(v, gradient, step_size):
    """Returns the point reached from `v` by adding `step_size` * `gradient`.

    `v` and `gradient` must have the same length; otherwise the
    assertion fails. A negative `step_size` moves against the gradient.
    """
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))
   
def linear_gradient(x, y, theta):
    """Returns the gradient of the squared error at one data point.

    `theta` is unpacked as (slope, intercept) and the model predicts
    slope * x + intercept. With error = predicted - y, the squared
    error is error ** 2.

    Returns a two-element list: the partial derivatives of the squared
    error with respect to the slope and the intercept.
    """
    slope, intercept = theta
    predicted = slope * x + intercept
    error = predicted - y
    # The squared error itself is deliberately not evaluated: only its
    # derivative is needed, and `error ** 2` would raise OverflowError
    # for very large float errors.
    return [2 * error * x, 2 * error]
   
def minibatches(dataset, batch_size, shuffle=True):
    """Yields consecutive slices of `dataset`, each at most `batch_size` long.

    Slices start at indexes 0, batch_size, 2 * batch_size, ...; the last
    slice is shorter when len(dataset) is not a multiple of batch_size.
    When `shuffle` is true, the order in which the slices are yielded is
    randomized; the contents of each slice are not.
    """
    offsets = list(range(0, len(dataset), batch_size))

    if shuffle:
        random.shuffle(offsets)

    for offset in offsets:
        yield dataset[offset:offset + batch_size]
   
learning_rate = 0.001

# 1000 passes over the data; theta is updated once per minibatch of up to
# 20 points and printed once per pass.
for epoch in range(1000):
    for batch in minibatches(inputs, batch_size=20):
        # average the per-point gradients over this minibatch only
        grad = vector_mean(
            [linear_gradient(x_i, y_i, theta) for x_i, y_i in batch]
        )
        # the negative step size moves theta against the gradient
        theta = gradient_step(theta, grad, -learning_rate)
    print(epoch, theta)