Specialized Unit 2 / Yamauchi Lab Seminar (2020/11/17)

Related sites and materials

Linear algebra library used

linear_algebra.py
import math
   
def add(v, w):
    """Adds corresponding elements"""
    assert len(v) == len(w), "vectors must be the same length"
    return [v_i + w_i for v_i, w_i in zip(v, w)]
   
def subtract(v, w):
    """Subtracts corresponding elements"""
    assert len(v) == len(w), "vectors must be the same length"
    return [v_i - w_i for v_i, w_i in zip(v, w)]
  
def vector_sum(vectors):
    """Sums all corresponding elements"""
    # Check that vectors is not empty
    assert vectors, "no vectors provided!"
  
    # Check the vectors are all the same size
    num_elements = len(vectors[0])
    assert all(len(v) == num_elements for v in vectors), "different sizes!"
  
    # the i-th element of the result is the sum of every vector[i]
    return [sum(vector[i] for vector in vectors) for i in range(num_elements)]
  
def scalar_multiply(c, v):
    """Multiplies every element by c"""
    return [c * v_i for v_i in v]
  
def vector_mean(vectors):
    """Computes the element-wise average"""
    n = len(vectors)
    return scalar_multiply(1/n, vector_sum(vectors))
  
def dot(v, w):
    """Computes v_1 * w_1 + ... + v_n * w_n"""
    assert len(v) == len(w), "vectors must be same length"
  
    return sum(v_i * w_i for v_i, w_i in zip(v, w))
  
def sum_of_squares(v):
    """Returns v_1 * v_1 + ... + v_n * v_n"""
    return dot(v, v)
  
def magnitude(v):
    """Returns the magnitude (or length) of v"""
    return math.sqrt(sum_of_squares(v))
   
def squared_distance(v, w):
    """Computes (v_1 - w_1) ** 2 + ... + (v_n - w_n) ** 2"""
    return sum_of_squares(subtract(v, w))
   
def distance(v, w):
    """Computes the distance between v and w"""
    return math.sqrt(squared_distance(v, w))
   
def shape(A):
    """Returns (# of rows of A, # of columns of A)"""
    num_rows = len(A)
    num_cols = len(A[0]) if A else 0   # number of elements in first row
    return num_rows, num_cols
  
def get_row(A, i):
    """Returns the i-th row of A (as a Vector)"""
    return A[i]
  
def get_column(A, j):
    """Returns the j-th column of A (as a Vector)"""
    return [A_i[j] for A_i in A]
   
def make_matrix(num_rows, num_cols, entry_fn):
    """
    Returns a num_rows x num_cols matrix
    whose (i,j)-th entry is entry_fn(i, j)
    """
    return [[entry_fn(i, j) for j in range(num_cols)] for i in range(num_rows)]
   
def identity_matrix(n):
    """Returns the n x n identity matrix"""
    return make_matrix(n, n, lambda i, j: 1 if i == j else 0)
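
A quick sanity check of these helpers (a minimal sketch, assuming linear_algebra.py is importable from the working directory):

from linear_algebra import add, dot, vector_mean, magnitude, identity_matrix

assert add([1, 2, 3], [4, 5, 6]) == [5, 7, 9]
assert dot([1, 2, 3], [4, 5, 6]) == 32                    # 1*4 + 2*5 + 3*6
assert vector_mean([[1, 2], [3, 4], [5, 6]]) == [3, 4]
assert magnitude([3, 4]) == 5
assert identity_matrix(3) == [[1, 0, 0],
                              [0, 1, 0],
                              [0, 0, 1]]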
    

Gradient descent library used

gradient_descent.py
from linear_algebra import add, scalar_multiply
   
def gradient_step(v, gradient, step_size):
    """Moves `step_size` in the `gradient` direction from `v`"""
    assert len(v) == len(gradient)
    step = scalar_multiply(step_size, gradient)
    return add(v, step)
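
A minimal usage sketch (not part of the seminar code): repeatedly stepping against the gradient of sum_of_squares(v), whose gradient at v is 2*v, drives every component toward 0.

from linear_algebra import sum_of_squares
from gradient_descent import gradient_step

v = [3.0, -2.0]
for _ in range(100):
    grad = [2 * v_i for v_i in v]        # gradient of sum_of_squares at v
    v = gradient_step(v, grad, -0.1)     # negative step size: move downhill

print(v, sum_of_squares(v))              # both components should now be close to 0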
    

18.1 Perceptrons

Perceptron implementation
from linear_algebra import dot
   
def step_function(x):
    return 1.0 if x >= 0 else 0.0
  
def perceptron_output(weights, bias, x):
    """Returns 1 if the perceptron 'fires', 0 if not"""
    calculation = dot(weights, x) + bias
    return step_function(calculation)
    

AND gate with a perceptron
and_weights = [2., 2]
and_bias = -3.
   
inputs = [[i, j] for i in range(2) for j in range(2)]
   
for pair_input in inputs:
    v = perceptron_output(and_weights, and_bias, pair_input)
    print('input={0} output={1}'.format(pair_input, v))
    

OR gate with a perceptron
or_weights = [2., 2]
or_bias = -1.
   
inputs = [[i, j] for i in range(2) for j in range(2)]
   
for pair_input in inputs:
    v = perceptron_output(or_weights, or_bias, pair_input)
    print('input={0} output={1}'.format(pair_input, v))
    

NOT gate with a perceptron
not_weights = [-2.]
not_bias = 1.
   
single_input = [[i] for i in range(2)]
   
for i1 in single_input:
    v = perceptron_output(not_weights, not_bias, i1)
    print('input={0} output={1}'.format(i1, v))
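
A brief check of why these weight choices implement the three gates: the perceptron fires exactly when dot(weights, x) + bias >= 0.

AND:  2*0 + 2*0 - 3 = -3,   2*1 + 2*0 - 3 = -1,   2*1 + 2*1 - 3 = 1    -> fires only for (1, 1)
OR:   2*0 + 2*0 - 1 = -1,   2*1 + 2*0 - 1 = 1,    2*1 + 2*1 - 1 = 3    -> fires when at least one input is 1
NOT:  -2*0 + 1 = 1,   -2*1 + 1 = -1                                    -> fires only for input 0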
    

18.2 Feed-Forward Neural Networks

Feed-forward neural network implementation
import math
from linear_algebra import dot
   
def sigmoid(t):
    return 1 / (1 + math.exp(-t))
   
def neuron_output(weights, inputs):
    # weights includes the bias term, inputs includes a 1
    return sigmoid(dot(weights, inputs))
   
def feed_forward(neural_network, input_vector):
    """
    Feeds the input vector through the neural network.
    Returns the outputs of all layers (not just the last one).
    """
    outputs = []
  
    for layer in neural_network:
        input_with_bias = input_vector + [1]
        output = [neuron_output(neuron, input_with_bias) for neuron in layer]
        outputs.append(output)
   
        # Then the input to the next layer is the output of this one
        input_vector = output
   
    return outputs
    

XOR gate with a feed-forward neural network
xor_network = [# hidden layer
      [[20., 20, -30],      # 'and' neuron
       [20., 20, -10]],     # 'or'  neuron
      # output layer
      [[-60., 60, -30]]]
   
inputs = [[i, j] for i in range(2) for j in range(2)]
  
for pair_input in inputs:
    v = feed_forward(xor_network, pair_input)[-1][0]
    print('input={0} output={1}'.format(pair_input, v))
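
Why this network computes XOR: the hidden neurons approximate AND and OR (their sigmoid outputs sit very close to 0 or 1), and the output neuron computes roughly sigmoid(-60*and + 60*or - 30), which is near 1 only when the OR neuron fires but the AND neuron does not, i.e. when exactly one input is 1. Worked out for three of the inputs:

(0, 0): and ≈ sigmoid(-30) ≈ 0,   or ≈ sigmoid(-10) ≈ 0,   output ≈ sigmoid(-30) ≈ 0
(1, 0): and ≈ sigmoid(-10) ≈ 0,   or ≈ sigmoid(10) ≈ 1,    output ≈ sigmoid(30) ≈ 1
(1, 1): and ≈ sigmoid(10) ≈ 1,    or ≈ sigmoid(30) ≈ 1,    output ≈ sigmoid(-30) ≈ 0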
    

18.3 Backpropagation

Computing the gradients
def sqerror_gradients(network, input_vector, target_vector):
    """
    Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)
   
    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input
                     for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]
    
    return [hidden_grads, output_grads]
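
These deltas come from the chain rule for a sigmoid unit under squared-error loss; note the constant factor 2 from differentiating (output - target)**2 is dropped here, which effectively absorbs it into the learning rate:

sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)) = output * (1 - output)

output layer:   output_delta_k  = (output_k - target_k) * output_k * (1 - output_k)
output weights: d(loss)/d(w_kj) = output_delta_k * hidden_output_j                      (bias weight: * 1)
hidden layer:   hidden_delta_i  = hidden_output_i * (1 - hidden_output_i) * sum_k(output_delta_k * w_ki)
hidden weights: d(loss)/d(w_ij) = hidden_delta_i * input_j                              (bias weight: * 1)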
    

Training the XOR gate
from gradient_descent import gradient_step
import random
import tqdm
   
random.seed(0)
    
# training data
xs = [[0., 0], [0., 1], [1., 0], [1., 1]]
ys = [[0.], [1.], [1.], [0.]]
    
# start with random weights
network = [ # hidden layer: 2 inputs -> 2 outputs
                [[random.random() for _ in range(2 + 1)],   # 1st hidden neuron
                 [random.random() for _ in range(2 + 1)]],  # 2nd hidden neuron
                # output layer: 2 inputs -> 1 output
                [[random.random() for _ in range(2 + 1)]]   # 1st output neuron
        ]
    
learning_rate = 1.0
    
for epoch in tqdm.trange(20000, desc="neural net for xor"):
    for x, y in zip(xs, ys):
        gradients = sqerror_gradients(network, x, y)
    
        # Take a gradient step for each neuron in each layer
        network = [[gradient_step(neuron, grad, -learning_rate)
                    for neuron, grad in zip(layer, layer_grad)]
                   for layer, layer_grad in zip(network, gradients)]
   
print()
   
inputs = [[i, j] for i in range(2) for j in range(2)]
   
for pair_input in inputs:
    v = feed_forward(network, pair_input)[-1][0]   # evaluate the trained network, not the hand-built xor_network
    print('input={0} output={1}'.format(pair_input, v))
   
print(network)
    

18.4 Example: Fizz Buzz

Implementation (1)
def fizz_buzz_encode(x):
    if x % 15 == 0:
        return [0, 0, 0, 1]
    elif x % 5 == 0:
        return [0, 0, 1, 0]
    elif x % 3 == 0:
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]
  
def binary_encode(x):
    binary = []
   
    for i in range(10):
        binary.append(x % 2)
        x = x // 2
  
    return binary
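
A few quick checks of the two encodings; the expected values follow directly from the definitions above (binary_encode is least-significant bit first):

assert fizz_buzz_encode(2)  == [1, 0, 0, 0]
assert fizz_buzz_encode(6)  == [0, 1, 0, 0]
assert fizz_buzz_encode(10) == [0, 0, 1, 0]
assert fizz_buzz_encode(30) == [0, 0, 0, 1]

#                             1  2  4  8 16 32 64 128 256 512
assert binary_encode(0)   == [0, 0, 0, 0, 0, 0, 0, 0,  0,  0]
assert binary_encode(1)   == [1, 0, 0, 0, 0, 0, 0, 0,  0,  0]
assert binary_encode(101) == [1, 0, 1, 0, 0, 1, 1, 0,  0,  0]
assert binary_encode(999) == [1, 1, 1, 0, 0, 1, 1, 1,  1,  1]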
    

Implementation (2)
import math
from linear_algebra import dot
   
def sigmoid(t):
    return 1 / (1 + math.exp(-t))
   
def neuron_output(weights, inputs):
    # weights includes the bias term, inputs includes a 1
    return sigmoid(dot(weights, inputs))
   
def feed_forward(neural_network, input_vector):
    """
    Feeds the input vector through the neural network.
    Returns the outputs of all layers (not just the last one).
    """
    outputs = []
  
    for layer in neural_network:
        input_with_bias = input_vector + [1]
        output = [neuron_output(neuron, input_with_bias) for neuron in layer]
        outputs.append(output)

        # Then the input to the next layer is the output of this one
        input_vector = output
  
    return outputs
    

Implementation (3)
def sqerror_gradients(network, input_vector, target_vector):
    """
    Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)
  
    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i, output_neuron in enumerate(network[-1])]

    # gradients with respect to hidden neuron pre-activation outputs
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    hidden_grads = [[hidden_deltas[i] * input
                     for input in input_vector + [1]]
                    for i, hidden_neuron in enumerate(network[0])]
  
    return [hidden_grads, output_grads]
    

Implementation (4)
import random
import tqdm
from linear_algebra import squared_distance
from gradient_descent import gradient_step
   
# train on the numbers 101..1023; 1..100 are held out below as the test set
xs = [binary_encode(n) for n in range(101, 1024)]
ys = [fizz_buzz_encode(n) for n in range(101, 1024)]
   
NUM_HIDDEN = 25
    
network = [
        # hidden layer: 10 inputs -> NUM_HIDDEN outputs
        [[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)],
    
        # output_layer: NUM_HIDDEN inputs -> 4 outputs
        [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)]
    ]
  
learning_rate = 1.0
    
with tqdm.trange(500) as t:
    for epoch in t:
        epoch_loss = 0.0
    
        for x, y in zip(xs, ys):
            predicted = feed_forward(network, x)[-1]
            epoch_loss += squared_distance(predicted, y)
            gradients = sqerror_gradients(network, x, y)
    
            # Take a gradient step for each neuron in each layer
            network = [[gradient_step(neuron, grad, -learning_rate)
                        for neuron, grad in zip(layer, layer_grad)]
                       for layer, layer_grad in zip(network, gradients)]
    
        t.set_description(f"fizz buzz (loss: {epoch_loss:.2f})")
    

Implementation (5)
def argmax(xs):
    """Returns the index of the largest value"""
    return max(range(len(xs)), key=lambda i: xs[i])
  
num_correct = 0
for n in range(1, 101):
    x = binary_encode(n)
    predicted = argmax(feed_forward(network, x)[-1])
    actual = argmax(fizz_buzz_encode(n))
    labels = [str(n), "fizz", "buzz", "fizzbuzz"]
    print(n, labels[predicted], labels[actual])
    
    if predicted == actual:
        num_correct += 1
    
print(num_correct, "/", 100)
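
A couple of quick checks for the argmax helper defined above (it returns the index of the largest element):

assert argmax([0, -1]) == 0    # items[0] is largest
assert argmax([-1, 0]) == 1    # items[1] is largest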