専門ユニット2/山内研セミナー(2020/11/10)

関連サイトと資料

11.3 過学習と未学習

データセットの分割
import random
   
def split_data(data, prob):
    """Randomly split data into two lists with fractions [prob, 1 - prob].

    Args:
        data: Any iterable of items (list, tuple, range, generator, ...).
            It is copied into a new list first, so the caller's object is
            never reordered.
        prob: Fraction of the items to place in the first part.

    Returns:
        A pair (first, second) of lists. ``first`` holds the first
        int(len(data) * prob) items of the shuffled copy and ``second``
        holds the rest.
    """
    # list(...) rather than data[:]: slicing a tuple or range yields an
    # immutable object that random.shuffle cannot reorder in place.
    shuffled = list(data)
    random.shuffle(shuffled)
    cut = int(len(shuffled) * prob)
    return shuffled[:cut], shuffled[cut:]
   
# Demo: shuffle the integers 0..999 and keep 75% of them for training.
data = list(range(1000))
train, test = split_data(data, 0.75)
    

データセットの分割(入力と出力がある場合)
def train_test_split(xs, ys, test_pct):
    """Split paired inputs and outputs into training and test portions.

    The positions 0 .. len(xs) - 1 are divided by split_data, with a
    fraction of 1 - test_pct going to the training side. The same
    positions are then used to pick from both xs and ys, so each input
    stays paired with its output.

    Returns:
        A tuple (x_train, x_test, y_train, y_test) of lists.
    """
    positions = list(range(len(xs)))
    train_pos, test_pos = split_data(positions, 1 - test_pct)

    x_train = [xs[k] for k in train_pos]
    x_test = [xs[k] for k in test_pos]
    y_train = [ys[k] for k in train_pos]
    y_test = [ys[k] for k in test_pos]
    return x_train, x_test, y_train, y_test
   
# Demo: inputs 0..999 paired with outputs y = 2x, with 25% held out as test data.
xs = list(range(1000))
ys = [value * 2 for value in xs]

x_train, x_test, y_train, y_test = train_test_split(xs, ys, test_pct=0.25)
    

11.4 正確さ

白血病検査法の正解率(accuracy)
def accuracy(tp, fp, fn, tn):
    """Return the fraction of all cases that were classified correctly.

    Computed as (tp + tn) divided by the sum of all four counts.
    """
    return (tp + tn) / (tp + fp + fn + tn)
   
# Accuracy for the leukemia-test example counts.
print(accuracy(tp=70, fp=4930, fn=13930, tn=981070))
    

白血病検査法の適合率(precision)
def precision(tp, fp, fn, tn):
    """Return tp / (tp + fp): the share of positive predictions that were right.

    fn and tn are accepted but not used in the calculation.
    """
    predicted_positive = tp + fp
    return tp / predicted_positive
  
# Precision for the leukemia-test example counts.
print(precision(tp=70, fp=4930, fn=13930, tn=981070))
    

白血病検査法の再現率(recall)
def recall(tp, fp, fn, tn):
    """Return tp / (tp + fn): the share of actual positives that were found.

    fp and tn are accepted but not used in the calculation.
    """
    actual_positive = tp + fn
    return tp / actual_positive
   
# Recall for the leukemia-test example counts.
print(recall(tp=70, fp=4930, fn=13930, tn=981070))
    

白血病検査法のF1値
def f1_score(tp, fp, fn, tn):
    """Return the F1 score: the harmonic mean of precision and recall.

    Args:
        tp: Count of true positives.
        fp: Count of false positives.
        fn: Count of false negatives.
        tn: Count of true negatives (forwarded to the helper functions).

    Returns:
        2 * p * r / (p + r), where p = precision(...) and r = recall(...).
        Returns 0.0 when tp is 0.
    """
    # With no true positives, precision and recall are each either 0 or
    # undefined (0 / 0), so p + r cannot be used as a divisor. Report a
    # score of 0.0 instead of raising ZeroDivisionError.
    if tp == 0:
        return 0.0
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)
    return 2 * p * r / (p + r)
   
# F1 score for the leukemia-test example counts.
print(f1_score(tp=70, fp=4930, fn=13930, tn=981070))