Yamauchi Seminar I (2022/07/20)

Related Sites and Materials

Converting the Pretrained PyTorch DeepLabV3 ResNet50 Model to ONNX Format (for reference)

import os
import cv2
import numpy as np
import torch
import torch.onnx
from torchvision import models
  
# proxy
os.environ["http_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
os.environ["https_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
    

def save_pytorch_onnx_model(original_model, onnx_model_path):
    # dummy input that fixes the exported graph's input shape (NCHW);
    # the deprecated torch.autograd.Variable wrapper is not needed
    generated_input = torch.randn(1, 3, 224, 224)
  
    # DeepLabV3 returns a dict; its "out" tensor becomes the first ONNX output
    torch.onnx.export(
        original_model,
        generated_input,
        onnx_model_path,
        verbose=True,
        input_names=["input"],
        output_names=["output"],
        opset_version=11
    )
    

model = models.segmentation.deeplabv3_resnet50(pretrained=True)
model.eval()  # switch to inference mode before exporting
  
save_path = 'deeplabv3_resnet50.onnx'
save_pytorch_onnx_model(model, save_path)
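
After the export finishes, it is worth confirming that the file is a well-formed ONNX graph before handing it to OpenCV. A minimal sketch using the onnx package (an extra dependency, not required by the scripts below):

import onnx

# load the exported file and run ONNX's structural validity checks
onnx_model = onnx.load('deeplabv3_resnet50.onnx')
onnx.checker.check_model(onnx_model)
print([o.name for o in onnx_model.graph.output])  # "output" should come first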
    

Segmentation with DeepLabV3 ResNet50

import cv2
import numpy as np
import os
    

# read one class name per line from a text file
def read_classes(file):
    classes = None
    with open(file, mode='r', encoding="utf-8") as f:
        classes = f.read().splitlines()
    return classes
  
# generate a reproducible random color for each class
def get_colors(num):
    colors = []
    np.random.seed(0)
    for i in range(num):
        color = np.random.randint(0, 256, [3]).tolist()
        colors.append(color)
    return colors
  
# convert a BGR image into a normalized NCHW blob matching the model input
def get_preprocessed_img(input_img):
    input_img = input_img.astype(np.float32)
    input_img = cv2.resize(input_img, (256, 256))
    
    # define preprocess parameters
    mean = np.array([0.485, 0.456, 0.406]) * 255.0
    scale = 1 / 255.0
    std = [0.229, 0.224, 0.225]
  
    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale to set pixel values from 0 to 1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=False  # center crop
    )
    # 3. divide by std
    input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1)
    return input_blob
  
# run the network and blend the colorized class mask onto the original image
def get_opencv_dnn_prediction(opencv_net, org_img, preproc_img, w0, h0, colors):
    opencv_net.setInput(preproc_img)
    out = opencv_net.forward()
  
    # out has shape (1, C, H, W); the argmax over the class channel gives
    # the predicted class index at each pixel (vectorized, no pixel loops)
    mask = np.argmax(out[0], axis=0).astype(np.uint8)
    
    # map class indices to colors, then resize back to the original image size
    color_mask = np.array(colors, dtype=np.uint8)[mask]
    color_mask = cv2.resize(color_mask, (w0, h0), interpolation=cv2.INTER_NEAREST)
    alpha = 0.5
    beta = 1.0 - alpha
    cv2.addWeighted(org_img, alpha, color_mask, beta, 0.0, org_img)
    return org_img, color_mask
    

model_path = 'deeplabv3_resnet50.onnx'
opencv_net = cv2.dnn.readNetFromONNX(model_path)
  
# load the class list and assign a color to each class
names = 'voc.names'
classes = read_classes(names)
colors = get_colors(len(classes))
colors[0] = (0, 0, 0)  # draw the background class in black
  
target = 'bicycle.jpg'
base_name, _ = os.path.splitext(target)
save_seg_name = base_name + '_seg.png'
save_mask_name = base_name + '_mask.png'
org_img = cv2.imread(target, cv2.IMREAD_COLOR)
img = org_img.copy()
h0, w0 = org_img.shape[:2]
  
input_img = get_preprocessed_img(org_img)
seg_img, mask_img = get_opencv_dnn_prediction(opencv_net, img, input_img, w0, h0, colors)
  
cv2.imwrite(save_seg_name, seg_img)
cv2.imwrite(save_mask_name, mask_img)
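
For reference, the blobFromImage call combined with the std division reproduces the standard ImageNet normalization: (pixel / 255 - mean) / std on an RGB image. A rough PyTorch equivalent (a comparison sketch only, not used by the script):

import torchvision.transforms as T

# HWC uint8 [0, 255] -> CHW float [0, 1], then per-channel normalization,
# matching what blobFromImage plus the std division computes above
preprocess = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])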
    

Input image

Segmentation image

Mask image

voc.names (the list of classes the model was trained on)
background
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
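
Since get_colors seeds NumPy with np.random.seed(0), the class-to-color mapping is reproducible across runs. A small sketch (a hypothetical helper, reusing read_classes and get_colors from the script above) that renders the 21 colors next to their class names as a legend image:

# draw one row per class: a color swatch followed by the class name
classes = read_classes('voc.names')
colors = get_colors(len(classes))
colors[0] = (0, 0, 0)
legend = np.full((20 * len(classes), 200, 3), 255, dtype=np.uint8)
for i, (name, color) in enumerate(zip(classes, colors)):
    cv2.rectangle(legend, (0, 20 * i), (30, 20 * i + 19), color, -1)
    cv2.putText(legend, name, (40, 20 * i + 14),
                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)
cv2.imwrite('legend.png', legend)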
    

Segmentation with DeepLabV3 ResNet50 (processing every image under the images folder)

import cv2
import numpy as np
import os
import glob
    

# read one class name per line from a text file
def read_classes(file):
    classes = None
    with open(file, mode='r', encoding="utf-8") as f:
        classes = f.read().splitlines()
    return classes
  
# generate a reproducible random color for each class
def get_colors(num):
    colors = []
    np.random.seed(0)
    for i in range(num):
        color = np.random.randint(0, 256, [3]).tolist()
        colors.append(color)
    return colors
  
# convert a BGR image into a normalized NCHW blob matching the model input
def get_preprocessed_img(input_img):
    input_img = input_img.astype(np.float32)
    input_img = cv2.resize(input_img, (256, 256))
    
    # define preprocess parameters
    mean = np.array([0.485, 0.456, 0.406]) * 255.0
    scale = 1 / 255.0
    std = [0.229, 0.224, 0.225]
  
    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale to set pixel values from 0 to 1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=False  # center crop
    )
    # 3. divide by std
    input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1)
    return input_blob
  
# run the network and blend the colorized class mask onto the original image
def get_opencv_dnn_prediction(opencv_net, org_img, preproc_img, w0, h0, colors):
    opencv_net.setInput(preproc_img)
    out = opencv_net.forward()
  
    # out has shape (1, C, H, W); the argmax over the class channel gives
    # the predicted class index at each pixel (vectorized, no pixel loops)
    mask = np.argmax(out[0], axis=0).astype(np.uint8)
    
    # map class indices to colors, then resize back to the original image size
    color_mask = np.array(colors, dtype=np.uint8)[mask]
    color_mask = cv2.resize(color_mask, (w0, h0), interpolation=cv2.INTER_NEAREST)
    alpha = 0.5
    beta = 1.0 - alpha
    cv2.addWeighted(org_img, alpha, color_mask, beta, 0.0, org_img)
    return org_img, color_mask
    

model_path = 'deeplabv3_resnet50.onnx'
opencv_net = cv2.dnn.readNetFromONNX(model_path)
  
# load the class list and assign a color to each class
names = 'voc.names'
classes = read_classes(names)
colors = get_colors(len(classes))
colors[0] = (0, 0, 0)  # draw the background class in black
  
# process every image under the images folder; make sure results/ exists first
os.makedirs('results', exist_ok=True)
for path in glob.glob('images/*.*'):
    f = os.path.basename(path)
    print(f)
    base_name, _ = os.path.splitext(f)
    save_seg_name = 'results/' + base_name + '_seg.png'
    save_mask_name = 'results/' + base_name + '_mask.png'
    org_img = cv2.imread(path, cv2.IMREAD_COLOR)
    if org_img is None:  # skip files that could not be read as images
        continue
    img = org_img.copy()
    h0, w0 = org_img.shape[:2]
  
    input_img = get_preprocessed_img(org_img)
    seg_img, mask_img = get_opencv_dnn_prediction(opencv_net, img, input_img, w0, h0, colors)
  
    cv2.imwrite(save_seg_name, seg_img)
    cv2.imwrite(save_mask_name, mask_img)
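
The classes list, which the script otherwise only uses for its length, can also report which VOC classes the network found in an image. A minimal sketch with a hypothetical helper report_classes, reusing get_preprocessed_img and the already-loaded opencv_net:

# return the names of the VOC classes present in the predicted index mask
def report_classes(opencv_net, img, classes):
    blob = get_preprocessed_img(img)
    opencv_net.setInput(blob)
    out = opencv_net.forward()
    idx_mask = np.argmax(out[0], axis=0)
    return [classes[i] for i in np.unique(idx_mask)]

# e.g. print(report_classes(opencv_net, cv2.imread('images/bicycle.jpg'), classes))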