山内セミナーⅠ(2022/07/20)
関連サイトと資料
DeepLabV3 ResNet50のPyTorch学習済みモデルからONNX形式への変換(ご参考)
import os
import cv2
import numpy as np
import torch
import torch.onnx
from torch.autograd import Variable
from torchvision import models
# proxy: route HTTP/HTTPS traffic through the campus proxy so that the
# pretrained-weight download below works from inside the university network
os.environ["http_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
os.environ["https_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
def save_pytorch_onnx_model(original_model, onnx_model_path):
generated_input = Variable(
torch.randn(1, 3, 224, 224)
)
torch.onnx.export(
original_model,
generated_input,
onnx_model_path,
verbose=True,
input_names=["input"],
output_names=["output"],
opset_version=11
)
# Download the pretrained DeepLabV3-ResNet50 segmentation model and convert it.
# NOTE(review): pretrained=True is deprecated in newer torchvision releases
# (weights=... is the successor) — confirm against the installed version.
model = models.segmentation.deeplabv3_resnet50(pretrained=True)
save_path = 'deeplabv3_resnet50.onnx'
save_pytorch_onnx_model(model, save_path)
DeepLabV3 ResNet50によるセグメンテーション
import cv2
import numpy as np
import os
def read_classes(file):
    """Load class names, one per line, from a UTF-8 text file."""
    with open(file, mode='r', encoding="utf-8") as name_file:
        return name_file.read().splitlines()
def get_colors(num):
    """Return `num` random BGR colors as [b, g, r] lists.

    The RNG is reseeded on every call, so the palette is reproducible.
    """
    np.random.seed(0)
    return [np.random.randint(0, 256, [3]).tolist() for _ in range(num)]
def get_preprocessed_img(input_img):
    """Turn a BGR image into a normalized NCHW blob for the DeepLabV3 net.

    Applies ImageNet-style normalization: after a BGR->RGB swap the blob
    holds (pixel/255 - mean/255) / std, shaped (1, 3, 224, 224).
    """
    resized = cv2.resize(input_img.astype(np.float32), (256, 256))
    # ImageNet statistics; the mean is pre-scaled to the 0-255 pixel range
    imagenet_mean = np.array([0.485, 0.456, 0.406]) * 255.0
    imagenet_std = np.asarray([0.229, 0.224, 0.225], dtype=np.float32)
    # blobFromImage subtracts the mean, scales into [0, 1] and swaps channels
    input_blob = cv2.dnn.blobFromImage(
        image=resized,
        scalefactor=1 / 255.0,
        size=(224, 224),      # model input size
        mean=imagenet_mean,
        swapRB=True,          # BGR -> RGB
        crop=False            # plain resize, no center crop
    )
    # finish the normalization: divide each channel by the ImageNet std
    input_blob[0] /= imagenet_std.reshape(3, 1, 1)
    return input_blob
def get_opencv_dnn_prediction(opencv_net, org_img, preproc_img, w0, h0, colors):
    """Run the segmentation net and blend a class-color mask onto org_img.

    Args:
        opencv_net: cv2.dnn network loaded from the ONNX model.
        org_img: original BGR image; modified in place by the overlay.
        preproc_img: blob produced by get_preprocessed_img().
        w0, h0: original image width and height for upscaling the mask.
        colors: per-class BGR colors indexed by class id.

    Returns:
        (overlay image, color mask), both sized (h0, w0).
    """
    opencv_net.setInput(preproc_img)
    out = opencv_net.forward()  # shape (1, num_classes, h, w)
    # per-pixel argmax over the class axis — vectorized replacement for the
    # original O(h*w) Python double loop
    mask = np.argmax(out[0], axis=0).astype(np.uint8)
    color_mask = np.array(colors, dtype=np.uint8)[mask]
    # BUG FIX: the 3rd positional argument of cv2.resize is `dst`, not
    # `interpolation`, so the original silently resized with INTER_LINEAR;
    # nearest-neighbor is required to keep class colors unblended
    color_mask = cv2.resize(color_mask, (w0, h0), interpolation=cv2.INTER_NEAREST)
    alpha = 0.5
    beta = 1.0 - alpha
    # blend the mask over the original image in place
    cv2.addWeighted(org_img, alpha, color_mask, beta, 0.0, org_img)
    return org_img, color_mask
# Load the exported ONNX model into OpenCV's DNN module
model_path = 'deeplabv3_resnet50.onnx'
opencv_net = cv2.dnn.readNetFromONNX(model_path)
# load the class list (one name per line) and one display color per class
names = 'voc.names'
classes = read_classes(names)
colors = get_colors(len(classes))
colors[0] = (0, 0, 0)  # force the "background" class to black
# segment one image and save the overlay and the raw color mask
target = 'bicycle.jpg'
base_name, _ = os.path.splitext(target)
save_seg_name = base_name + '_seg.png'    # blended overlay output
save_mask_name = base_name + '_mask.png'  # color-mask output
org_img = cv2.imread(target, cv2.IMREAD_COLOR)
img = org_img.copy()
h0, w0 = org_img.shape[:2]  # original size, used to upscale the mask
input_img = get_preprocessed_img(org_img)
seg_img, mask_img = get_opencv_dnn_prediction(opencv_net, img, input_img, w0, h0, colors)
cv2.imwrite(save_seg_name, seg_img)
cv2.imwrite(save_mask_name, mask_img)
入力画像
セグメンテーション画像
マスク画像
voc.names(学習対象のリスト)
background
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
DeepLabV3 ResNet50によるセグメンテーション(imagesフォルダ下の画像をすべて処理)
import cv2
import numpy as np
import os
import glob
def read_classes(file):
    """Load class names, one per line, from a UTF-8 text file."""
    with open(file, mode='r', encoding="utf-8") as name_file:
        return name_file.read().splitlines()
def get_colors(num):
    """Return `num` random BGR colors as [b, g, r] lists.

    The RNG is reseeded on every call, so the palette is reproducible.
    """
    np.random.seed(0)
    return [np.random.randint(0, 256, [3]).tolist() for _ in range(num)]
def get_preprocessed_img(input_img):
    """Turn a BGR image into a normalized NCHW blob for the DeepLabV3 net.

    Applies ImageNet-style normalization: after a BGR->RGB swap the blob
    holds (pixel/255 - mean/255) / std, shaped (1, 3, 224, 224).
    """
    resized = cv2.resize(input_img.astype(np.float32), (256, 256))
    # ImageNet statistics; the mean is pre-scaled to the 0-255 pixel range
    imagenet_mean = np.array([0.485, 0.456, 0.406]) * 255.0
    imagenet_std = np.asarray([0.229, 0.224, 0.225], dtype=np.float32)
    # blobFromImage subtracts the mean, scales into [0, 1] and swaps channels
    input_blob = cv2.dnn.blobFromImage(
        image=resized,
        scalefactor=1 / 255.0,
        size=(224, 224),      # model input size
        mean=imagenet_mean,
        swapRB=True,          # BGR -> RGB
        crop=False            # plain resize, no center crop
    )
    # finish the normalization: divide each channel by the ImageNet std
    input_blob[0] /= imagenet_std.reshape(3, 1, 1)
    return input_blob
def get_opencv_dnn_prediction(opencv_net, org_img, preproc_img, w0, h0, colors):
    """Run the segmentation net and blend a class-color mask onto org_img.

    Args:
        opencv_net: cv2.dnn network loaded from the ONNX model.
        org_img: original BGR image; modified in place by the overlay.
        preproc_img: blob produced by get_preprocessed_img().
        w0, h0: original image width and height for upscaling the mask.
        colors: per-class BGR colors indexed by class id.

    Returns:
        (overlay image, color mask), both sized (h0, w0).
    """
    opencv_net.setInput(preproc_img)
    out = opencv_net.forward()  # shape (1, num_classes, h, w)
    # per-pixel argmax over the class axis — vectorized replacement for the
    # original O(h*w) Python double loop
    mask = np.argmax(out[0], axis=0).astype(np.uint8)
    color_mask = np.array(colors, dtype=np.uint8)[mask]
    # BUG FIX: the 3rd positional argument of cv2.resize is `dst`, not
    # `interpolation`, so the original silently resized with INTER_LINEAR;
    # nearest-neighbor is required to keep class colors unblended
    color_mask = cv2.resize(color_mask, (w0, h0), interpolation=cv2.INTER_NEAREST)
    alpha = 0.5
    beta = 1.0 - alpha
    # blend the mask over the original image in place
    cv2.addWeighted(org_img, alpha, color_mask, beta, 0.0, org_img)
    return org_img, color_mask
# Load the exported ONNX model into OpenCV's DNN module
model_path = 'deeplabv3_resnet50.onnx'
opencv_net = cv2.dnn.readNetFromONNX(model_path)
# load the class list (one name per line) and one display color per class
names = 'voc.names'
classes = read_classes(names)
colors = get_colors(len(classes))
colors[0] = (0, 0, 0)  # force the "background" class to black
# cv2.imwrite fails silently (returns False) when the target directory is
# missing, so make sure it exists before the loop
os.makedirs('results', exist_ok=True)
# segment every file under images/ and save overlay + mask under results/
for path in glob.glob('images/*.*'):
    f = os.path.basename(path)
    print(f)
    base_name, _ = os.path.splitext(f)
    save_seg_name = 'results/' + base_name + '_seg.png'
    save_mask_name = 'results/' + base_name + '_mask.png'
    org_img = cv2.imread(path, cv2.IMREAD_COLOR)
    if org_img is None:
        # the glob matches any extension; skip files OpenCV cannot decode
        # instead of crashing on org_img.copy()
        print('skipped (not an image):', path)
        continue
    img = org_img.copy()
    h0, w0 = org_img.shape[:2]  # original size, used to upscale the mask
    input_img = get_preprocessed_img(org_img)
    seg_img, mask_img = get_opencv_dnn_prediction(opencv_net, img, input_img, w0, h0, colors)
    cv2.imwrite(save_seg_name, seg_img)
    cv2.imwrite(save_mask_name, mask_img)