Yamauchi Seminar I (2022/07/13)

Related sites and materials

Converting the pretrained EfficientNet PyTorch model to ONNX format (for reference)

import os
import cv2
import numpy as np
import torch
import torch.onnx
from torchvision import models
  
# proxy
#os.environ["http_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
#os.environ["https_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
    

def save_pytorch_onnx_model(original_model, onnx_model_path):
    # dummy input that fixes the export input shape (N, C, H, W)
    generated_input = torch.randn(1, 3, 224, 224)
  
    torch.onnx.export(
        original_model,
        generated_input,
        onnx_model_path,
        verbose=True,
        input_names=["input"],
        output_names=["output"],
        opset_version=11
    )
    

original_model = models.efficientnet_b7(pretrained=True)
  
save_path = 'efficientnet-b7.onnx'
save_pytorch_onnx_model(original_model, save_path)
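
Although not part of the original handout, the exported file can be sanity-checked with the onnx package (assumed to be installed) before handing it to OpenCV; a minimal sketch:

import onnx

onnx_model = onnx.load('efficientnet-b7.onnx')
onnx.checker.check_model(onnx_model)  # raises an exception if the graph is malformed
print('inputs :', [i.name for i in onnx_model.graph.input])
print('outputs:', [o.name for o in onnx_model.graph.output])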
    

Image classification with EfficientNet

import cv2
import numpy as np
import matplotlib.pyplot as plt
    

# Function that reads the list of class names from a file
def read_classes(file):
    classes = None
    with open(file, mode='r', encoding="utf-8") as f:
        classes = f.read().splitlines()
    return classes
  
# Get the class list
names = 'imagenet.names'
classes = read_classes(names)
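
Here imagenet.names is assumed to be a plain-text file with one ImageNet label per line, ordered by class ID, so that classes[i] is the label for class ID i. Its first few lines would look like:

tench
goldfish
great white shark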
    

def get_preprocessed_img(img_path):
    # read the image
    input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    input_img = input_img.astype(np.float32)
  
    input_img = cv2.resize(input_img, (256, 256))
  
    # define preprocess parameters
    mean = np.array([0.485, 0.456, 0.406]) * 255.0
    scale = 1 / 255.0
    std = [0.229, 0.224, 0.225]
  
    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale to set pixel values from 0 to 1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=True  # center crop
    )
    # 3. divide by std
    input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1)
    return input_blob
    

def get_opencv_dnn_prediction(opencv_net, preproc_img, imagenet_labels):
    # set OpenCV DNN input
    opencv_net.setInput(preproc_img)
  
    # OpenCV DNN inference
    out = opencv_net.forward()
    print("OpenCV DNN prediction: \n")
    print("* shape: ", out.shape)
  
    # get the predicted class ID
    imagenet_class_id = np.argmax(out)
  
    # get confidence
    confidence = out[0][imagenet_class_id]
    print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id]))
    print("* confidence: {:.4f}".format(confidence))
    

model_path = 'efficientnet-b7.onnx'
opencv_net = cv2.dnn.readNetFromONNX(model_path)
  
input_img = get_preprocessed_img('yorkie.jpg')
  
get_opencv_dnn_prediction(opencv_net, input_img, classes)
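
Note that torchvision classifiers output raw logits, so the value printed as "confidence" is an unnormalized score rather than a probability. If a probability is wanted, a softmax can be applied to the network output first; a minimal sketch reusing the variables above:

def softmax(x):
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / e.sum()

opencv_net.setInput(input_img)
probs = softmax(opencv_net.forward()[0])
top = int(np.argmax(probs))
print("* label: {}, probability: {:.4f}".format(classes[top], probs[top]))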
    

Test images: yorkie.jpg, yorkie2.jpg, yorkie3.jpg, panda.jpg

Converting the pretrained MobileNetV3-Large PyTorch model to ONNX format (for reference)

import os
import cv2
import numpy as np
import torch
import torch.onnx
from torchvision import models
  
# proxy
os.environ["http_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
os.environ["https_proxy"] = "http://ccproxyz.kanagawa-it.ac.jp:10080"
    

def save_pytorch_onnx_model(original_model, onnx_model_path):
    # dummy input that fixes the export input shape (N, C, H, W)
    generated_input = torch.randn(1, 3, 224, 224)
  
    torch.onnx.export(
        original_model,
        generated_input,
        onnx_model_path,
        verbose=True,
        input_names=["input"],
        output_names=["output"],
        opset_version=11
    )
    

original_model = models.mobilenet_v3_large(pretrained=True)

os.makedirs('models', exist_ok=True)  # torch.onnx.export does not create missing directories
save_path = 'models/mobilenetv3-large.onnx'
save_pytorch_onnx_model(original_model, save_path)
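
As a rough check (not in the handout) that the export preserved the network's behavior, the PyTorch model and the ONNX file loaded through OpenCV DNN can be compared on the same input; the outputs should agree to within floating-point noise:

original_model.eval()  # use inference-mode batch-norm statistics
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    torch_out = original_model(x).numpy()

net = cv2.dnn.readNetFromONNX(save_path)
net.setInput(x.numpy())
cv_out = net.forward()
print('max abs diff:', np.abs(torch_out - cv_out).max())  # expect on the order of 1e-5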
    

Image classification with MobileNetV3-Large

import cv2
import numpy as np
import matplotlib.pyplot as plt
    

# Function that reads the list of class names from a file
def read_classes(file):
    classes = None
    with open(file, mode='r', encoding="utf-8") as f:
        classes = f.read().splitlines()
    return classes
  
# Get the class list
names = 'imagenet.names'
classes = read_classes(names)
    

def get_preprocessed_img(img_path):
    # read the image
    input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    input_img = input_img.astype(np.float32)
  
    input_img = cv2.resize(input_img, (256, 256))
  
    # define preprocess parameters
    mean = np.array([0.485, 0.456, 0.406]) * 255.0
    scale = 1 / 255.0
    std = [0.229, 0.224, 0.225]
  
    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale to set pixel values from 0 to 1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=True  # center crop
    )
    # 3. divide by std
    input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1)
    return input_blob
    

def get_opencv_dnn_prediction(opencv_net, preproc_img, imagenet_labels):
    # set OpenCV DNN input
    opencv_net.setInput(preproc_img)
  
    # OpenCV DNN inference
    out = opencv_net.forward()
    print("OpenCV DNN prediction: \n")
    print("* shape: ", out.shape)
  
    # get the predicted class ID
    imagenet_class_id = np.argmax(out)
  
    # get confidence
    confidence = out[0][imagenet_class_id]
    print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id]))
    print("* confidence: {:.4f}".format(confidence))
    

model_path = 'models/mobilenetv3-large.onnx'  # the path used by the conversion step above
opencv_net = cv2.dnn.readNetFromONNX(model_path)
  
input_img = get_preprocessed_img('yorkie.jpg')
  
get_opencv_dnn_prediction(opencv_net, input_img, classes)
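
A small extension of the above (same variables assumed): printing the five highest-scoring classes rather than only the argmax often makes misclassifications easier to interpret:

opencv_net.setInput(input_img)
out = opencv_net.forward()[0]
for class_id in np.argsort(out)[::-1][:5]:  # indices of the 5 largest scores
    print("{:8.3f}  {}".format(out[class_id], classes[class_id]))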
    

Real-time classification of a camera stream with MobileNetV3-Large

import cv2
import numpy as np
  
def get_preprocessed_img(input_img):
    # keep an unmodified copy of the frame for display
    org_img = input_img.copy()
  
    input_img = input_img.astype(np.float32)
    input_img = cv2.resize(input_img, (256, 256))
  
    # define preprocess parameters
    mean = np.array([0.485, 0.456, 0.406]) * 255.0
    scale = 1 / 255.0
    std = [0.229, 0.224, 0.225]
  
    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale to set pixel values from 0 to 1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=True  # center crop
    )
    # 3. divide by std
    input_blob[0] /= np.asarray(std, dtype=np.float32).reshape(3, 1, 1)
    return input_blob, org_img
  
# Function that reads the list of class names from a file
def read_classes(file):
    classes = None
    with open(file, mode='r', encoding="utf-8") as f:
        classes = f.read().splitlines()
    return classes
  
def get_opencv_dnn_prediction(opencv_net, preproc_img, imagenet_labels):
    # set OpenCV DNN input
    opencv_net.setInput(preproc_img)
  
    # OpenCV DNN inference
    out = opencv_net.forward()
  
    # get the predicted class ID
    imagenet_class_id = np.argmax(out)
  
    # get confidence
    confidence = out[0][imagenet_class_id]
  
    result = f'{imagenet_labels[imagenet_class_id]} ({confidence:.3f})'
    return result, confidence
    

# Get the class list
names = 'imagenet.names'
classes = read_classes(names)
  
model_path = 'models/mobilenetv3-large.onnx'
model = cv2.dnn.readNetFromONNX(model_path)
  
capture = cv2.VideoCapture(0)
while True:
    # capture one frame from the camera
    result, frame = capture.read()
    if not result:
        break

    h, w = frame.shape[:2]
    input_img, org_img = get_preprocessed_img(frame)

    s, conf = get_opencv_dnn_prediction(model, input_img, classes)
    if conf >= 10:  # heuristic threshold on the raw logit score
        point = (10, 15)
        font = cv2.FONT_HERSHEY_SIMPLEX
        scale = 0.5
        color = (0, 0, 255)
        thickness = 1
        cv2.rectangle(org_img, (0, 0), (w, 20), color=(255, 255, 255), thickness=-1)
        cv2.putText(org_img, s, point, font, scale, color, thickness, cv2.LINE_AA)

    cv2.imshow("classification", org_img)
    key = cv2.waitKey(10)
    if key == ord('q'):
        break

capture.release()
cv2.destroyAllWindows()
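
To compare the real-time performance of mobilenetv3-large against a heavier network such as efficientnet-b7, the per-frame inference time can be measured with OpenCV's tick counter; a sketch of the lines that would replace the prediction call inside the loop:

t0 = cv2.getTickCount()
s, conf = get_opencv_dnn_prediction(model, input_img, classes)
dt = (cv2.getTickCount() - t0) / cv2.getTickFrequency()  # seconds per inference
print("inference: {:.1f} ms ({:.1f} FPS)".format(dt * 1000.0, 1.0 / dt))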