From 65b3db36462edfb91a2ea247df024976f024c6d1 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:03:13 +0300 Subject: [PATCH 01/29] Refactor main function and modularize codebase - Refactored the main function for improved readability and maintainability. - Extracted benchmarking, prediction, and utility functions into separate modules. - Created dedicated modules for ONNX, OpenVINO, and CUDA operations to enhance code organization. - Simplified CUDA configurations and added detailed comments for clarity. --- README.md | 10 +- benchmark/__init__.py | 0 benchmark/benchmark_models.py | 18 ++ benchmark/benchmark_utils.py | 121 +++++++++++ common/__init__.py | 0 common/utils.py | 54 +++++ prediction/__init__.py | 0 prediction/prediction_models.py | 19 ++ prediction/prediction_utils.py | 79 +++++++ src/benchmark.py | 3 +- src/main.py | 359 ++++---------------------------- 11 files changed, 342 insertions(+), 321 deletions(-) create mode 100644 benchmark/__init__.py create mode 100644 benchmark/benchmark_models.py create mode 100644 benchmark/benchmark_utils.py create mode 100644 common/__init__.py create mode 100644 common/utils.py create mode 100644 prediction/__init__.py create mode 100644 prediction/prediction_models.py create mode 100644 prediction/prediction_utils.py diff --git a/README.md b/README.md index bb10f69..2be52b3 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,20 @@ 2. [Requirements](#requirements) - [Steps to Run](#steps-to-run) - [Example Command](#example-command) -5. [RESULTS](#results) ![Static Badge](https://img.shields.io/badge/update-orange) +3. [RESULTS](#results) ![Static Badge](https://img.shields.io/badge/update-orange) - [Results explanation](#results-explanation) - [Example Input](#example-input) -6. [Benchmark Implementation Details](#benchmark-implementation-details) ![New](https://img.shields.io/badge/-New-842E5B) +4. [Benchmark Implementation Details](#benchmark-implementation-details) ![New](https://img.shields.io/badge/-New-842E5B) - [PyTorch CPU & CUDA](#pytorch-cpu--cuda) - [TensorRT FP32 & FP16](#tensorrt-fp32--fp16) - [ONNX](#onnx) - [OpenVINO](#openvino) -7. [Used methodologies](#used-methodologies) ![New](https://img.shields.io/badge/-New-96E5FE) +5. [Used methodologies](#used-methodologies) ![New](https://img.shields.io/badge/-New-96E5FE) - [TensorRT Optimization](#tensorrt-optimization) - [ONNX Exporter](#onnx-exporter) - [OV Exporter](#ov-exporter) -10. [Author](#author) -11. [References](#references) +6. [Author](#author) +7. 
[References](#references) diff --git a/benchmark/__init__.py b/benchmark/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py new file mode 100644 index 0000000..71b791e --- /dev/null +++ b/benchmark/benchmark_models.py @@ -0,0 +1,18 @@ +from benchmark.benchmark_utils import run_benchmark +from src.benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +import openvino as ov +import torch +import onnxruntime as ort + + +def benchmark_onnx_model(ort_session: ort.InferenceSession): + run_benchmark(None, None, None, ort_session, onnx=True) + + +def benchmark_ov_model(ov_model: ov.CompiledModel): + ov_benchmark = OVBenchmark(ov_model, input_shape=(1, 3, 224, 224)) + ov_benchmark.run() + + +def benchmark_cuda_model(cuda_model: torch.nn.Module, device: str, dtype: torch.dtype): + run_benchmark(cuda_model, device, dtype) diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py new file mode 100644 index 0000000..74ae67f --- /dev/null +++ b/benchmark/benchmark_utils.py @@ -0,0 +1,121 @@ +import logging + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from typing import Dict, Any +import torch +import onnxruntime as ort + +from src.benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark + + +def run_benchmark( + model: torch.nn.Module, + device: str, + dtype: torch.dtype, + ort_session: ort.InferenceSession = None, + onnx: bool = False, +) -> None: + """ + Run and log the benchmark for the given model, device, and dtype. + + :param onnx: + :param ort_session: + :param model: The model to be benchmarked. + :param device: The device to run the benchmark on ("cpu" or "cuda"). + :param dtype: The data type to be used in the benchmark (typically torch.float32 or torch.float16). + """ + if onnx: + logging.info(f"Running Benchmark for ONNX") + benchmark = ONNXBenchmark(ort_session, input_shape=(32, 3, 224, 224)) + else: + logging.info(f"Running Benchmark for {device.upper()}") + benchmark = PyTorchBenchmark(model, device=device, dtype=dtype) + benchmark.run() + + +def run_all_benchmarks( + models: Dict[str, Any], img_batch: np.ndarray +) -> Dict[str, float]: + """ + Run benchmarks for all models and return a dictionary of average inference times. + + :param models: Dictionary of models. Key is model type ("onnx", "ov", "pytorch", "trt_fp32", "trt_fp16"), value is the model. + :param img_batch: The batch of images to run the benchmark on. + :return: Dictionary of average inference times. Key is model type, value is average inference time. 
+ """ + results = {} + + # ONNX benchmark + onnx_benchmark = ONNXBenchmark(models["onnx"], img_batch.shape) + avg_time_onnx = onnx_benchmark.run() + results["ONNX"] = avg_time_onnx + + # OpenVINO benchmark + ov_benchmark = OVBenchmark(models["ov"], img_batch.shape) + avg_time_ov = ov_benchmark.run() + results["OpenVINO"] = avg_time_ov + + # PyTorch + TRT benchmark + configs = [ + ("cpu", torch.float32, False), + ("cuda", torch.float32, False), + ("cuda", torch.float32, True), + ("cuda", torch.float16, True), + ] + for device, precision, is_trt in configs: + model_to_use = models["pytorch"].to(device) + + if not is_trt: + pytorch_benchmark = PyTorchBenchmark( + model_to_use, device=device, dtype=precision + ) + avg_time_pytorch = pytorch_benchmark.run() + results[f"PyTorch_{device}"] = avg_time_pytorch + + else: + # TensorRT benchmarks + if precision == torch.float32 or precision == torch.float16: + mode = "fp32" if precision == torch.float32 else "fp16" + trt_benchmark = PyTorchBenchmark( + models[f"trt_{mode}"], device=device, dtype=precision + ) + avg_time_trt = trt_benchmark.run() + results[f"TRT_{mode}"] = avg_time_trt + + return results + + +def plot_benchmark_results(results: Dict[str, float]): + """ + Plot the benchmark results using Seaborn. + + :param results: Dictionary of average inference times. Key is model type, value is average inference time. + """ + # Convert dictionary to two lists for plotting + models = list(results.keys()) + times = list(results.values()) + + # Create a DataFrame for plotting + data = pd.DataFrame({"Model": models, "Time": times}) + + # Sort the DataFrame by Time + data = data.sort_values("Time", ascending=True) + + # Plot + plt.figure(figsize=(10, 6)) + ax = sns.barplot(x=data["Time"], y=data["Model"], palette="rocket") + + # Adding the actual values on the bars + for index, value in enumerate(data["Time"]): + ax.text(value, index, f"{value:.2f} ms", color="black", ha="left", va="center") + + plt.xlabel("Average Inference Time (ms)") + plt.ylabel("Model Type") + plt.title("ResNet50 - Inference Benchmark Results") + + # Save the plot to a file + plt.savefig("./inference/plot.png", bbox_inches="tight") + plt.show() \ No newline at end of file diff --git a/common/__init__.py b/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/common/utils.py b/common/utils.py new file mode 100644 index 0000000..a190468 --- /dev/null +++ b/common/utils.py @@ -0,0 +1,54 @@ +import argparse +import openvino as ov +import torch +from model import ModelLoader +from onnx_exporter import ONNXExporter +from ov_exporter import OVExporter +import onnxruntime as ort + + +# Model Initialization Functions +def init_onnx_model(onnx_path: str, model_loader: ModelLoader, device: torch.device) -> ort.InferenceSession: + onnx_exporter = ONNXExporter(model_loader.model, device, onnx_path) + onnx_exporter.export_model() + return ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) + + +def init_ov_model(onnx_path: str) -> ov.CompiledModel: + ov_exporter = OVExporter(onnx_path) + return ov_exporter.export_model() + + +def init_cuda_model(model_loader: ModelLoader, device: torch.device, dtype: torch.dtype) -> torch.nn.Module: + cuda_model = model_loader.model.to(device) + if device == "cuda": + cuda_model = torch.jit.trace(cuda_model, [torch.randn((1, 3, 224, 224)).to(device)]) + return cuda_model + + +def parse_arguments(): + # Initialize ArgumentParser with description + parser = argparse.ArgumentParser(description="PyTorch Inference") + 
parser.add_argument( + "--image_path", + type=str, + default="./inference/cat3.jpg", + help="Path to the image to predict", + ) + parser.add_argument( + "--topk", type=int, default=5, help="Number of top predictions to show" + ) + parser.add_argument( + "--onnx_path", + type=str, + default="./inference/model.onnx", + help="Path where model in ONNX format will be exported", + ) + parser.add_argument( + "--mode", + choices=["onnx", "ov", "cuda", "all"], + required=True, + help="Mode for exporting and running the model. Choices are: onnx, ov, cuda or all.", + ) + + return parser.parse_args() diff --git a/prediction/__init__.py b/prediction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prediction/prediction_models.py b/prediction/prediction_models.py new file mode 100644 index 0000000..b02b0be --- /dev/null +++ b/prediction/prediction_models.py @@ -0,0 +1,19 @@ +import onnxruntime as ort +import openvino as ov +import numpy as np +import torch +from typing import List +from prediction.prediction_utils import make_prediction + + +# Prediction Functions +def predict_onnx_model(ort_session: ort.InferenceSession, img_batch: np.ndarray, topk: int, categories: List[str]): + make_prediction(ort_session, img_batch.cpu().numpy(), topk, categories) + + +def predict_ov_model(ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str]): + make_prediction(ov_model, img_batch.cpu().numpy(), topk, categories) + + +def predict_cuda_model(cuda_model: torch.nn.Module, img_batch: torch.Tensor, topk: int, categories: List[str], precision: torch.dtype): + make_prediction(cuda_model, img_batch, topk, categories, precision) \ No newline at end of file diff --git a/prediction/prediction_utils.py b/prediction/prediction_utils.py new file mode 100644 index 0000000..44dddfd --- /dev/null +++ b/prediction/prediction_utils.py @@ -0,0 +1,79 @@ +import logging +from typing import List, Tuple, Union, Dict, Any +import openvino as ov +import torch +import onnxruntime as ort +import numpy as np + + +def make_prediction( + model: Union[torch.nn.Module, ort.InferenceSession, ov.CompiledModel], + img_batch: Union[torch.Tensor, np.ndarray], + topk: int, + categories: List[str], + precision: torch.dtype = None, +) -> None: + """ + Make and print predictions for the given model, img_batch, topk, and categories. + + :param model: The model (or ONNX Runtime InferenceSession) to make predictions with. + :param img_batch: The batch of images to make predictions on. + :param topk: The number of top predictions to show. + :param categories: The list of categories to label the predictions. + :param precision: The data type to be used for the predictions (typically torch.float32 or torch.float16) for PyTorch models. + """ + is_onnx_model = isinstance(model, ort.InferenceSession) + is_ov_model = isinstance(model, ov.CompiledModel) + + if is_onnx_model: + # Get the input name for the ONNX model. + input_name = model.get_inputs()[0].name + + # Run the model with the properly named input. + ort_inputs = {input_name: img_batch} + ort_outs = model.run(None, ort_inputs) + + # Assuming the model returns a list with one array of class probabilities. + if len(ort_outs) > 0: + prob = ort_outs[0] + + # Checking if prob has more than one dimension and selecting the right one. 
+ if prob.ndim > 1: + prob = prob[0] + + # Apply Softmax to get probabilities + prob = np.exp(prob) / np.sum(np.exp(prob)) + elif is_ov_model: + # For OV, the input name is usually the first input + input_name = next(iter(model.inputs)) + outputs = model(inputs={input_name: img_batch}) + + # Assuming the model returns a dictionary with one key for class probabilities + prob_key = next(iter(outputs)) + prob = outputs[prob_key] + + # Apply Softmax to get probabilities + prob = np.exp(prob[0]) / np.sum(np.exp(prob[0])) + + else: # PyTorch Model + if isinstance(img_batch, np.ndarray): + img_batch = torch.tensor(img_batch) + else: + img_batch = img_batch.clone().to(precision) + model.eval() + with torch.no_grad(): + outputs = model(img_batch.to(precision)) + prob = torch.nn.functional.softmax(outputs[0], dim=0) + prob = prob.cpu().numpy() + + top_indices = prob.argsort()[-topk:][::-1] + top_probs = prob[top_indices] + + for i in range(topk): + probability = top_probs[i] + if is_onnx_model: + # Accessing the DataFrame by row number using .iloc[] + class_label = categories.iloc[top_indices[i]].item() + else: + class_label = categories[0][int(top_indices[i])] + logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") \ No newline at end of file diff --git a/src/benchmark.py b/src/benchmark.py index cf24764..9e866ae 100644 --- a/src/benchmark.py +++ b/src/benchmark.py @@ -115,7 +115,6 @@ def __init__( self.nwarmup = nwarmup self.nruns = nruns - def run(self): print("Warming up ...") # Adjusting the batch size in the input shape to match the expected input size of the model. @@ -196,4 +195,4 @@ def run(self): avg_time = total_time / self.num_runs logging.info(f"Average inference time: {avg_time * 1000:.2f} ms") - return avg_time * 1000 \ No newline at end of file + return avg_time * 1000 diff --git a/src/main.py b/src/main.py index aed87bf..9ea0ac4 100644 --- a/src/main.py +++ b/src/main.py @@ -1,344 +1,75 @@ -import argparse -import os -import logging -import pandas as pd -import openvino as ov -import torch -import torch_tensorrt -from typing import List, Tuple, Union, Dict, Any -import onnxruntime as ort -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt - -from model import ModelLoader +from benchmark.benchmark_models import * +from benchmark.benchmark_utils import * +from common.utils import * from image_processor import ImageProcessor -from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark -from onnx_exporter import ONNXExporter -from ov_exporter import OVExporter +from prediction.prediction_models import * +from src.model import ModelLoader # Configure logging logging.basicConfig(filename="model.log", level=logging.INFO) -def run_benchmark( - model: torch.nn.Module, - device: str, - dtype: torch.dtype, - ort_session: ort.InferenceSession = None, - onnx: bool = False, -) -> None: - """ - Run and log the benchmark for the given model, device, and dtype. - - :param onnx: - :param ort_session: - :param model: The model to be benchmarked. - :param device: The device to run the benchmark on ("cpu" or "cuda"). - :param dtype: The data type to be used in the benchmark (typically torch.float32 or torch.float16). 
- """ - if onnx: - logging.info(f"Running Benchmark for ONNX") - benchmark = ONNXBenchmark(ort_session, input_shape=(32, 3, 224, 224)) - else: - logging.info(f"Running Benchmark for {device.upper()}") - benchmark = PyTorchBenchmark(model, device=device, dtype=dtype) - benchmark.run() - - -def make_prediction( - model: Union[torch.nn.Module, ort.InferenceSession, ov.CompiledModel], - img_batch: Union[torch.Tensor, np.ndarray], - topk: int, - categories: List[str], - precision: torch.dtype = None, -) -> None: - """ - Make and print predictions for the given model, img_batch, topk, and categories. - - :param model: The model (or ONNX Runtime InferenceSession) to make predictions with. - :param img_batch: The batch of images to make predictions on. - :param topk: The number of top predictions to show. - :param categories: The list of categories to label the predictions. - :param precision: The data type to be used for the predictions (typically torch.float32 or torch.float16) for PyTorch models. - """ - is_onnx_model = isinstance(model, ort.InferenceSession) - is_ov_model = isinstance(model, ov.CompiledModel) - - if is_onnx_model: - # Get the input name for the ONNX model. - input_name = model.get_inputs()[0].name - - # Run the model with the properly named input. - ort_inputs = {input_name: img_batch} - ort_outs = model.run(None, ort_inputs) - - # Assuming the model returns a list with one array of class probabilities. - if len(ort_outs) > 0: - prob = ort_outs[0] - - # Checking if prob has more than one dimension and selecting the right one. - if prob.ndim > 1: - prob = prob[0] - - # Apply Softmax to get probabilities - prob = np.exp(prob) / np.sum(np.exp(prob)) - elif is_ov_model: - # For OV, the input name is usually the first input - input_name = next(iter(model.inputs)) - outputs = model(inputs={input_name: img_batch}) - - # Assuming the model returns a dictionary with one key for class probabilities - prob_key = next(iter(outputs)) - prob = outputs[prob_key] - - # Apply Softmax to get probabilities - prob = np.exp(prob[0]) / np.sum(np.exp(prob[0])) - - else: # PyTorch Model - if isinstance(img_batch, np.ndarray): - img_batch = torch.tensor(img_batch) - else: - img_batch = img_batch.clone().to(precision) - model.eval() - with torch.no_grad(): - outputs = model(img_batch.to(precision)) - prob = torch.nn.functional.softmax(outputs[0], dim=0) - prob = prob.cpu().numpy() - - top_indices = prob.argsort()[-topk:][::-1] - top_probs = prob[top_indices] - - for i in range(topk): - probability = top_probs[i] - if is_onnx_model: - # Accessing the DataFrame by row number using .iloc[] - class_label = categories.iloc[top_indices[i]].item() - else: - class_label = categories[0][int(top_indices[i])] - logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") - - -def run_all_benchmarks( - models: Dict[str, Any], img_batch: np.ndarray -) -> Dict[str, float]: - """ - Run benchmarks for all models and return a dictionary of average inference times. - - :param models: Dictionary of models. Key is model type ("onnx", "ov", "pytorch", "trt_fp32", "trt_fp16"), value is the model. - :param img_batch: The batch of images to run the benchmark on. - :return: Dictionary of average inference times. Key is model type, value is average inference time. 
- """ - results = {} - - # ONNX benchmark - onnx_benchmark = ONNXBenchmark(models["onnx"], img_batch.shape) - avg_time_onnx = onnx_benchmark.run() - results["ONNX"] = avg_time_onnx - - # OpenVINO benchmark - ov_benchmark = OVBenchmark(models["ov"], img_batch.shape) - avg_time_ov = ov_benchmark.run() - results["OpenVINO"] = avg_time_ov - - # PyTorch + TRT benchmark - configs = [ - ("cpu", torch.float32, False), - ("cuda", torch.float32, False), - ("cuda", torch.float32, True), - ("cuda", torch.float16, True), - ] - for device, precision, is_trt in configs: - model_to_use = models["pytorch"].to(device) - - if not is_trt: - pytorch_benchmark = PyTorchBenchmark( - model_to_use, device=device, dtype=precision - ) - avg_time_pytorch = pytorch_benchmark.run() - results[f"PyTorch_{device}"] = avg_time_pytorch - - else: - # TensorRT benchmarks - if precision == torch.float32 or precision == torch.float16: - mode = "fp32" if precision == torch.float32 else "fp16" - trt_benchmark = PyTorchBenchmark( - models[f"trt_{mode}"], device=device, dtype=precision - ) - avg_time_trt = trt_benchmark.run() - results[f"TRT_{mode}"] = avg_time_trt - - return results - - -def plot_benchmark_results(results: Dict[str, float]): - """ - Plot the benchmark results using Seaborn. - - :param results: Dictionary of average inference times. Key is model type, value is average inference time. - """ - # Convert dictionary to two lists for plotting - models = list(results.keys()) - times = list(results.values()) - - # Create a DataFrame for plotting - data = pd.DataFrame({"Model": models, "Time": times}) - - # Sort the DataFrame by Time - data = data.sort_values("Time", ascending=True) - - # Plot - plt.figure(figsize=(10, 6)) - ax = sns.barplot(x=data["Time"], y=data["Model"], palette="rocket") - - # Adding the actual values on the bars - for index, value in enumerate(data["Time"]): - ax.text(value, index, f"{value:.2f} ms", color="black", ha="left", va="center") - - plt.xlabel("Average Inference Time (ms)") - plt.ylabel("Model Type") - plt.title("ResNet50 - Inference Benchmark Results") - - # Save the plot to a file - plt.savefig("./inference/plot.png", bbox_inches="tight") - plt.show() - - def main() -> None: """ Main function to run inference, benchmarks, and predictions on the model using provided image and optional parameters. """ - # Initialize ArgumentParser with description - parser = argparse.ArgumentParser(description="PyTorch Inference") - parser.add_argument( - "--image_path", - type=str, - default="./inference/cat3.jpg", - help="Path to the image to predict", - ) - parser.add_argument( - "--topk", type=int, default=5, help="Number of top predictions to show" - ) - parser.add_argument( - "--onnx_path", - type=str, - default="./inference/model.onnx", - help="Path where model in ONNX format will be exported", - ) - parser.add_argument( - "--mode", - choices=["onnx", "ov", "cuda", "all"], - required=True, - help="Mode for exporting and running the model. 
Choices are: onnx, ov, cuda or all.", - ) - - args = parser.parse_args() + args = parse_arguments() + # Model and Image Initialization models = {} - - # Setup device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Initialize model and image processor model_loader = ModelLoader(device=device) img_processor = ImageProcessor(img_path=args.image_path, device=device) img_batch = img_processor.process_image() - if args.mode == "onnx" or args.mode == "all": - onnx_path = args.onnx_path - - # Export the model to ONNX format using ONNXExporter - onnx_exporter = ONNXExporter(model_loader.model, device, onnx_path) - onnx_exporter.export_model() - - # Create ONNX Runtime session - ort_session = ort.InferenceSession( - onnx_path, providers=["CPUExecutionProvider"] - ) - - models["onnx"] = ort_session - - # Run benchmark - # run_benchmark(None, None, None, ort_session, onnx=True) - - # Make prediction - print(f"Making prediction with {ort.get_device()} for ONNX model") - make_prediction( - ort_session, - img_batch.cpu().numpy(), - topk=args.topk, - categories=model_loader.categories, - ) - if args.mode == "ov" or args.mode == "all": - # Export the ONNX model to OpenVINO - ov_exporter = OVExporter(args.onnx_path) - ov_model = ov_exporter.export_model() - - models["ov"] = ov_model - - # Benchmark the OpenVINO model - ov_benchmark = OVBenchmark(ov_model, input_shape=(1, 3, 224, 224)) - ov_benchmark.run() - - # Run inference using the OpenVINO model - img_batch_ov = ( - img_processor.process_image().cpu().numpy() - ) # Assuming batch size of 1 - print(f"Making prediction with OpenVINO model") - make_prediction( - ov_benchmark.compiled_model, - img_batch_ov, - topk=args.topk, - categories=model_loader.categories, - ) - if args.mode == "cuda" or args.mode == "all": - # Define configurations for which to run benchmarks and make predictions - configs = [ - ("cpu", torch.float32), - ("cuda", torch.float32), - ("cuda", torch.float16), + # ONNX + if args.mode in ["onnx", "all"]: + ort_session = init_onnx_model(args.onnx_path, model_loader, device) + benchmark_onnx_model(ort_session) + predict_onnx_model(ort_session, img_batch, args.topk, model_loader.categories) + + # OpenVINO + if args.mode in ["ov", "all"]: + ov_model = init_ov_model(args.onnx_path) + benchmark_ov_model(ov_model) + predict_ov_model(ov_model, img_batch, args.topk, model_loader.categories) + + # CUDA + if args.mode in ["cuda", "all"]: + # CUDA configurations + cuda_configs = [ + {"device": "cpu", "precision": torch.float32, "is_trt": False}, + {"device": "cuda", "precision": torch.float32, "is_trt": False}, + {"device": "cuda", "precision": torch.float32, "is_trt": True}, + {"device": "cuda", "precision": torch.float16, "is_trt": True}, ] - for device, precision in configs: - model_to_use = model_loader.model.to(device) - models["pytorch"] = model_loader.model + for config in cuda_configs: + device = config["device"] + precision = config["precision"] + is_trt = config["is_trt"] - if device == "cuda": - print(f"Tracing {device} model") - model_to_use = torch.jit.trace( - model_to_use, [torch.randn((1, 3, 224, 224)).to(device)] - ) + model = init_cuda_model(model_loader, device, precision) - if precision == torch.float32 or precision == torch.float16: - print("Compiling TensorRT model") - model_to_use = torch_tensorrt.compile( - model_to_use, - inputs=[torch_tensorrt.Input((32, 3, 224, 224), dtype=precision)], - enabled_precisions={precision}, - truncate_long_and_double=True, - ) - if precision == torch.float32: - 
models["trt_fp32"] = model_to_use - else: - models["trt_fp16"] = model_to_use + # If the configuration is not for TensorRT, store the model under a PyTorch key + if not is_trt: + models[f"PyTorch_{device}"] = model + else: + # If it is for TensorRT, determine the mode (FP32 or FP16) and store under a TensorRT key + mode = "fp32" if precision == torch.float32 else "fp16" + models[f"trt_{mode}"] = model - """print(f"Making prediction with {device} model in {precision} precision") - make_prediction( - model_to_use, - img_batch.to(device), - args.topk, - model_loader.categories, - precision, + predict_cuda_model( + model, img_batch, args.topk, model_loader.categories, precision ) - print(f"Running Benchmark for {device} model in {precision} precision") - run_benchmark(model_to_use, device, precision) """ + # Aggregate Benchmark (if mode is "all") if args.mode == "all": - # Run all benchmarks - results = run_all_benchmarks(models, img_batch) + models["onnx"] = ort_session + models["ov"] = ov_model - # Plot results + results = run_all_benchmarks(models, img_batch) plot_benchmark_results(results) From a14095c2a2c2067ee81d18b85f57a41b33c642e4 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:06:47 +0300 Subject: [PATCH 02/29] Fixed imports --- benchmark/benchmark_models.py | 4 ++-- prediction/prediction_models.py | 2 +- src/main.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index 71b791e..de4cb1d 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,5 +1,5 @@ -from benchmark.benchmark_utils import run_benchmark -from src.benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +from benchmark_utils import run_benchmark +from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov import torch import onnxruntime as ort diff --git a/prediction/prediction_models.py b/prediction/prediction_models.py index b02b0be..75a73ec 100644 --- a/prediction/prediction_models.py +++ b/prediction/prediction_models.py @@ -3,7 +3,7 @@ import numpy as np import torch from typing import List -from prediction.prediction_utils import make_prediction +from prediction_utils import make_prediction # Prediction Functions diff --git a/src/main.py b/src/main.py index 9ea0ac4..f6f8123 100644 --- a/src/main.py +++ b/src/main.py @@ -1,9 +1,9 @@ -from benchmark.benchmark_models import * -from benchmark.benchmark_utils import * -from common.utils import * +from benchmark_models import * +from benchmark_utils import * +from utils import * from image_processor import ImageProcessor -from prediction.prediction_models import * -from src.model import ModelLoader +from prediction_models import * +from model import ModelLoader # Configure logging logging.basicConfig(filename="model.log", level=logging.INFO) From 86540f944e8a175e0e0d266e3cf74547c3bd007d Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:16:47 +0300 Subject: [PATCH 03/29] Fixed imports and refactored --- benchmark/benchmark_models.py | 2 +- benchmark/benchmark_utils.py | 2 +- common/utils.py | 12 +++++++++--- prediction/prediction_models.py | 21 +++++++++++++++++---- prediction/prediction_utils.py | 2 +- src/main.py | 8 ++++---- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index de4cb1d..d35e0dc 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,4 +1,4 @@ -from 
benchmark_utils import run_benchmark +from benchmark.benchmark_utils import run_benchmark from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov import torch diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index 74ae67f..15afea3 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -118,4 +118,4 @@ def plot_benchmark_results(results: Dict[str, float]): # Save the plot to a file plt.savefig("./inference/plot.png", bbox_inches="tight") - plt.show() \ No newline at end of file + plt.show() diff --git a/common/utils.py b/common/utils.py index a190468..5eb26e6 100644 --- a/common/utils.py +++ b/common/utils.py @@ -8,7 +8,9 @@ # Model Initialization Functions -def init_onnx_model(onnx_path: str, model_loader: ModelLoader, device: torch.device) -> ort.InferenceSession: +def init_onnx_model( + onnx_path: str, model_loader: ModelLoader, device: torch.device +) -> ort.InferenceSession: onnx_exporter = ONNXExporter(model_loader.model, device, onnx_path) onnx_exporter.export_model() return ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) @@ -19,10 +21,14 @@ def init_ov_model(onnx_path: str) -> ov.CompiledModel: return ov_exporter.export_model() -def init_cuda_model(model_loader: ModelLoader, device: torch.device, dtype: torch.dtype) -> torch.nn.Module: +def init_cuda_model( + model_loader: ModelLoader, device: torch.device, dtype: torch.dtype +) -> torch.nn.Module: cuda_model = model_loader.model.to(device) if device == "cuda": - cuda_model = torch.jit.trace(cuda_model, [torch.randn((1, 3, 224, 224)).to(device)]) + cuda_model = torch.jit.trace( + cuda_model, [torch.randn((1, 3, 224, 224)).to(device)] + ) return cuda_model diff --git a/prediction/prediction_models.py b/prediction/prediction_models.py index 75a73ec..929ad18 100644 --- a/prediction/prediction_models.py +++ b/prediction/prediction_models.py @@ -7,13 +7,26 @@ # Prediction Functions -def predict_onnx_model(ort_session: ort.InferenceSession, img_batch: np.ndarray, topk: int, categories: List[str]): +def predict_onnx_model( + ort_session: ort.InferenceSession, + img_batch: np.ndarray, + topk: int, + categories: List[str], +): make_prediction(ort_session, img_batch.cpu().numpy(), topk, categories) -def predict_ov_model(ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str]): +def predict_ov_model( + ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str] +): make_prediction(ov_model, img_batch.cpu().numpy(), topk, categories) -def predict_cuda_model(cuda_model: torch.nn.Module, img_batch: torch.Tensor, topk: int, categories: List[str], precision: torch.dtype): - make_prediction(cuda_model, img_batch, topk, categories, precision) \ No newline at end of file +def predict_cuda_model( + cuda_model: torch.nn.Module, + img_batch: torch.Tensor, + topk: int, + categories: List[str], + precision: torch.dtype, +): + make_prediction(cuda_model, img_batch, topk, categories, precision) diff --git a/prediction/prediction_utils.py b/prediction/prediction_utils.py index 44dddfd..6abab00 100644 --- a/prediction/prediction_utils.py +++ b/prediction/prediction_utils.py @@ -76,4 +76,4 @@ def make_prediction( class_label = categories.iloc[top_indices[i]].item() else: class_label = categories[0][int(top_indices[i])] - logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") \ No newline at end of file + logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") diff --git a/src/main.py 
b/src/main.py index f6f8123..34a1a91 100644 --- a/src/main.py +++ b/src/main.py @@ -1,8 +1,8 @@ -from benchmark_models import * -from benchmark_utils import * -from utils import * +from benchmark.benchmark_models import * +from benchmark.benchmark_utils import * +from common.utils import * from image_processor import ImageProcessor -from prediction_models import * +from prediction.prediction_models import * from model import ModelLoader # Configure logging From 4f5b75d6d4218e07006c598602621f04b707747b Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:20:08 +0300 Subject: [PATCH 04/29] Fixed imports and refactored --- src/main.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index 34a1a91..53c6f66 100644 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,13 @@ -from benchmark.benchmark_models import * -from benchmark.benchmark_utils import * -from common.utils import * +import logging + +from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model +from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results +from common.utils import ( + parse_arguments, + init_onnx_model, + init_ov_model, + init_cuda_model, +) from image_processor import ImageProcessor from prediction.prediction_models import * from model import ModelLoader From bc67224c90e7eb5bcb4e0c4f14253f43ea77d361 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:24:09 +0300 Subject: [PATCH 05/29] Fixed imports and refactored --- benchmark/benchmark_models.py | 2 +- benchmark/benchmark_utils.py | 2 +- src/main.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index d35e0dc..de4cb1d 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,4 +1,4 @@ -from benchmark.benchmark_utils import run_benchmark +from benchmark_utils import run_benchmark from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov import torch diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index 15afea3..f0c2ec5 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -8,7 +8,7 @@ import torch import onnxruntime as ort -from src.benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark def run_benchmark( diff --git a/src/main.py b/src/main.py index 53c6f66..503d4cf 100644 --- a/src/main.py +++ b/src/main.py @@ -1,15 +1,15 @@ import logging -from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model -from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results -from common.utils import ( +from benchmark_models import benchmark_onnx_model, benchmark_ov_model +from benchmark_utils import run_all_benchmarks, plot_benchmark_results +from utils import ( parse_arguments, init_onnx_model, init_ov_model, init_cuda_model, ) from image_processor import ImageProcessor -from prediction.prediction_models import * +from prediction_models import * from model import ModelLoader # Configure logging From 0f59440104c224f898bc4688038a688ad4bf8e30 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:37:35 +0300 Subject: [PATCH 06/29] Fixed imports and refactored --- benchmark/benchmark_models.py | 2 +- prediction/prediction_models.py | 2 +- src/main.py | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git 
a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index de4cb1d..d35e0dc 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,4 +1,4 @@ -from benchmark_utils import run_benchmark +from benchmark.benchmark_utils import run_benchmark from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov import torch diff --git a/prediction/prediction_models.py b/prediction/prediction_models.py index 929ad18..aaaf230 100644 --- a/prediction/prediction_models.py +++ b/prediction/prediction_models.py @@ -3,7 +3,7 @@ import numpy as np import torch from typing import List -from prediction_utils import make_prediction +from prediction.prediction_utils import make_prediction # Prediction Functions diff --git a/src/main.py b/src/main.py index 503d4cf..7d4ffff 100644 --- a/src/main.py +++ b/src/main.py @@ -1,19 +1,21 @@ import logging +import sys -from benchmark_models import benchmark_onnx_model, benchmark_ov_model -from benchmark_utils import run_all_benchmarks, plot_benchmark_results -from utils import ( +from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model +from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results +from common.utils import ( parse_arguments, init_onnx_model, init_ov_model, init_cuda_model, ) from image_processor import ImageProcessor -from prediction_models import * +from prediction.prediction_models import * from model import ModelLoader # Configure logging logging.basicConfig(filename="model.log", level=logging.INFO) +sys.path.append('/workspace') def main() -> None: From a70efcd472c8884c8203d95ceaae1cbd42440061 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:41:24 +0300 Subject: [PATCH 07/29] Fixed imports and refactored --- benchmark/benchmark_models.py | 2 +- benchmark/benchmark_utils.py | 2 +- src/{benchmark.py => benchmark_class.py} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/{benchmark.py => benchmark_class.py} (100%) diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index d35e0dc..c917a85 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,5 +1,5 @@ from benchmark.benchmark_utils import run_benchmark -from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +from benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov import torch import onnxruntime as ort diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index f0c2ec5..80c540b 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -8,7 +8,7 @@ import torch import onnxruntime as ort -from benchmark import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +from benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark def run_benchmark( diff --git a/src/benchmark.py b/src/benchmark_class.py similarity index 100% rename from src/benchmark.py rename to src/benchmark_class.py From 79bb3670d9010c91a331d1c61ab1502285bfd317 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:43:17 +0300 Subject: [PATCH 08/29] Fixed imports and refactored --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 7d4ffff..cc14d32 100644 --- a/src/main.py +++ b/src/main.py @@ -1,7 +1,7 @@ import logging import sys -from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model +from ..benchmark.benchmark_models import benchmark_onnx_model, 
benchmark_ov_model from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results from common.utils import ( parse_arguments, From 687791d6ca336dfb4cf8e9c9092ee86c462bcff7 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:48:15 +0300 Subject: [PATCH 09/29] Fixed imports and refactored --- src/main.py => main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) rename src/main.py => main.py (93%) diff --git a/src/main.py b/main.py similarity index 93% rename from src/main.py rename to main.py index cc14d32..1987da1 100644 --- a/src/main.py +++ b/main.py @@ -1,7 +1,6 @@ import logging -import sys -from ..benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model +from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results from common.utils import ( parse_arguments, @@ -9,13 +8,12 @@ init_ov_model, init_cuda_model, ) -from image_processor import ImageProcessor +from src.image_processor import ImageProcessor from prediction.prediction_models import * -from model import ModelLoader +from src.model import ModelLoader # Configure logging logging.basicConfig(filename="model.log", level=logging.INFO) -sys.path.append('/workspace') def main() -> None: From fb27d3adf3ab86e7d121e52675ae6ef320186bac Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:51:02 +0300 Subject: [PATCH 10/29] Fixed imports and refactored --- benchmark/benchmark_models.py | 2 +- benchmark/benchmark_utils.py | 2 +- common/utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index c917a85..483143d 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,5 +1,5 @@ from benchmark.benchmark_utils import run_benchmark -from benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +from src.benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov import torch import onnxruntime as ort diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index 80c540b..b6ba8fa 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -8,7 +8,7 @@ import torch import onnxruntime as ort -from benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark +from src.benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark def run_benchmark( diff --git a/common/utils.py b/common/utils.py index 5eb26e6..1d0743b 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,7 +1,7 @@ import argparse import openvino as ov import torch -from model import ModelLoader +from src.model import ModelLoader from onnx_exporter import ONNXExporter from ov_exporter import OVExporter import onnxruntime as ort From 7dfa3da5b19e0ad08534015ca062d557e868588d Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:52:26 +0300 Subject: [PATCH 11/29] Fixed imports and refactored --- common/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/utils.py b/common/utils.py index 1d0743b..a5b9b4a 100644 --- a/common/utils.py +++ b/common/utils.py @@ -2,8 +2,8 @@ import openvino as ov import torch from src.model import ModelLoader -from onnx_exporter import ONNXExporter -from ov_exporter import OVExporter +from src.onnx_exporter import ONNXExporter +from src.ov_exporter import OVExporter import onnxruntime as ort From 
f13f6728e2b5ef795fc4e074505e32c6759d96e9 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 21:54:44 +0300 Subject: [PATCH 12/29] Fixed imports and refactored --- prediction/prediction_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prediction/prediction_utils.py b/prediction/prediction_utils.py index 6abab00..91495c3 100644 --- a/prediction/prediction_utils.py +++ b/prediction/prediction_utils.py @@ -60,7 +60,8 @@ def make_prediction( img_batch = torch.tensor(img_batch) else: img_batch = img_batch.clone().to(precision) - model.eval() + if isinstance(model, torch.nn.Module): # Check if the model is a PyTorch model + model.eval() with torch.no_grad(): outputs = model(img_batch.to(precision)) prob = torch.nn.functional.softmax(outputs[0], dim=0) From 2d01ce54fedb087b5d4ec1c15f58b1f68afbbb5c Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:00:19 +0300 Subject: [PATCH 13/29] Fixed imports and refactored --- benchmark/benchmark_models.py | 4 +++- main.py | 4 ++-- prediction/prediction_utils.py | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/benchmark_models.py b/benchmark/benchmark_models.py index 483143d..e772360 100644 --- a/benchmark/benchmark_models.py +++ b/benchmark/benchmark_models.py @@ -1,3 +1,4 @@ +import src.benchmark_class from benchmark.benchmark_utils import run_benchmark from src.benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark import openvino as ov @@ -9,9 +10,10 @@ def benchmark_onnx_model(ort_session: ort.InferenceSession): run_benchmark(None, None, None, ort_session, onnx=True) -def benchmark_ov_model(ov_model: ov.CompiledModel): +def benchmark_ov_model(ov_model: ov.CompiledModel) -> src.benchmark_class.OVBenchmark: ov_benchmark = OVBenchmark(ov_model, input_shape=(1, 3, 224, 224)) ov_benchmark.run() + return ov_benchmark def benchmark_cuda_model(cuda_model: torch.nn.Module, device: str, dtype: torch.dtype): diff --git a/main.py b/main.py index 1987da1..4ed20de 100644 --- a/main.py +++ b/main.py @@ -39,8 +39,8 @@ def main() -> None: # OpenVINO if args.mode in ["ov", "all"]: ov_model = init_ov_model(args.onnx_path) - benchmark_ov_model(ov_model) - predict_ov_model(ov_model, img_batch, args.topk, model_loader.categories) + ov_benchmark = benchmark_ov_model(ov_model) + predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories) # CUDA if args.mode in ["cuda", "all"]: diff --git a/prediction/prediction_utils.py b/prediction/prediction_utils.py index 91495c3..6abab00 100644 --- a/prediction/prediction_utils.py +++ b/prediction/prediction_utils.py @@ -60,8 +60,7 @@ def make_prediction( img_batch = torch.tensor(img_batch) else: img_batch = img_batch.clone().to(precision) - if isinstance(model, torch.nn.Module): # Check if the model is a PyTorch model - model.eval() + model.eval() with torch.no_grad(): outputs = model(img_batch.to(precision)) prob = torch.nn.functional.softmax(outputs[0], dim=0) From 555bf6df95f0bf0844a3c8fbcfce847bdbb63dc2 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:02:25 +0300 Subject: [PATCH 14/29] Fixed imports and refactored --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 4ed20de..7f4dc1d 100644 --- a/main.py +++ b/main.py @@ -68,7 +68,7 @@ def main() -> None: models[f"trt_{mode}"] = model predict_cuda_model( - model, img_batch, args.topk, model_loader.categories, precision + model.to(device), img_batch.to(device), args.topk, 
model_loader.categories, precision ) # Aggregate Benchmark (if mode is "all") From 6bf5cb202345f61087da88b0020693205a6d3645 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:12:16 +0300 Subject: [PATCH 15/29] Fixed imports and refactored --- main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 7f4dc1d..9f79ef9 100644 --- a/main.py +++ b/main.py @@ -62,13 +62,15 @@ def main() -> None: # If the configuration is not for TensorRT, store the model under a PyTorch key if not is_trt: models[f"PyTorch_{device}"] = model + model = model.to(device) + img_batch = img_batch.to(device) else: # If it is for TensorRT, determine the mode (FP32 or FP16) and store under a TensorRT key mode = "fp32" if precision == torch.float32 else "fp16" models[f"trt_{mode}"] = model predict_cuda_model( - model.to(device), img_batch.to(device), args.topk, model_loader.categories, precision + model, img_batch, args.topk, model_loader.categories, precision ) # Aggregate Benchmark (if mode is "all") From 525278aba6512fe4793966f3ab042dcef14a5b73 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:15:55 +0300 Subject: [PATCH 16/29] Fixed imports and refactored --- main.py | 8 ++++++++ prediction/prediction_utils.py | 1 + 2 files changed, 9 insertions(+) diff --git a/main.py b/main.py index 9f79ef9..70c6b95 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import logging +import torch_tensorrt from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results @@ -65,6 +66,13 @@ def main() -> None: model = model.to(device) img_batch = img_batch.to(device) else: + print("Compiling TensorRT model") + model_to_use = torch_tensorrt.compile( + model_to_use, + inputs=[torch_tensorrt.Input((1, 3, 224, 224), dtype=precision)], + enabled_precisions={precision}, + truncate_long_and_double=True, + ) # If it is for TensorRT, determine the mode (FP32 or FP16) and store under a TensorRT key mode = "fp32" if precision == torch.float32 else "fp16" models[f"trt_{mode}"] = model diff --git a/prediction/prediction_utils.py b/prediction/prediction_utils.py index 6abab00..72b06dd 100644 --- a/prediction/prediction_utils.py +++ b/prediction/prediction_utils.py @@ -77,3 +77,4 @@ def make_prediction( else: class_label = categories[0][int(top_indices[i])] logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") + print(f"#{i + 1}: {int(probability * 100)}% {class_label}") From b523c5380e6d19dfef5fdcf26e660818e5cf64bf Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:17:33 +0300 Subject: [PATCH 17/29] Fixed imports and refactored --- main.py | 4 ++-- prediction/prediction_utils.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 70c6b95..5fb5b4a 100644 --- a/main.py +++ b/main.py @@ -68,14 +68,14 @@ def main() -> None: else: print("Compiling TensorRT model") model_to_use = torch_tensorrt.compile( - model_to_use, + model, inputs=[torch_tensorrt.Input((1, 3, 224, 224), dtype=precision)], enabled_precisions={precision}, truncate_long_and_double=True, ) # If it is for TensorRT, determine the mode (FP32 or FP16) and store under a TensorRT key mode = "fp32" if precision == torch.float32 else "fp16" - models[f"trt_{mode}"] = model + models[f"trt_{mode}"] = model_to_use predict_cuda_model( model, img_batch, args.topk, model_loader.categories, precision diff --git a/prediction/prediction_utils.py 
b/prediction/prediction_utils.py index 72b06dd..6abab00 100644 --- a/prediction/prediction_utils.py +++ b/prediction/prediction_utils.py @@ -77,4 +77,3 @@ def make_prediction( else: class_label = categories[0][int(top_indices[i])] logging.info(f"#{i + 1}: {int(probability * 100)}% {class_label}") - print(f"#{i + 1}: {int(probability * 100)}% {class_label}") From 81fa6e496af2554de3364932d3bdf5d6ee4ee4e8 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:22:02 +0300 Subject: [PATCH 18/29] Fixed imports and refactored --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 5fb5b4a..bf1f471 100644 --- a/main.py +++ b/main.py @@ -67,7 +67,7 @@ def main() -> None: img_batch = img_batch.to(device) else: print("Compiling TensorRT model") - model_to_use = torch_tensorrt.compile( + model = torch_tensorrt.compile( model, inputs=[torch_tensorrt.Input((1, 3, 224, 224), dtype=precision)], enabled_precisions={precision}, @@ -75,7 +75,7 @@ def main() -> None: ) # If it is for TensorRT, determine the mode (FP32 or FP16) and store under a TensorRT key mode = "fp32" if precision == torch.float32 else "fp16" - models[f"trt_{mode}"] = model_to_use + models[f"trt_{mode}"] = model predict_cuda_model( model, img_batch, args.topk, model_loader.categories, precision From 85486a54b71bcb3e85de0b5b62e730f3163c9af3 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:24:56 +0300 Subject: [PATCH 19/29] Fixed imports and refactored --- common/utils.py | 2 +- prediction/prediction_utils.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/common/utils.py b/common/utils.py index a5b9b4a..cd3e120 100644 --- a/common/utils.py +++ b/common/utils.py @@ -53,7 +53,7 @@ def parse_arguments(): parser.add_argument( "--mode", choices=["onnx", "ov", "cuda", "all"], - required=True, + default="all", help="Mode for exporting and running the model. Choices are: onnx, ov, cuda or all.", ) diff --git a/prediction/prediction_utils.py b/prediction/prediction_utils.py index 6abab00..a9ea429 100644 --- a/prediction/prediction_utils.py +++ b/prediction/prediction_utils.py @@ -26,6 +26,7 @@ def make_prediction( is_ov_model = isinstance(model, ov.CompiledModel) if is_onnx_model: + logging.info(f"Running prediction for ONNX model") # Get the input name for the ONNX model. 
input_name = model.get_inputs()[0].name @@ -44,6 +45,7 @@ def make_prediction( # Apply Softmax to get probabilities prob = np.exp(prob) / np.sum(np.exp(prob)) elif is_ov_model: + logging.info(f"Running prediction for OV model") # For OV, the input name is usually the first input input_name = next(iter(model.inputs)) outputs = model(inputs={input_name: img_batch}) @@ -56,6 +58,7 @@ def make_prediction( prob = np.exp(prob[0]) / np.sum(np.exp(prob[0])) else: # PyTorch Model + logging.info(f"Running prediction for PyTorch model") if isinstance(img_batch, np.ndarray): img_batch = torch.tensor(img_batch) else: From b734a87ca7f243daa304ce92e69980263775cba7 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:28:13 +0300 Subject: [PATCH 20/29] Fixed imports and refactored --- benchmark/benchmark_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index b6ba8fa..3c7ec0e 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -31,7 +31,7 @@ def run_benchmark( logging.info(f"Running Benchmark for ONNX") benchmark = ONNXBenchmark(ort_session, input_shape=(32, 3, 224, 224)) else: - logging.info(f"Running Benchmark for {device.upper()}") + logging.info(f"Running Benchmark for {device.upper()} and precision {dtype}") benchmark = PyTorchBenchmark(model, device=device, dtype=dtype) benchmark.run() @@ -66,7 +66,7 @@ def run_all_benchmarks( ("cuda", torch.float16, True), ] for device, precision, is_trt in configs: - model_to_use = models["pytorch"].to(device) + model_to_use = models[f"PyTorch_{device}"].to(device) if not is_trt: pytorch_benchmark = PyTorchBenchmark( From 9d762545eb49a2cb1f14bcacf4d2452646cd5602 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:35:59 +0300 Subject: [PATCH 21/29] Fixed imports and refactored --- main.py | 19 +++++++++++-------- prediction/prediction_models.py | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index bf1f471..3ccb895 100644 --- a/main.py +++ b/main.py @@ -34,14 +34,16 @@ def main() -> None: # ONNX if args.mode in ["onnx", "all"]: ort_session = init_onnx_model(args.onnx_path, model_loader, device) - benchmark_onnx_model(ort_session) - predict_onnx_model(ort_session, img_batch, args.topk, model_loader.categories) + if args.mode != "all": + benchmark_onnx_model(ort_session) + predict_onnx_model(ort_session, img_batch, args.topk, model_loader.categories) # OpenVINO if args.mode in ["ov", "all"]: ov_model = init_ov_model(args.onnx_path) - ov_benchmark = benchmark_ov_model(ov_model) - predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories) + if args.mode != "all": + ov_benchmark = benchmark_ov_model(ov_model) + predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories) # CUDA if args.mode in ["cuda", "all"]: @@ -69,7 +71,7 @@ def main() -> None: print("Compiling TensorRT model") model = torch_tensorrt.compile( model, - inputs=[torch_tensorrt.Input((1, 3, 224, 224), dtype=precision)], + inputs=[torch_tensorrt.Input((32, 3, 224, 224), dtype=precision)], enabled_precisions={precision}, truncate_long_and_double=True, ) @@ -77,9 +79,10 @@ def main() -> None: mode = "fp32" if precision == torch.float32 else "fp16" models[f"trt_{mode}"] = model - predict_cuda_model( - model, img_batch, args.topk, model_loader.categories, precision - ) + if args.mode != "all": + predict_cuda_model( + model, img_batch, args.topk, 
model_loader.categories, precision + ) # Aggregate Benchmark (if mode is "all") if args.mode == "all": diff --git a/prediction/prediction_models.py b/prediction/prediction_models.py index aaaf230..ad1d4de 100644 --- a/prediction/prediction_models.py +++ b/prediction/prediction_models.py @@ -17,7 +17,7 @@ def predict_onnx_model( def predict_ov_model( - ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str] + ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str], to_skip: bool = False ): make_prediction(ov_model, img_batch.cpu().numpy(), topk, categories) From 071ed43bb456653c5bd828098de5328aa3740da8 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:36:14 +0300 Subject: [PATCH 22/29] Fixed imports and refactored --- prediction/prediction_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prediction/prediction_models.py b/prediction/prediction_models.py index ad1d4de..aaaf230 100644 --- a/prediction/prediction_models.py +++ b/prediction/prediction_models.py @@ -17,7 +17,7 @@ def predict_onnx_model( def predict_ov_model( - ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str], to_skip: bool = False + ov_model: ov.CompiledModel, img_batch: np.ndarray, topk: int, categories: List[str] ): make_prediction(ov_model, img_batch.cpu().numpy(), topk, categories) From 4fea89eec2b4d0cba9daa8f286d704304fbb788b Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 22:49:31 +0300 Subject: [PATCH 23/29] Fixed imports and refactored --- benchmark/benchmark_utils.py | 6 +++++- main.py | 2 +- src/benchmark_class.py | 3 --- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index 3c7ec0e..38973be 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -49,11 +49,13 @@ def run_all_benchmarks( results = {} # ONNX benchmark + logging.info(f"Running benchmark inference for ONNX model") onnx_benchmark = ONNXBenchmark(models["onnx"], img_batch.shape) avg_time_onnx = onnx_benchmark.run() results["ONNX"] = avg_time_onnx # OpenVINO benchmark + logging.info(f"Running benchmark inference for OpenVINO model") ov_benchmark = OVBenchmark(models["ov"], img_batch.shape) avg_time_ov = ov_benchmark.run() results["OpenVINO"] = avg_time_ov @@ -72,6 +74,7 @@ def run_all_benchmarks( pytorch_benchmark = PyTorchBenchmark( model_to_use, device=device, dtype=precision ) + logging.info(f"Running benchmark inference for PyTorch_{device} model") avg_time_pytorch = pytorch_benchmark.run() results[f"PyTorch_{device}"] = avg_time_pytorch @@ -79,6 +82,7 @@ def run_all_benchmarks( # TensorRT benchmarks if precision == torch.float32 or precision == torch.float16: mode = "fp32" if precision == torch.float32 else "fp16" + logging.info(f"Running benchmark inference for TRT_{mode} model") trt_benchmark = PyTorchBenchmark( models[f"trt_{mode}"], device=device, dtype=precision ) @@ -106,7 +110,7 @@ def plot_benchmark_results(results: Dict[str, float]): # Plot plt.figure(figsize=(10, 6)) - ax = sns.barplot(x=data["Time"], y=data["Model"], palette="rocket") + ax = sns.barplot(x=data["Time"], y=data["Model"], hue=data["Model"], palette="rocket", legend=False) # Adding the actual values on the bars for index, value in enumerate(data["Time"]): diff --git a/main.py b/main.py index 3ccb895..efc7595 100644 --- a/main.py +++ b/main.py @@ -17,7 +17,7 @@ logging.basicConfig(filename="model.log", level=logging.INFO) -def 
main() -> None: +def main(): """ Main function to run inference, benchmarks, and predictions on the model using provided image and optional parameters. diff --git a/src/benchmark_class.py b/src/benchmark_class.py index 9e866ae..b908425 100644 --- a/src/benchmark_class.py +++ b/src/benchmark_class.py @@ -90,9 +90,6 @@ def run(self): f"Iteration {i}/{self.nruns}, ave batch time {np.mean(timings) * 1000:.2f} ms" ) - # Print and log results - print(f"Input shape: {input_data.size()}") - print(f"Output features size: {features.size()}") logging.info(f"Average batch time: {np.mean(timings) * 1000:.2f} ms") return np.mean(timings) * 1000 From 190c13bf6ac1e3f32fcf86af6e753b604673d9b5 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 23:00:41 +0300 Subject: [PATCH 24/29] Fixed imports and refactored --- README.md | 68 +++++------------------------------------- src/benchmark_class.py | 14 +++++++-- 2 files changed, 19 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 2be52b3..533e9fc 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ - [TensorRT FP32 & FP16](#tensorrt-fp32--fp16) - [ONNX](#onnx) - [OpenVINO](#openvino) -5. [Used methodologies](#used-methodologies) ![New](https://img.shields.io/badge/-New-96E5FE) +5. [Benchmarking and Visualization](#benchmarking-and-visualization) ![New](https://img.shields.io/badge/-New-96E5FE) - [TensorRT Optimization](#tensorrt-optimization) - [ONNX Exporter](#onnx-exporter) - [OV Exporter](#ov-exporter) @@ -44,20 +44,20 @@ docker build -t awesome-tensorrt docker run --gpus all --rm -it awesome-tensorrt # 3. Run the Script inside the Container -python src/main.py +python main.py [--mode all] ``` ### Arguments - `--image_path`: (Optional) Specifies the path to the image you want to predict. - `--topk`: (Optional) Specifies the number of top predictions to show. Defaults to 5 if not provided. -- `--mode`: Specifies the mode for exporting and running the model. Choices are: `onnx`, `ov`, `all`. +- `--mode`: (Optional) Specifies the mode for exporting and running the model. Choices are: `onnx`, `ov`, `all`. If not provided, it defaults to `all`. ### Example Command ```sh -python src/main.py --topk 3 --mode=all +python main.py --topk 3 --mode=ov ``` -This command will run predictions on the default image (`./inference/cat3.jpg`), show the top 3 predictions, and run all models (PyTorch CPU, CUDA, ONNX, OV, TRT-FP16, TRT-FP32). At the end results plot will be saved to `./inference/plot.png` +This command will run predictions on the default image (`./inference/cat3.jpg`), show the top 3 predictions, and run OpenVINO model. At the end results plot will be saved to `./inference/plot.png` ## RESULTS ### Inference Benchmark Results @@ -116,62 +116,8 @@ OpenVINO is a toolkit from Intel that optimizes deep learning model inference fo 4. Perform inference on the provided image using the OpenVINO model. 5. Benchmark results, including average inference time, are logged for the OpenVINO model. -## Used methodologies -### TensorRT Optimization -TensorRT is a high-performance deep learning inference optimizer and runtime library developed by NVIDIA. It is designed for optimizing and deploying trained neural network models on production environments. This project supports TensorRT optimizations in FP32 (single precision) and FP16 (half precision) modes, offering different trade-offs between inference speed and model accuracy. 
- -#### Features -- **Performance Boost**: TensorRT can significantly accelerate the inference of neural network models, making it suitable for deployment in resource-constrained environments. -- **Precision Modes**: Supports FP32 for maximum accuracy and FP16 for faster performance with a minor trade-off in accuracy. -- **Layer Fusion**: TensorRT fuses layers and tensors in the neural network to reduce memory access overhead and improve execution speed. -- **Dynamic Tensor Memory**: Efficiently handles varying batch sizes without re-optimization. - -#### Usage -When running the main script, use the'- mode all' argument to employ TensorRT optimizations in the project. -This will initiate all models, including PyTorch models, that will be compiled to the TRT model with `FP16` and `FP32` precision modes. Then, in one of the steps, we will run inference on the specified image using the TensorRT-optimized model. -Example: -```sh -python src/main.py --mode all -``` -#### Requirements -Ensure you have the TensorRT library and the torch_tensorrt package installed in your environment. Also, for FP16 optimizations, it's recommended to have a GPU that supports half-precision arithmetic (like NVIDIA GPUs with Tensor Cores). - -### ONNX Exporter -ONNX Model Exporter (`ONNXExporter`) utility is incorporated within this project to enable converting the native PyTorch model into the ONNX format. -Using the ONNX format, inference and benchmarking can be performed with the ONNX Runtime, which offers platform-agnostic optimizations and is widely supported across numerous platforms and devices. - -#### Features -- **Standardized Format**: ONNX provides an open-source format for AI models. It defines an extensible computation graph model and definitions of built-in operators and standard data types. -- **Interoperability**: Models in ONNX format can be used across various frameworks, tools, runtimes, and compilers. -- **Optimizations**: The ONNX Runtime provides performance optimizations for both cloud and edge devices. - -#### Usage -To leverage the `ONNXExporter` and conduct inference using the ONNX Runtime, utilize the `--mode onnx` argument when executing the main script. -This will initiate the conversion process and then run inference on the specified image using the ONNX model. -Example: -```sh -python src/main.py --mode onnx -``` - -#### Requirements -Ensure the ONNX library is installed in your environment to use the ONNXExporter. Additionally, if you want to run inference using the ONNX model, install the ONNX Runtime. - -### OV Exporter -OpenVINO Model Exporter utility (`OVExporter`) has been integrated into this project to facilitate the conversion of the ONNX model to the OpenVINO format. -This enables inference and benchmarking using OpenVINO, a framework optimized for Intel hardware, providing substantial speed improvements, especially on CPUs. - -#### Features -- **Model Optimization**: Converts the ONNX model to OpenVINO's Intermediate Representation (IR) format. This optimized format allows for faster inference times on Intel hardware. -- **Versatility**: OpenVINO can target various Intel hardware devices such as CPUs, integrated GPUs, FPGAs, and VPUs. -- **Ease of Use**: The `OVExporter` seamlessly transitions from ONNX to OpenVINO, abstracting the conversion details and providing a straightforward interface. - -#### Usage -To utilize `OVExporter` and perform inference using OpenVINO, use the `--mode ov` argument when running the main script. 
-This will trigger the conversion process and subsequently run inference on the provided image using the optimized OpenVINO model. -Example: -```sh -python src/main.py --mode ov -``` +## Benchmarking and Visualization +The results of the benchmarks for all modes are saved and visualized in a bar chart, showcasing the average inference times across different backends. The visualization aids in comparing the performance gains achieved with different optimizations. #### Requirements Ensure you have installed the OpenVINO Toolkit and the necessary dependencies to use OpenVINO's model optimizer and inference engine. diff --git a/src/benchmark_class.py b/src/benchmark_class.py index b908425..6465299 100644 --- a/src/benchmark_class.py +++ b/src/benchmark_class.py @@ -124,12 +124,17 @@ def run(self): print("Starting benchmark ...") timings = [] - for _ in range(self.nruns): + for i in range(1, self.nruns+1): start_time = time.time() _ = self.ort_session.run(None, {"input": input_data}) end_time = time.time() timings.append(end_time - start_time) + if i % 10 == 0: + print( + f"Iteration {i}/{self.nruns}, ave batch time {np.mean(timings) * 1000:.2f} ms" + ) + avg_time = np.mean(timings) * 1000 logging.info(f"Average ONNX inference time: {avg_time:.2f} ms") return avg_time @@ -185,11 +190,16 @@ def run(self): # Benchmarking total_time = 0 - for _ in range(self.num_runs): + for i in range(1, self.num_runs+1): start_time = time.time() _ = self.inference(self.dummy_input) total_time += time.time() - start_time + if i % 10 == 0: + print( + f"Iteration {i}/{self.nruns}, ave batch time {total_time / self.num_runs * 1000:.2f} ms" + ) + avg_time = total_time / self.num_runs logging.info(f"Average inference time: {avg_time * 1000:.2f} ms") return avg_time * 1000 From 5991cddce56d1b60096cb6da7300be834b0ccb25 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 23:04:52 +0300 Subject: [PATCH 25/29] Fixed imports and refactored --- README.md | 2 +- src/benchmark_class.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 533e9fc..d6d8140 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ python main.py [--mode all] python main.py --topk 3 --mode=ov ``` -This command will run predictions on the default image (`./inference/cat3.jpg`), show the top 3 predictions, and run OpenVINO model. At the end results plot will be saved to `./inference/plot.png` +This command will run predictions on the default image (`./inference/cat3.jpg`), show the top 3 predictions, and run OpenVINO model. 
Note: the results plot is created only for `--mode=all` and is saved to `./inference/plot.png`
 
 ## RESULTS
 ### Inference Benchmark Results
diff --git a/src/benchmark_class.py b/src/benchmark_class.py
index 6465299..47e8069 100644
--- a/src/benchmark_class.py
+++ b/src/benchmark_class.py
@@ -156,8 +156,8 @@ def __init__(
         self.core = ov.Core()
         self.compiled_model = None
         self.input_shape = input_shape
-        self.warmup_runs = 50
-        self.num_runs = 100
+        self.nwarmup = 50
+        self.nruns = 100
         self.dummy_input = np.random.randn(*input_shape).astype(np.float32)
 
     def warmup(self):
@@ -185,21 +185,21 @@ def run(self):
         """
         # Warm-up runs
         logging.info("Warming up ...")
-        for _ in range(self.warmup_runs):
+        for _ in range(self.nwarmup):
             self.warmup()
 
         # Benchmarking
         total_time = 0
-        for i in range(1, self.num_runs+1):
+        for i in range(1, self.nruns+1):
             start_time = time.time()
             _ = self.inference(self.dummy_input)
             total_time += time.time() - start_time
 
             if i % 10 == 0:
                 print(
-                    f"Iteration {i}/{self.nruns}, ave batch time {total_time / self.num_runs * 1000:.2f} ms"
+                    f"Iteration {i}/{self.nruns}, ave batch time {total_time / i * 1000:.2f} ms"
                 )
 
-        avg_time = total_time / self.num_runs
+        avg_time = total_time / self.nruns
         logging.info(f"Average inference time: {avg_time * 1000:.2f} ms")
         return avg_time * 1000
 

From bf4e322cd63cbea4d7a5dfbc2a8d39339e6c5067 Mon Sep 17 00:00:00 2001
From: Dima Birenbaum
Date: Fri, 6 Oct 2023 23:31:43 +0300
Subject: [PATCH 26/29] Fixed imports and refactored

---
 README.md       | 10 ++++++++++
 common/utils.py | 11 ++++++++---
 main.py         |  6 +++++-
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index d6d8140..7feb195 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@
 3. [RESULTS](#results) ![Static Badge](https://img.shields.io/badge/update-orange)
    - [Results explanation](#results-explanation)
    - [Example Input](#example-input)
+   - [Example prediction results](#example-prediction-results)
 4. [Benchmark Implementation Details](#benchmark-implementation-details) ![New](https://img.shields.io/badge/-New-842E5B)
    - [PyTorch CPU & CUDA](#pytorch-cpu--cuda)
    - [TensorRT FP32 & FP16](#tensorrt-fp32--fp16)
@@ -76,6 +77,15 @@ Here is an example of the input image to run predictions and benchmarks on:
 
 
 
+### Example prediction results
+```
+#1: 15% Egyptian cat
+#2: 14% tiger cat
+#3: 9% tabby
+#4: 2% doormat
+#5: 2% lynx
+```
+
 ## Benchmark Implementation Details
 Here you can see the flow for each model and benchmark. 
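The "Benchmarking and Visualization" section introduced above describes the `--mode=all` aggregate flow only in prose. A minimal sketch of how the helpers from `benchmark/benchmark_utils.py` compose, assuming `models` and `img_batch` have been prepared earlier in `main()` (the exact wiring of this step in `main.py` is not part of these hunks):

```python
# Sketch of the --mode=all aggregate step, under the assumptions above.
from benchmark.benchmark_utils import run_all_benchmarks, plot_benchmark_results

# Runs ONNX, OpenVINO, PyTorch CPU/CUDA, and TensorRT benchmarks in turn;
# results maps a backend label (e.g. "ONNX", "OpenVINO", "PyTorch_cpu")
# to its average inference time in milliseconds.
results = run_all_benchmarks(models, img_batch)

# Renders the horizontal bar chart of average inference times; per the
# README, the figure is saved to ./inference/plot.png.
plot_benchmark_results(results)
```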
diff --git a/common/utils.py b/common/utils.py index cd3e120..495d17b 100644 --- a/common/utils.py +++ b/common/utils.py @@ -7,12 +7,17 @@ import onnxruntime as ort -# Model Initialization Functions -def init_onnx_model( +def export_onnx_model( onnx_path: str, model_loader: ModelLoader, device: torch.device -) -> ort.InferenceSession: +) -> None: onnx_exporter = ONNXExporter(model_loader.model, device, onnx_path) onnx_exporter.export_model() + + +def init_onnx_model( + onnx_path: str, model_loader: ModelLoader, device: torch.device +) -> ort.InferenceSession: + export_onnx_model(model_loader, device, onnx_path) return ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) diff --git a/main.py b/main.py index efc7595..9ab5069 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,6 @@ import logging +import os.path + import torch_tensorrt from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model @@ -7,7 +9,7 @@ parse_arguments, init_onnx_model, init_ov_model, - init_cuda_model, + init_cuda_model, export_onnx_model, ) from src.image_processor import ImageProcessor from prediction.prediction_models import * @@ -42,6 +44,8 @@ def main(): if args.mode in ["ov", "all"]: ov_model = init_ov_model(args.onnx_path) if args.mode != "all": + if not os.path.isfile(args.onnx_path): + export_onnx_model(model_loader, device, args.onnx_path) ov_benchmark = benchmark_ov_model(ov_model) predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories) From 318f0058d6eba2327d69d44b6852c62d610bae62 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 23:33:27 +0300 Subject: [PATCH 27/29] Fixed imports and refactored --- main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 9ab5069..be90d4b 100644 --- a/main.py +++ b/main.py @@ -42,10 +42,11 @@ def main(): # OpenVINO if args.mode in ["ov", "all"]: + # If ONNX model wasn't exporter previously - export it + if not os.path.isfile(args.onnx_path): + export_onnx_model(model_loader, device, args.onnx_path) ov_model = init_ov_model(args.onnx_path) if args.mode != "all": - if not os.path.isfile(args.onnx_path): - export_onnx_model(model_loader, device, args.onnx_path) ov_benchmark = benchmark_ov_model(ov_model) predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories) From 9da80631a7eaa3202a588f6d42d326c20a287bca Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 23:36:28 +0300 Subject: [PATCH 28/29] Fixed imports and refactored --- common/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/utils.py b/common/utils.py index 495d17b..8cadf76 100644 --- a/common/utils.py +++ b/common/utils.py @@ -17,7 +17,7 @@ def export_onnx_model( def init_onnx_model( onnx_path: str, model_loader: ModelLoader, device: torch.device ) -> ort.InferenceSession: - export_onnx_model(model_loader, device, onnx_path) + export_onnx_model(onnx_path=onnx_path, model_loader=model_loader, device=device) return ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"]) From e0b70180ef8280732c1c9952c7f5fbee2a8ab7b8 Mon Sep 17 00:00:00 2001 From: Dima Birenbaum Date: Fri, 6 Oct 2023 23:38:50 +0300 Subject: [PATCH 29/29] Fixed imports and refactored --- main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index be90d4b..7e34521 100644 --- a/main.py +++ b/main.py @@ -42,9 +42,10 @@ def main(): # OpenVINO if args.mode in ["ov", "all"]: - # If ONNX model 
wasn't exporter previously - export it + # Check if ONNX model wasn't exported previously if not os.path.isfile(args.onnx_path): - export_onnx_model(model_loader, device, args.onnx_path) + export_onnx_model(onnx_path=args.onnx_path, model_loader=model_loader, device=device) + ov_model = init_ov_model(args.onnx_path) if args.mode != "all": ov_benchmark = benchmark_ov_model(ov_model)
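Taken together, patches 26-29 leave the OpenVINO branch of `main()` in the shape below. This is a sketch reassembled from the hunks above, not the verbatim final file; `args`, `model_loader`, `device`, and `img_batch` are set up earlier in `main()`:

```python
import os.path

# OpenVINO path: net effect of the last four patches (sketch).
if args.mode in ["ov", "all"]:
    # OpenVINO conversion starts from the ONNX file, so export it first if missing.
    if not os.path.isfile(args.onnx_path):
        export_onnx_model(onnx_path=args.onnx_path, model_loader=model_loader, device=device)

    ov_model = init_ov_model(args.onnx_path)
    if args.mode != "all":  # in "all" mode, benchmarking is aggregated later
        ov_benchmark = benchmark_ov_model(ov_model)
        predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories)
```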