#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
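#
# A hedged usage sketch (the module path and file paths below are illustrative
# assumptions; the relative imports suggest this file lives in the onnxruntime
# tools package):
#
#   python -m onnxruntime.tools.convert_onnx_models_to_ort /path/to/models \
#       --output_dir /path/to/output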
from __future__ import annotations
import argparse
import contextlib
import enum
import os
import pathlib
import tempfile
import onnxruntime as ort
from .file_utils import files_from_file_or_dir, path_match_suffix_ignore_case
from .onnx_model_utils import get_optimization_level
from .ort_format_model import create_config_from_models


class OptimizationStyle(enum.Enum):
Fixed = 0
Runtime = 1


def _optimization_suffix(optimization_level_str: str, optimization_style: OptimizationStyle, suffix: str):
return "{}{}{}".format(
f".{optimization_level_str}" if optimization_level_str != "all" else "",
".with_runtime_opt" if optimization_style == OptimizationStyle.Runtime else "",
suffix,
)
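# For example (illustrative values):
#   _optimization_suffix("all", OptimizationStyle.Fixed, ".ort") -> ".ort"
#   _optimization_suffix("basic", OptimizationStyle.Runtime, ".ort") -> ".basic.with_runtime_opt.ort"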


def _create_config_file_path(
model_path_or_dir: pathlib.Path,
output_dir: pathlib.Path | None,
optimization_level_str: str,
optimization_style: OptimizationStyle,
enable_type_reduction: bool,
):
config_name = "{}{}".format(
"required_operators_and_types" if enable_type_reduction else "required_operators",
_optimization_suffix(optimization_level_str, optimization_style, ".config"),
)
if model_path_or_dir.is_dir():
return (output_dir or model_path_or_dir) / config_name
model_config_path = model_path_or_dir.with_suffix(f".{config_name}")
if output_dir is not None:
return output_dir / model_config_path.name
return model_config_path
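# For example, a model at /models/mnist.onnx (an illustrative path) with no output_dir,
# optimization level "all", Fixed style, and type reduction disabled yields the config path
# /models/mnist.required_operators.config.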


def _create_session_options(
optimization_level: ort.GraphOptimizationLevel,
output_model_path: pathlib.Path,
    custom_op_library: pathlib.Path | None,
session_options_config_entries: dict[str, str],
):
so = ort.SessionOptions()
so.optimized_model_filepath = str(output_model_path)
so.graph_optimization_level = optimization_level
if custom_op_library:
so.register_custom_ops_library(str(custom_op_library))
for key, value in session_options_config_entries.items():
so.add_session_config_entry(key, value)
return so
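# A minimal usage sketch (values are illustrative): session options that write the
# optimized model to "model.basic.onnx" when an InferenceSession is created with them.
#   so = _create_session_options(
#       ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
#       pathlib.Path("model.basic.onnx"),
#       None,  # no custom op library
#       {},
#   )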


def _convert(
model_path_or_dir: pathlib.Path,
output_dir: pathlib.Path | None,
optimization_level_str: str,
optimization_style: OptimizationStyle,
    custom_op_library: pathlib.Path | None,
create_optimized_onnx_model: bool,
allow_conversion_failures: bool,
target_platform: str,
session_options_config_entries: dict[str, str],
) -> list[pathlib.Path]:
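    """Convert each matching ONNX model under model_path_or_dir to an ORT format model.

    Returns the list of paths to the ORT format models that were created.
    """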
model_dir = model_path_or_dir if model_path_or_dir.is_dir() else model_path_or_dir.parent
output_dir = output_dir or model_dir
optimization_level = get_optimization_level(optimization_level_str)
def is_model_file_to_convert(file_path: pathlib.Path):
if not path_match_suffix_ignore_case(file_path, ".onnx"):
return False
        # Ignore files with a .optimized.onnx extension, which are presumably
        # saved optimized ONNX models from previous runs of this script.
if path_match_suffix_ignore_case(file_path, ".optimized.onnx"):
print(f"Ignoring '{file_path}'")
return False
return True
models = files_from_file_or_dir(model_path_or_dir, is_model_file_to_convert)
    if not models:
raise ValueError(f"No model files were found in '{model_path_or_dir}'")
providers = ["CPUExecutionProvider"]
    # If the optimization level is 'layout' or higher, manually exclude the NCHWc transformer.
    # It's not applicable to ARM devices, and it creates a device-specific model that won't run on all hardware.
    # Anyone who really wants it can manually create an optimized ONNX model first, or comment out this code.
optimizer_filter = None
if (
(optimization_level == ort.GraphOptimizationLevel.ORT_ENABLE_ALL)
or (optimization_level == ort.GraphOptimizationLevel.ORT_ENABLE_LAYOUT)
) and target_platform != "amd64":
optimizer_filter = ["NchwcTransformer"]
converted_models = []
for model in models:
try:
relative_model_path = model.relative_to(model_dir)
(output_dir / relative_model_path).parent.mkdir(parents=True, exist_ok=True)
ort_target_path = (output_dir / relative_model_path).with_suffix(
_optimization_suffix(optimization_level_str, optimization_style, ".ort")
)
if create_optimized_onnx_model:
# Create an ONNX file with the same optimization level that will be used for the ORT format file.
# This allows the ONNX equivalent of the ORT format model to be easily viewed in Netron.
# If runtime optimizations are saved in the ORT format model, there may be some difference in the
# graphs at runtime between the ORT format model and this saved ONNX model.
optimized_target_path = (output_dir / relative_model_path).with_suffix(
_optimization_suffix(optimization_level_str, optimization_style, ".optimized.onnx")
)
so = _create_session_options(
optimization_level, optimized_target_path, custom_op_library, session_options_config_entries
)
if optimization_style == OptimizationStyle.Runtime:
# Limit the optimizations to those that can run in a model with runtime optimizations.
so.add_session_config_entry("optimization.minimal_build_optimizations", "apply")
print(f"Saving optimized ONNX model {model} to {optimized_target_path}")
_ = ort.InferenceSession(
str(model), sess_options=so, providers=providers, disabled_optimizers=optimizer_filter
)
# Load ONNX model, optimize, and save to ORT format
so = _create_session_options(
optimization_level, ort_target_path, custom_op_library, session_options_config_entries
)
so.add_session_config_entry("session.save_model_format", "ORT")
if optimization_style == OptimizationStyle.Runtime:
so.add_session_config_entry("optimization.minimal_build_optimizations", "save")
print(f"Converting optimized ONNX model {model} to ORT format model {ort_target_path}")
_ = ort.InferenceSession(
str(model), sess_options=so, providers=providers, disabled_optimizers=optimizer_filter
)
converted_models.append(ort_target_path)
except Exception as e:
print(f"Error converting {model}: {e}")
if not allow_conversion_failures:
raise
print(f"Converted {len(converted_models)}/{len(models)} models successfully.")
return converted_models


def parse_args():
parser = argparse.ArgumentParser(
os.path.basename(__file__),
        description="""Convert the ONNX format model/s in the provided path to ORT format models.
        All files with a `.onnx` extension will be processed. For each one, an ORT format model will be created in the
        given output directory, if specified, or in the same directory as the original model otherwise.
A configuration file will also be created containing the list of required operators for all
converted models. This configuration file should be used as input to the minimal build via the
`--include_ops_by_config` parameter.
""",
)
parser.add_argument(
"--output_dir",
type=pathlib.Path,
help="Provide an output directory for the converted model/s and configuration file. "
"If unspecified, the converted ORT format model/s will be in the same directory as the ONNX model/s.",
)
parser.add_argument(
"--optimization_style",
nargs="+",
default=[OptimizationStyle.Fixed.name, OptimizationStyle.Runtime.name],
choices=[e.name for e in OptimizationStyle],
help="Style of optimization to perform on the ORT format model. "
"Multiple values may be provided. The conversion will run once for each value. "
"The general guidance is to use models optimized with "
f"'{OptimizationStyle.Runtime.name}' style when using NNAPI or CoreML and "
f"'{OptimizationStyle.Fixed.name}' style otherwise. "
f"'{OptimizationStyle.Fixed.name}': Run optimizations directly before saving the ORT "
"format model. This bakes in any platform-specific optimizations. "
f"'{OptimizationStyle.Runtime.name}': Run basic optimizations directly and save certain "
"other optimizations to be applied at runtime if possible. This is useful when using a "
"compiling EP like NNAPI or CoreML that may run an unknown (at model conversion time) "
"number of nodes. The saved optimizations can further optimize nodes not assigned to the "
"compiling EP at runtime.",
)
parser.add_argument(
"--enable_type_reduction",
action="store_true",
help="Add operator specific type information to the configuration file to potentially reduce "
"the types supported by individual operator implementations.",
)
parser.add_argument(
"--custom_op_library",
type=pathlib.Path,
default=None,
help="Provide path to shared library containing custom operator kernels to register.",
)
parser.add_argument(
"--save_optimized_onnx_model",
action="store_true",
help="Save the optimized version of each ONNX model. "
"This will have the same level of optimizations applied as the ORT format model.",
)
parser.add_argument(
"--allow_conversion_failures",
action="store_true",
help="Whether to proceed after encountering model conversion failures.",
)
parser.add_argument(
"--target_platform",
type=str,
default=None,
choices=["arm", "amd64"],
help="Specify the target platform where the exported model will be used. "
"This parameter can be used to choose between platform-specific options, "
"such as QDQIsInt8Allowed(arm), NCHWc (amd64) and NHWC (arm/amd64) format, different "
"optimizer level options, etc.",
)
parser.add_argument(
"model_path_or_dir",
type=pathlib.Path,
help="Provide path to ONNX model or directory containing ONNX model/s to convert. "
"All files with a .onnx extension, including those in subdirectories, will be "
"processed.",
)
parsed_args = parser.parse_args()
parsed_args.optimization_style = [OptimizationStyle[style_str] for style_str in parsed_args.optimization_style]
return parsed_args


def convert_onnx_models_to_ort(
model_path_or_dir: pathlib.Path,
output_dir: pathlib.Path | None = None,
optimization_styles: list[OptimizationStyle] | None = None,
custom_op_library_path: pathlib.Path | None = None,
target_platform: str | None = None,
save_optimized_onnx_model: bool = False,
allow_conversion_failures: bool = False,
enable_type_reduction: bool = False,
):
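    """Convert ONNX format model/s to ORT format and create the required-operators config file/s.

    A minimal programmatic usage sketch (the paths are illustrative):

        convert_onnx_models_to_ort(
            pathlib.Path("/path/to/models"),
            output_dir=pathlib.Path("/path/to/output"),
            optimization_styles=[OptimizationStyle.Fixed],
        )
    """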
if output_dir is not None:
        output_dir.mkdir(parents=True, exist_ok=True)
output_dir = output_dir.resolve(strict=True)
optimization_styles = optimization_styles or []
# setting optimization level is not expected to be needed by typical users, but it can be set with this
# environment variable
optimization_level_str = os.getenv("ORT_CONVERT_ONNX_MODELS_TO_ORT_OPTIMIZATION_LEVEL", "all")
model_path_or_dir = model_path_or_dir.resolve()
custom_op_library = custom_op_library_path.resolve() if custom_op_library_path else None
if not model_path_or_dir.is_dir() and not model_path_or_dir.is_file():
raise FileNotFoundError(f"Model path '{model_path_or_dir}' is not a file or directory.")
if custom_op_library and not custom_op_library.is_file():
raise FileNotFoundError(f"Unable to find custom operator library '{custom_op_library}'")
session_options_config_entries = {}
    # QDQIsInt8Allowed is only applicable when targeting ARM.
    session_options_config_entries["session.qdqisint8allowed"] = "1" if target_platform == "arm" else "0"
for optimization_style in optimization_styles:
print(
f"Converting models with optimization style '{optimization_style.name}' and level '{optimization_level_str}'"
)
converted_models = _convert(
model_path_or_dir=model_path_or_dir,
output_dir=output_dir,
optimization_level_str=optimization_level_str,
optimization_style=optimization_style,
custom_op_library=custom_op_library,
create_optimized_onnx_model=save_optimized_onnx_model,
allow_conversion_failures=allow_conversion_failures,
target_platform=target_platform,
session_options_config_entries=session_options_config_entries,
)
with contextlib.ExitStack() as context_stack:
if optimization_style == OptimizationStyle.Runtime:
# Convert models again without runtime optimizations.
# Runtime optimizations may not end up being applied, so we need to use both converted models with and
# without runtime optimizations to get a complete set of ops that may be needed for the config file.
model_dir = model_path_or_dir if model_path_or_dir.is_dir() else model_path_or_dir.parent
temp_output_dir = context_stack.enter_context(
tempfile.TemporaryDirectory(dir=model_dir, suffix=".without_runtime_opt")
)
session_options_config_entries_for_second_conversion = session_options_config_entries.copy()
# Limit the optimizations to those that can run in a model with runtime optimizations.
session_options_config_entries_for_second_conversion["optimization.minimal_build_optimizations"] = (
"apply"
)
print(
"Converting models again without runtime optimizations to generate a complete config file. "
"These converted models are temporary and will be deleted."
)
converted_models += _convert(
model_path_or_dir=model_path_or_dir,
output_dir=temp_output_dir,
optimization_level_str=optimization_level_str,
optimization_style=OptimizationStyle.Fixed,
custom_op_library=custom_op_library,
create_optimized_onnx_model=False, # not useful as they would be created in a temp directory
allow_conversion_failures=allow_conversion_failures,
target_platform=target_platform,
session_options_config_entries=session_options_config_entries_for_second_conversion,
)
print(
f"Generating config file from ORT format models with optimization style '{optimization_style.name}' and level '{optimization_level_str}'"
)
config_file = _create_config_file_path(
model_path_or_dir,
output_dir,
optimization_level_str,
optimization_style,
enable_type_reduction,
)
create_config_from_models(converted_models, config_file, enable_type_reduction)


if __name__ == "__main__":
args = parse_args()
convert_onnx_models_to_ort(
args.model_path_or_dir,
output_dir=args.output_dir,
optimization_styles=args.optimization_style,
custom_op_library_path=args.custom_op_library,
target_platform=args.target_platform,
save_optimized_onnx_model=args.save_optimized_onnx_model,
allow_conversion_failures=args.allow_conversion_failures,
enable_type_reduction=args.enable_type_reduction,
)