Savely Krasovsky 2025-12-18 02:23:36 -05:00 committed by GitHub
commit 27655a3959
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 744 additions and 1190 deletions

View File

@ -8,6 +8,9 @@ indent_size = 2
insert_final_newline = true insert_final_newline = true
trim_trailing_whitespace = true trim_trailing_whitespace = true
[*.py]
indent_size = 4
[*.{ts,js}] [*.{ts,js}]
quote_type = single quote_type = single

View File

@ -574,9 +574,9 @@ jobs:
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4 uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
# TODO: add caching when supported (https://github.com/actions/setup-python/pull/818) # TODO: add caching when supported (https://github.com/actions/setup-python/pull/818)
# with: with:
# python-version: 3.11 python-version: 3.11
# cache: 'uv' #cache: 'uv'
- name: Install dependencies - name: Install dependencies
run: | run: |
uv sync --extra cpu uv sync --extra cpu

View File

@ -174,6 +174,7 @@ Redis (Sentinel) URL example JSON before encoding:
| `MACHINE_LEARNING_RKNN_THREADS` | How many threads of RKNN runtime should be spun up while inferencing. | `1` | machine learning | | `MACHINE_LEARNING_RKNN_THREADS` | How many threads of RKNN runtime should be spun up while inferencing. | `1` | machine learning |
| `MACHINE_LEARNING_MODEL_ARENA` | Pre-allocates CPU memory to avoid memory fragmentation | true | machine learning | | `MACHINE_LEARNING_MODEL_ARENA` | Pre-allocates CPU memory to avoid memory fragmentation | true | machine learning |
| `MACHINE_LEARNING_OPENVINO_PRECISION` | If set to FP16, uses half-precision floating-point operations for faster inference with reduced accuracy (one of [`FP16`, `FP32`], applies only to OpenVINO) | `FP32` | machine learning | | `MACHINE_LEARNING_OPENVINO_PRECISION` | If set to FP16, uses half-precision floating-point operations for faster inference with reduced accuracy (one of [`FP16`, `FP32`], applies only to OpenVINO) | `FP32` | machine learning |
| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` | The maximum number of distinct image dimensions for which models keep cached optimizations (applies only to OpenVINO) | `20` | machine learning |
\*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones. \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

View File

@ -2,7 +2,7 @@ ARG DEVICE=cpu
FROM python:3.11-bookworm@sha256:e39286476f84ffedf7c3564b0b74e32c9e1193ec9ca32ee8a11f8c09dbf6aafe AS builder-cpu FROM python:3.11-bookworm@sha256:e39286476f84ffedf7c3564b0b74e32c9e1193ec9ca32ee8a11f8c09dbf6aafe AS builder-cpu
FROM builder-cpu AS builder-openvino FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino
FROM builder-cpu AS builder-cuda FROM builder-cpu AS builder-cuda
@ -22,20 +22,18 @@ FROM builder-cpu AS builder-rknn
# Warning: 25GiB+ disk space required to pull this image # Warning: 25GiB+ disk space required to pull this image
# TODO: find a way to reduce the image size # TODO: find a way to reduce the image size
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS builder-rocm FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm
# renovate: datasource=github-releases depName=Microsoft/onnxruntime # renovate: datasource=github-releases depName=Microsoft/onnxruntime
ARG ONNXRUNTIME_VERSION="v1.22.1" ARG ONNXRUNTIME_VERSION="v1.22.1"
WORKDIR /code WORKDIR /code
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv RUN apt-get update && apt-get install -y --no-install-recommends wget git
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.sh && \ RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
chmod +x cmake-3.30.1-linux-x86_64.sh && \ chmod +x cmake-3.31.9-linux-x86_64.sh && \
mkdir -p /code/cmake-3.30.1-linux-x86_64 && \ mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
./cmake-3.30.1-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.30.1-linux-x86_64 && \ ./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
rm cmake-3.30.1-linux-x86_64.sh rm cmake-3.31.9-linux-x86_64.sh
ENV PATH=/code/cmake-3.30.1-linux-x86_64/bin:${PATH}
RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
WORKDIR /code/onnxruntime WORKDIR /code/onnxruntime
@ -45,9 +43,26 @@ COPY ./patches/* /tmp/
RUN git apply /tmp/*.patch RUN git apply /tmp/*.patch
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
ENV CCACHE_DIR="/ccache"
# Note: the `parallel` setting uses a substantial amount of RAM # Note: the `parallel` setting uses a substantial amount of RAM
RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 17 --cmake_extra_defines\ RUN --mount=type=cache,target=/ccache \
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" --skip_tests --use_rocm --rocm_home=/opt/rocm ./build.sh \
--allow_running_as_root \
--config Release \
--build_wheel \
--update \
--build \
--parallel 17 \
--cmake_extra_defines \
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
--skip_tests \
--use_rocm \
--rocm_home=/opt/rocm \
--use_cache \
--compile_no_warning_as_error
RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/ RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/
FROM builder-${DEVICE} AS builder FROM builder-${DEVICE} AS builder
@ -73,15 +88,18 @@ FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \ ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
MACHINE_LEARNING_MODEL_ARENA=false MACHINE_LEARNING_MODEL_ARENA=false
FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0897a62da31b7f30b7304a7 AS prod-openvino FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino
RUN apt-get update && \ RUN apt-get update && \
apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \ apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb && \ wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-core-2_2.24.8+20344_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb && \ wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-opencl-2_2.24.8+20344_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb && \ wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-opencl-icd_25.48.36300.8-0_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
# TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file # TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb && \ wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libigdgmm12_22.8.2_amd64.deb && \
dpkg -i *.deb && \ dpkg -i *.deb && \
rm *.deb && \ rm *.deb && \
apt-get remove wget -yqq && \ apt-get remove wget -yqq && \
@ -102,7 +120,7 @@ COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11 COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS prod-rocm FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm
FROM prod-cpu AS prod-armnn FROM prod-cpu AS prod-armnn

View File

@ -79,6 +79,7 @@ class Settings(BaseSettings):
preload: PreloadModelData | None = None preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None max_batch_size: MaxBatchSize | None = None
openvino_precision: ModelPrecision = ModelPrecision.FP32 openvino_precision: ModelPrecision = ModelPrecision.FP32
openvino_cache_capacity: int = 20
@property @property
def device_id(self) -> str: def device_id(self) -> str:

View File

@ -5,6 +5,7 @@ from typing import Any
import numpy as np import numpy as np
import onnxruntime as ort import onnxruntime as ort
import orjson
from numpy.typing import NDArray from numpy.typing import NDArray
from immich_ml.models.constants import SUPPORTED_PROVIDERS from immich_ml.models.constants import SUPPORTED_PROVIDERS
@ -99,6 +100,11 @@ class OrtSession:
"device_type": device, "device_type": device,
"precision": settings.openvino_precision.value, "precision": settings.openvino_precision.value,
"cache_dir": openvino_dir.as_posix(), "cache_dir": openvino_dir.as_posix(),
"load_config": orjson.dumps(
{
"CPU": {"CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)},
}
).decode(),
} }
case "CoreMLExecutionProvider": case "CoreMLExecutionProvider":
options = { options = {

View File

@ -0,0 +1,33 @@
diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
index bbb672a99e..0dc652fbda 100644
--- a/dockerfiles/scripts/install_common_deps.sh
+++ b/dockerfiles/scripts/install_common_deps.sh
@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
curl \
libcurl4-openssl-dev \
libssl-dev \
- python3-dev
+ python3-dev \
+ ccache
# Dependencies: conda
-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
rm ~/miniconda.sh
/opt/miniconda/bin/conda clean -ya
-pip install numpy
-pip install packaging
-pip install "wheel>=0.35.1"
+# Dependencies: venv and packages
+/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
+/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
+/opt/rocm-venv/bin/pip install --no-cache-dir \
+ "numpy==2.3.4" \
+ "packaging==25.0" \
+ "wheel==0.45.1" \
+ "setuptools==80.9.0"
+
rm -rf /opt/miniconda/pkgs
# Dependencies: cmake

View File

@ -1,13 +0,0 @@
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 2714e6f59..a69da76b4 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -338,7 +338,7 @@ if (onnxruntime_USE_ROCM)
if (ROCM_VERSION_DEV VERSION_LESS "6.2")
message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
else()
- set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
+ set(CMAKE_HIP_ARCHITECTURES "gfx900;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942;gfx1200;gfx1201")
endif()
endif()

View File

@ -3,7 +3,7 @@ name = "immich-ml"
version = "2.4.0" version = "2.4.0"
description = "" description = ""
authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }] authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
requires-python = ">=3.10,<4.0" requires-python = ">=3.11,<4"
readme = "README.md" readme = "README.md"
dependencies = [ dependencies = [
"aiocache>=0.12.1,<1.0", "aiocache>=0.12.1,<1.0",
@ -12,7 +12,7 @@ dependencies = [
"gunicorn>=21.1.0", "gunicorn>=21.1.0",
"huggingface-hub>=0.20.1,<1.0", "huggingface-hub>=0.20.1,<1.0",
"insightface>=0.7.3,<1.0", "insightface>=0.7.3,<1.0",
"numpy<2", "numpy>=2.3.4",
"opencv-python-headless>=4.7.0.72,<5.0", "opencv-python-headless>=4.7.0.72,<5.0",
"orjson>=3.9.5", "orjson>=3.9.5",
"pillow>=9.5.0,<11.0", "pillow>=9.5.0,<11.0",
@ -49,24 +49,16 @@ lint = [
dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }] dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]
[project.optional-dependencies] [project.optional-dependencies]
cpu = ["onnxruntime>=1.15.0,<2"] cpu = ["onnxruntime>=1.23.2,<2"]
cuda = ["onnxruntime-gpu>=1.17.0,<2"] cuda = ["onnxruntime-gpu>=1.23.2,<2"]
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"] openvino = ["onnxruntime-openvino>=1.23.0,<2"]
armnn = ["onnxruntime>=1.15.0,<2"] armnn = ["onnxruntime>=1.23.2,<2"]
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"] rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
rocm = [] rocm = []
[tool.uv] [tool.uv]
compile-bytecode = true compile-bytecode = true
[[tool.uv.index]]
name = "cuda12"
url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
explicit = true
[tool.uv.sources]
onnxruntime-gpu = { index = "cuda12" }
[tool.hatch.build.targets.sdist] [tool.hatch.build.targets.sdist]
include = ["immich_ml"] include = ["immich_ml"]

View File

@ -249,13 +249,15 @@ class TestOrtSession:
"device_type": "GPU.0", "device_type": "GPU.0",
"precision": "FP32", "precision": "FP32",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino", "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
}, },
{"arena_extend_strategy": "kSameAsRequested"}, {"arena_extend_strategy": "kSameAsRequested"},
] ]
def test_sets_provider_options_for_openvino(self) -> None: def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
model_path = "/cache/ViT-B-32__openai/textual/model.onnx" model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
mocker.patch.object(settings, "openvino_cache_capacity", 10)
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
@ -264,6 +266,7 @@ class TestOrtSession:
"device_type": "GPU.1", "device_type": "GPU.1",
"precision": "FP32", "precision": "FP32",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino", "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"10\"}}"
} }
] ]
@ -279,6 +282,7 @@ class TestOrtSession:
"device_type": "GPU.1", "device_type": "GPU.1",
"precision": "FP16", "precision": "FP16",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino", "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
} }
] ]

File diff suppressed because it is too large Load Diff