Merge 4d24756fd2 into 5ade152bc5

2025-12-18 02:23:36 -05:00 · 2025-12-18 02:23:36 -05:00 · 27655a3959
parent 5ade152bc5 4d24756fd2
commit 27655a3959
11 changed files with 744 additions and 1190 deletions
--- a/.editorconfig
+++ b/.editorconfig
@ -8,6 +8,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true

+[*.py]
+indent_size = 4
+
 [*.{ts,js}]
 quote_type = single

--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -574,9 +574,9 @@ jobs:
        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        # TODO: add caching when supported (https://github.com/actions/setup-python/pull/818)
-        # with:
-        #   python-version: 3.11
-        #   cache: 'uv'
+        with:
+          python-version: '3.11' # quoted so YAML keeps the version a string instead of parsing it as a float
+          #cache: 'uv'
      - name: Install dependencies
        run: |
          uv sync --extra cpu
--- a/docs/docs/install/environment-variables.md
+++ b/docs/docs/install/environment-variables.md
@ -174,6 +174,7 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_RKNN_THREADS`                             | How many threads of RKNN runtime should be spun up while inferencing.                                                                                        |               `1`               | machine learning |
 | `MACHINE_LEARNING_MODEL_ARENA`                              | Pre-allocates CPU memory to avoid memory fragmentation                                                                                                       |              true               | machine learning |
 | `MACHINE_LEARNING_OPENVINO_PRECISION`                       | If set to FP16, uses half-precision floating-point operations for faster inference with reduced accuracy (one of [`FP16`, `FP32`], applies only to OpenVINO) |             `FP32`              | machine learning |
+| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY`                  | The max number of image dimensions for which models have cached optimizations                                                                                |              `20`               | machine learning |

 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@ -2,7 +2,7 @@ ARG DEVICE=cpu

 FROM python:3.11-bookworm@sha256:e39286476f84ffedf7c3564b0b74e32c9e1193ec9ca32ee8a11f8c09dbf6aafe AS builder-cpu

-FROM builder-cpu AS builder-openvino
+FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino

 FROM builder-cpu AS builder-cuda

@ -22,20 +22,18 @@ FROM builder-cpu AS builder-rknn

 # Warning: 25GiB+ disk space required to pull this image
 # TODO: find a way to reduce the image size
-FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS builder-rocm
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm

 # renovate: datasource=github-releases depName=Microsoft/onnxruntime
 ARG ONNXRUNTIME_VERSION="v1.22.1"
 WORKDIR /code

-RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
-RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.sh && \
-    chmod +x cmake-3.30.1-linux-x86_64.sh && \
-    mkdir -p /code/cmake-3.30.1-linux-x86_64 && \
-    ./cmake-3.30.1-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.30.1-linux-x86_64 && \
-    rm cmake-3.30.1-linux-x86_64.sh
-
-ENV PATH=/code/cmake-3.30.1-linux-x86_64/bin:${PATH}
+RUN apt-get update && apt-get install -y --no-install-recommends wget git
+RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
+    chmod +x cmake-3.31.9-linux-x86_64.sh && \
+    mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
+    ./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
+    rm cmake-3.31.9-linux-x86_64.sh

 RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
 WORKDIR /code/onnxruntime
@ -45,9 +43,26 @@ COPY ./patches/* /tmp/
 RUN git apply /tmp/*.patch

 RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
+
+ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
+ENV CCACHE_DIR="/ccache"
 # Note: the `parallel` setting uses a substantial amount of RAM
-RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 17 --cmake_extra_defines\
-    ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" --skip_tests --use_rocm --rocm_home=/opt/rocm
+RUN --mount=type=cache,target=/ccache \
+    ./build.sh \
+    --allow_running_as_root \
+    --config Release \
+    --build_wheel \
+    --update \
+    --build \
+    --parallel 17 \
+    --cmake_extra_defines \
+    ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
+    CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
+    --skip_tests \
+    --use_rocm \
+    --rocm_home=/opt/rocm \
+    --use_cache \
+    --compile_no_warning_as_error
 RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/

 FROM builder-${DEVICE} AS builder
@ -73,15 +88,18 @@ FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0
 ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
    MACHINE_LEARNING_MODEL_ARENA=false

-FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0897a62da31b7f30b7304a7 AS prod-openvino
+FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino

 RUN apt-get update && \
    apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
-    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb && \
-    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb && \
-    wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-core-2_2.24.8+20344_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-opencl-2_2.24.8+20344_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-opencl-icd_25.48.36300.8-0_amd64.deb &&  \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
    # TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
-    wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libigdgmm12_22.8.2_amd64.deb && \
    dpkg -i *.deb && \
    rm *.deb && \
    apt-get remove wget -yqq && \
@ -102,7 +120,7 @@ COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
 COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
 COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so

-FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS prod-rocm
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm

 FROM prod-cpu AS prod-armnn

--- a/machine-learning/immich_ml/config.py
+++ b/machine-learning/immich_ml/config.py
@ -79,6 +79,7 @@ class Settings(BaseSettings):
    preload: PreloadModelData | None = None
    max_batch_size: MaxBatchSize | None = None
    openvino_precision: ModelPrecision = ModelPrecision.FP32
+    openvino_cache_capacity: int = 20

    @property
    def device_id(self) -> str:
--- a/machine-learning/immich_ml/sessions/ort.py
+++ b/machine-learning/immich_ml/sessions/ort.py
@ -5,6 +5,7 @@ from typing import Any

 import numpy as np
 import onnxruntime as ort
+import orjson
 from numpy.typing import NDArray

 from immich_ml.models.constants import SUPPORTED_PROVIDERS
@ -99,6 +100,11 @@ class OrtSession:
                        "device_type": device,
                        "precision": settings.openvino_precision.value,
                        "cache_dir": openvino_dir.as_posix(),
+                        "load_config": orjson.dumps(
+                            {
+                                "CPU": {"CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)},
+                            }
+                        ).decode(),
                    }
                case "CoreMLExecutionProvider":
                    options = {
--- a/machine-learning/patches/0002-install-system-deps.patch
+++ b/machine-learning/patches/0002-install-system-deps.patch
@ -0,0 +1,33 @@
+diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
+index bbb672a99e..0dc652fbda 100644
+--- a/dockerfiles/scripts/install_common_deps.sh
+++ b/dockerfiles/scripts/install_common_deps.sh
+@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
+         curl \
+         libcurl4-openssl-dev \
+         libssl-dev \
+-        python3-dev
++        python3-dev \
++        ccache
+ 
+ # Dependencies: conda
+-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
++wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+ rm ~/miniconda.sh
+ /opt/miniconda/bin/conda clean -ya
+ 
+-pip install numpy
+-pip install packaging
+-pip install "wheel>=0.35.1"
++# Dependencies: venv and packages
++/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
++/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
++/opt/rocm-venv/bin/pip install --no-cache-dir \
++  "numpy==2.3.4" \
++  "packaging==25.0" \
++  "wheel==0.45.1" \
++  "setuptools==80.9.0"
++
+ rm -rf /opt/miniconda/pkgs
+ 
+ # Dependencies: cmake
--- a/machine-learning/patches/0002-target-gfx900-gfx1102.patch
+++ b/machine-learning/patches/0002-target-gfx900-gfx1102.patch
@ -1,13 +0,0 @@
-diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
-index 2714e6f59..a69da76b4 100644
---- a/cmake/CMakeLists.txt
-+++ b/cmake/CMakeLists.txt
-@@ -338,7 +338,7 @@ if (onnxruntime_USE_ROCM)
-     if (ROCM_VERSION_DEV VERSION_LESS "6.2")
-       message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
-     else()
--      set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
-+      set(CMAKE_HIP_ARCHITECTURES "gfx900;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942;gfx1200;gfx1201")
-     endif()
-   endif()
- 
--- a/machine-learning/pyproject.toml
+++ b/machine-learning/pyproject.toml
@ -3,7 +3,7 @@ name = "immich-ml"
 version = "2.4.0"
 description = ""
 authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
-requires-python = ">=3.10,<4.0"
+requires-python = ">=3.11,<4"
 readme = "README.md"
 dependencies = [
    "aiocache>=0.12.1,<1.0",
@ -12,7 +12,7 @@ dependencies = [
    "gunicorn>=21.1.0",
    "huggingface-hub>=0.20.1,<1.0",
    "insightface>=0.7.3,<1.0",
-    "numpy<2",
+    "numpy>=2.3.4",
    "opencv-python-headless>=4.7.0.72,<5.0",
    "orjson>=3.9.5",
    "pillow>=9.5.0,<11.0",
@ -49,24 +49,16 @@ lint = [
 dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]

 [project.optional-dependencies]
-cpu = ["onnxruntime>=1.15.0,<2"]
-cuda = ["onnxruntime-gpu>=1.17.0,<2"]
-openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
-armnn = ["onnxruntime>=1.15.0,<2"]
-rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
+cpu = ["onnxruntime>=1.23.2,<2"]
+cuda = ["onnxruntime-gpu>=1.23.2,<2"]
+openvino = ["onnxruntime-openvino>=1.23.0,<2"]
+armnn = ["onnxruntime>=1.23.2,<2"]
+rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
 rocm = []

 [tool.uv]
 compile-bytecode = true

-[[tool.uv.index]]
-name = "cuda12"
-url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
-explicit = true
-
-[tool.uv.sources]
-onnxruntime-gpu = { index = "cuda12" }
-
 [tool.hatch.build.targets.sdist]
 include = ["immich_ml"]

--- a/machine-learning/test_main.py
+++ b/machine-learning/test_main.py
@ -249,13 +249,15 @@ class TestOrtSession:
                "device_type": "GPU.0",
                "precision": "FP32",
                "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
+                "load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
            },
            {"arena_extend_strategy": "kSameAsRequested"},
        ]

-    def test_sets_provider_options_for_openvino(self) -> None:
+    def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
        model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
        os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
+        mocker.patch.object(settings, "openvino_cache_capacity", 10)

        session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])

@ -264,6 +266,7 @@ class TestOrtSession:
                "device_type": "GPU.1",
                "precision": "FP32",
                "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
+                "load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"10\"}}"
            }
        ]

@ -279,6 +282,7 @@ class TestOrtSession:
                "device_type": "GPU.1",
                "precision": "FP16",
                "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
+                "load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
            }
        ]

--- a/machine-learning/uv.lock
+++ b/machine-learning/uv.lock