Merge 4d24756fd2 into 5ade152bc5
commit
27655a3959
|
|
@ -8,6 +8,9 @@ indent_size = 2
|
|||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.py]
|
||||
indent_size = 4
|
||||
|
||||
[*.{ts,js}]
|
||||
quote_type = single
|
||||
|
||||
|
|
|
|||
|
|
@ -574,9 +574,9 @@ jobs:
|
|||
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
|
||||
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||
# TODO: add caching when supported (https://github.com/actions/setup-python/pull/818)
|
||||
# with:
|
||||
# python-version: 3.11
|
||||
# cache: 'uv'
|
||||
with:
|
||||
python-version: 3.11
|
||||
#cache: 'uv'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv sync --extra cpu
|
||||
|
|
|
|||
|
|
@ -174,6 +174,7 @@ Redis (Sentinel) URL example JSON before encoding:
|
|||
| `MACHINE_LEARNING_RKNN_THREADS` | How many threads of RKNN runtime should be spun up while inferencing. | `1` | machine learning |
|
||||
| `MACHINE_LEARNING_MODEL_ARENA` | Pre-allocates CPU memory to avoid memory fragmentation | `true` | machine learning |
|
||||
| `MACHINE_LEARNING_OPENVINO_PRECISION` | If set to FP16, uses half-precision floating-point operations for faster inference with reduced accuracy (one of [`FP16`, `FP32`], applies only to OpenVINO) | `FP32` | machine learning |
|
||||
| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` | The maximum number of image dimensions for which cached model optimizations are kept (applies only to OpenVINO) | `20` | machine learning |
|
||||
|
||||
\*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ ARG DEVICE=cpu
|
|||
|
||||
FROM python:3.11-bookworm@sha256:e39286476f84ffedf7c3564b0b74e32c9e1193ec9ca32ee8a11f8c09dbf6aafe AS builder-cpu
|
||||
|
||||
FROM builder-cpu AS builder-openvino
|
||||
FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino
|
||||
|
||||
FROM builder-cpu AS builder-cuda
|
||||
|
||||
|
|
@ -22,20 +22,18 @@ FROM builder-cpu AS builder-rknn
|
|||
|
||||
# Warning: 25GiB+ disk space required to pull this image
|
||||
# TODO: find a way to reduce the image size
|
||||
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS builder-rocm
|
||||
FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm
|
||||
|
||||
# renovate: datasource=github-releases depName=Microsoft/onnxruntime
|
||||
ARG ONNXRUNTIME_VERSION="v1.22.1"
|
||||
WORKDIR /code
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
|
||||
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.sh && \
|
||||
chmod +x cmake-3.30.1-linux-x86_64.sh && \
|
||||
mkdir -p /code/cmake-3.30.1-linux-x86_64 && \
|
||||
./cmake-3.30.1-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.30.1-linux-x86_64 && \
|
||||
rm cmake-3.30.1-linux-x86_64.sh
|
||||
|
||||
ENV PATH=/code/cmake-3.30.1-linux-x86_64/bin:${PATH}
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends wget git
|
||||
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
|
||||
chmod +x cmake-3.31.9-linux-x86_64.sh && \
|
||||
mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
|
||||
./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
|
||||
rm cmake-3.31.9-linux-x86_64.sh
|
||||
|
||||
RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
|
||||
WORKDIR /code/onnxruntime
|
||||
|
|
@ -45,9 +43,26 @@ COPY ./patches/* /tmp/
|
|||
RUN git apply /tmp/*.patch
|
||||
|
||||
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
|
||||
|
||||
ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
|
||||
ENV CCACHE_DIR="/ccache"
|
||||
# Note: the `parallel` setting uses a substantial amount of RAM
|
||||
RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 17 --cmake_extra_defines\
|
||||
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" --skip_tests --use_rocm --rocm_home=/opt/rocm
|
||||
RUN --mount=type=cache,target=/ccache \
|
||||
./build.sh \
|
||||
--allow_running_as_root \
|
||||
--config Release \
|
||||
--build_wheel \
|
||||
--update \
|
||||
--build \
|
||||
--parallel 17 \
|
||||
--cmake_extra_defines \
|
||||
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
|
||||
CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
|
||||
--skip_tests \
|
||||
--use_rocm \
|
||||
--rocm_home=/opt/rocm \
|
||||
--use_cache \
|
||||
--compile_no_warning_as_error
|
||||
RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/
|
||||
|
||||
FROM builder-${DEVICE} AS builder
|
||||
|
|
@ -73,15 +88,18 @@ FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0
|
|||
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
|
||||
MACHINE_LEARNING_MODEL_ARENA=false
|
||||
|
||||
FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0897a62da31b7f30b7304a7 AS prod-openvino
|
||||
FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
|
||||
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb && \
|
||||
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb && \
|
||||
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb && \
|
||||
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-core-2_2.24.8+20344_amd64.deb && \
|
||||
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.24.8/intel-igc-opencl-2_2.24.8+20344_amd64.deb && \
|
||||
wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/intel-opencl-icd_25.48.36300.8-0_amd64.deb && \
|
||||
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
|
||||
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
|
||||
wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
|
||||
# TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
|
||||
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb && \
|
||||
wget -nv https://github.com/intel/compute-runtime/releases/download/25.48.36300.8/libigdgmm12_22.8.2_amd64.deb && \
|
||||
dpkg -i *.deb && \
|
||||
rm *.deb && \
|
||||
apt-get remove wget -yqq && \
|
||||
|
|
@ -102,7 +120,7 @@ COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
|
|||
COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
|
||||
COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so
|
||||
|
||||
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS prod-rocm
|
||||
FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm
|
||||
|
||||
FROM prod-cpu AS prod-armnn
|
||||
|
||||
|
|
|
|||
|
|
@ -79,6 +79,7 @@ class Settings(BaseSettings):
|
|||
preload: PreloadModelData | None = None
|
||||
max_batch_size: MaxBatchSize | None = None
|
||||
openvino_precision: ModelPrecision = ModelPrecision.FP32
|
||||
openvino_cache_capacity: int = 20
|
||||
|
||||
@property
|
||||
def device_id(self) -> str:
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from typing import Any
|
|||
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
import orjson
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from immich_ml.models.constants import SUPPORTED_PROVIDERS
|
||||
|
|
@ -99,6 +100,11 @@ class OrtSession:
|
|||
"device_type": device,
|
||||
"precision": settings.openvino_precision.value,
|
||||
"cache_dir": openvino_dir.as_posix(),
|
||||
"load_config": orjson.dumps(
|
||||
{
|
||||
"CPU": {"CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)},
|
||||
}
|
||||
).decode(),
|
||||
}
|
||||
case "CoreMLExecutionProvider":
|
||||
options = {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
|
||||
index bbb672a99e..0dc652fbda 100644
|
||||
--- a/dockerfiles/scripts/install_common_deps.sh
|
||||
+++ b/dockerfiles/scripts/install_common_deps.sh
|
||||
@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
libcurl4-openssl-dev \
|
||||
libssl-dev \
|
||||
- python3-dev
|
||||
+ python3-dev \
|
||||
+ ccache
|
||||
|
||||
# Dependencies: conda
|
||||
-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
|
||||
+wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
|
||||
rm ~/miniconda.sh
|
||||
/opt/miniconda/bin/conda clean -ya
|
||||
|
||||
-pip install numpy
|
||||
-pip install packaging
|
||||
-pip install "wheel>=0.35.1"
|
||||
+# Dependencies: venv and packages
|
||||
+/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
|
||||
+/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
|
||||
+/opt/rocm-venv/bin/pip install --no-cache-dir \
|
||||
+ "numpy==2.3.4" \
|
||||
+ "packaging==25.0" \
|
||||
+ "wheel==0.45.1" \
|
||||
+ "setuptools==80.9.0"
|
||||
+
|
||||
rm -rf /opt/miniconda/pkgs
|
||||
|
||||
# Dependencies: cmake
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
|
||||
index 2714e6f59..a69da76b4 100644
|
||||
--- a/cmake/CMakeLists.txt
|
||||
+++ b/cmake/CMakeLists.txt
|
||||
@@ -338,7 +338,7 @@ if (onnxruntime_USE_ROCM)
|
||||
if (ROCM_VERSION_DEV VERSION_LESS "6.2")
|
||||
message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
|
||||
else()
|
||||
- set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
|
||||
+ set(CMAKE_HIP_ARCHITECTURES "gfx900;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942;gfx1200;gfx1201")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
@ -3,7 +3,7 @@ name = "immich-ml"
|
|||
version = "2.4.0"
|
||||
description = ""
|
||||
authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
|
||||
requires-python = ">=3.10,<4.0"
|
||||
requires-python = ">=3.11,<4"
|
||||
readme = "README.md"
|
||||
dependencies = [
|
||||
"aiocache>=0.12.1,<1.0",
|
||||
|
|
@ -12,7 +12,7 @@ dependencies = [
|
|||
"gunicorn>=21.1.0",
|
||||
"huggingface-hub>=0.20.1,<1.0",
|
||||
"insightface>=0.7.3,<1.0",
|
||||
"numpy<2",
|
||||
"numpy>=2.3.4",
|
||||
"opencv-python-headless>=4.7.0.72,<5.0",
|
||||
"orjson>=3.9.5",
|
||||
"pillow>=9.5.0,<11.0",
|
||||
|
|
@ -49,24 +49,16 @@ lint = [
|
|||
dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]
|
||||
|
||||
[project.optional-dependencies]
|
||||
cpu = ["onnxruntime>=1.15.0,<2"]
|
||||
cuda = ["onnxruntime-gpu>=1.17.0,<2"]
|
||||
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
|
||||
armnn = ["onnxruntime>=1.15.0,<2"]
|
||||
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
|
||||
cpu = ["onnxruntime>=1.23.2,<2"]
|
||||
cuda = ["onnxruntime-gpu>=1.23.2,<2"]
|
||||
openvino = ["onnxruntime-openvino>=1.23.0,<2"]
|
||||
armnn = ["onnxruntime>=1.23.2,<2"]
|
||||
rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
|
||||
rocm = []
|
||||
|
||||
[tool.uv]
|
||||
compile-bytecode = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "cuda12"
|
||||
url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
|
||||
explicit = true
|
||||
|
||||
[tool.uv.sources]
|
||||
onnxruntime-gpu = { index = "cuda12" }
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
include = ["immich_ml"]
|
||||
|
||||
|
|
|
|||
|
|
@ -249,13 +249,15 @@ class TestOrtSession:
|
|||
"device_type": "GPU.0",
|
||||
"precision": "FP32",
|
||||
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
|
||||
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
|
||||
},
|
||||
{"arena_extend_strategy": "kSameAsRequested"},
|
||||
]
|
||||
|
||||
def test_sets_provider_options_for_openvino(self) -> None:
|
||||
def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
|
||||
model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
|
||||
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
|
||||
mocker.patch.object(settings, "openvino_cache_capacity", 10)
|
||||
|
||||
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
|
||||
|
||||
|
|
@ -264,6 +266,7 @@ class TestOrtSession:
|
|||
"device_type": "GPU.1",
|
||||
"precision": "FP32",
|
||||
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
|
||||
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"10\"}}"
|
||||
}
|
||||
]
|
||||
|
||||
|
|
@ -279,6 +282,7 @@ class TestOrtSession:
|
|||
"device_type": "GPU.1",
|
||||
"precision": "FP16",
|
||||
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
|
||||
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue