From 057c7629bd7bf368a4c955e9a592a6f31ab0b961 Mon Sep 17 00:00:00 2001
From: Huan Yu <huan.yu@gmail.com>
Date: Mon, 13 Apr 2026 22:18:33 +0800
Subject: [PATCH] align ocr detection normalization with paddle ppocrv5

---
 machine-learning/immich_ml/models/ocr/detection.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/machine-learning/immich_ml/models/ocr/detection.py b/machine-learning/immich_ml/models/ocr/detection.py
index 0a2cb8ad91..18ef4c1013 100644
--- a/machine-learning/immich_ml/models/ocr/detection.py
+++ b/machine-learning/immich_ml/models/ocr/detection.py
@@ -25,8 +25,12 @@ class TextDetector(InferenceModel):
     def __init__(self, model_name: str, min_score: float = 0.5, **model_kwargs: Any) -> None:
         super().__init__(model_name.split("__")[-1], **model_kwargs, model_format=ModelFormat.ONNX)
         self.max_resolution = 736
-        self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
-        self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)
+        # Align with Paddle NormalizeImage:
+        # scale=1/255, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+        # This implementation works on raw 0..255 pixels, so we fold scale into mean/std:
+        # (x/255 - mean) / std == (x - mean*255) / (std*255)
+        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) * 255.0
+        self.std_inv = np.float32(1.0) / (np.array([0.229, 0.224, 0.225], dtype=np.float32) * 255.0)
         self._empty: TextDetectionOutput = {
             "boxes": np.empty(0, dtype=np.float32),
             "scores": np.empty(0, dtype=np.float32),