diff --git a/pos/gunicorn.conf.py b/pos/gunicorn.conf.py
index 92906dc..e94ed72 100644
--- a/pos/gunicorn.conf.py
+++ b/pos/gunicorn.conf.py
@@ -1,8 +1,19 @@
 import multiprocessing
 
 bind = "0.0.0.0:5001"
-workers = multiprocessing.cpu_count() * 2 + 1
-worker_class = "sync"
+
+# gthread workers handle multiple concurrent requests per worker via threads.
+# Ideal for I/O-bound Flask apps with DB queries.
+# 4 workers × 4 threads = 16 concurrent requests.
+workers = 4
+threads = 4
+worker_class = "gthread"
+worker_connections = 1000
+
+# Recycle workers after N requests to prevent memory leaks
+max_requests = 1000
+max_requests_jitter = 50
+
 timeout = 120
 keepalive = 5
 accesslog = "/var/log/nexus-pos/access.log"
diff --git a/pos/services/catalog_service.py b/pos/services/catalog_service.py
index aa81eb5..0895457 100644
--- a/pos/services/catalog_service.py
+++ b/pos/services/catalog_service.py
@@ -10,11 +10,76 @@ PERFORMANCE: vehicle_parts has 14B+ rows. Every query MUST:
   - Use EXISTS instead of COUNT(*) on vehicle_parts where possible
 """
 
+import json
+import os
 import re
 
+import redis
+
 from services.na_models import is_na_model
 from services.translations import translate_part_name, translate_category
 
+# Lazy Redis client for catalog caches
+_redis_client = None
+_CLASSIFY_TTL_SECONDS = 300
+_VEHICLE_TTL_SECONDS = 3600
+
+def _get_redis():
+    global _redis_client
+    if _redis_client is None:
+        try:
+            _redis_client = redis.from_url(
+                os.environ.get('REDIS_URL', 'redis://localhost:6379/0'),
+                decode_responses=True
+            )
+            _redis_client.ping()
+        except Exception:
+            _redis_client = False
+    return _redis_client if _redis_client is not False else None
+
+
+def _classify_cache_get(mye_id):
+    r = _get_redis()
+    if not r:
+        return None
+    try:
+        val = r.get(f'nexus:classify:{mye_id}')
+        return json.loads(val) if val else None
+    except Exception:
+        return None
+
+
+def _classify_cache_set(mye_id, data):
+    r = _get_redis()
+    if not r:
+        return
+    try:
+        r.setex(f'nexus:classify:{mye_id}', _CLASSIFY_TTL_SECONDS, json.dumps(data))
+    except Exception:
+        pass
+
+
+def classify_cache_clear():
+    r = _get_redis()
+    if not r:
+        return
+    try:
+        for key in r.scan_iter(match='nexus:classify:*'):
+            r.delete(key)
+    except Exception:
+        pass
+
+
+def classify_cache_stats():
+    r = _get_redis()
+    if not r:
+        return {'redis_available': False}
+    try:
+        count = sum(1 for _ in r.scan_iter(match='nexus:classify:*'))
+        return {'redis_available': True, 'total_entries': count, 'ttl_seconds': _CLASSIFY_TTL_SECONDS}
+    except Exception:
+        return {'redis_available': False}
+
 
 def _clean_model_name(name):
     """Parse TecDoc model name to show only the primary name.
@@ -196,52 +261,9 @@ def get_categories(master_conn, mye_id):
 # NEXPART HIERARCHY (Local mode) — filtered by what TecDoc has for this vehicle
 # ─────────────────────────────────────────────────────────────────────────────
 
-# ─── In-memory cache for vehicle → Nexpart classification ─────────────────
-# Key: mye_id (int). Value: (expires_at_timestamp, classified_dict).
-# TTL is short (5 min) because catalog data rarely changes but we don't
-# want stale data lingering across sessions. Single-process cache —
-# Gunicorn workers each have their own, which is fine for this workload.
-import time as _time
-_CLASSIFY_CACHE = {}
-_CLASSIFY_TTL_SECONDS = 300
-
-
-def _classify_cache_get(mye_id):
-    entry = _CLASSIFY_CACHE.get(mye_id)
-    if entry is None:
-        return None
-    expires_at, data = entry
-    if _time.time() > expires_at:
-        _CLASSIFY_CACHE.pop(mye_id, None)
-        return None
-    return data
-
-
-def _classify_cache_set(mye_id, data):
-    _CLASSIFY_CACHE[mye_id] = (_time.time() + _CLASSIFY_TTL_SECONDS, data)
-    # Simple unbounded-growth protection: if cache grows past 500 entries,
-    # evict the oldest half. Real production would use an LRU library.
-    if len(_CLASSIFY_CACHE) > 500:
-        sorted_keys = sorted(_CLASSIFY_CACHE.items(), key=lambda kv: kv[1][0])
-        for k, _v in sorted_keys[:250]:
-            _CLASSIFY_CACHE.pop(k, None)
-
-
-def classify_cache_clear():
-    """Manual cache invalidation — call after catalog import."""
-    _CLASSIFY_CACHE.clear()
-
-
-def classify_cache_stats():
-    """Diagnostic helper for the cache state."""
-    now = _time.time()
-    alive = sum(1 for expires, _ in _CLASSIFY_CACHE.values() if expires > now)
-    return {
-        'total_entries': len(_CLASSIFY_CACHE),
-        'alive': alive,
-        'expired': len(_CLASSIFY_CACHE) - alive,
-        'ttl_seconds': _CLASSIFY_TTL_SECONDS,
-    }
+# ─── Redis-backed cache for vehicle → Nexpart classification ──────────────
+# Replaces the old in-process dict so all Gunicorn workers share the same
+# cache. TTL 5 min. If Redis is down, classification still works (cache miss).
 
 
 def _classify_vehicle_parts(master_conn, mye_id):
@@ -1334,21 +1356,40 @@ def smart_search(master_conn, q, tenant_conn, branch_id, limit=50):
     part_ids = [r[0] for r in rows]
     oem_numbers = [r[1] for r in rows]
 
-    # Get vehicle info for each part (first match only)
+    # Get vehicle info for each part (Redis cache first, then DB fallback)
     vehicle_info_map = {}
-    cur.execute("""
-        SELECT DISTINCT ON (vp.part_id)
-               vp.part_id, b.name_brand, m.name_model, y.year_car
-        FROM vehicle_parts vp
-        JOIN model_year_engine mye ON mye.id_mye = vp.model_year_engine_id
-        JOIN models m ON m.id_model = mye.model_id
-        JOIN brands b ON b.id_brand = m.brand_id
-        JOIN years y ON y.id_year = mye.year_id
-        WHERE vp.part_id = ANY(%s)
-        ORDER BY vp.part_id, y.year_car DESC
-    """, (part_ids,))
-    for r in cur.fetchall():
-        vehicle_info_map[r[0]] = f"{r[1]} {r[2]} {r[3]}"
+    missing_ids = []
+    r = _get_redis()
+    if r:
+        for pid in part_ids:
+            cached = r.get(f'nexus:vehicle:{pid}')
+            if cached is not None:
+                vehicle_info_map[pid] = cached
+            else:
+                missing_ids.append(pid)
+    else:
+        missing_ids = part_ids
+
+    if missing_ids:
+        cur.execute("""
+            SELECT DISTINCT ON (vp.part_id)
+                   vp.part_id, b.name_brand, m.name_model, y.year_car
+            FROM vehicle_parts vp
+            JOIN model_year_engine mye ON mye.id_mye = vp.model_year_engine_id
+            JOIN models m ON m.id_model = mye.model_id
+            JOIN brands b ON b.id_brand = m.brand_id
+            JOIN years y ON y.id_year = mye.year_id
+            WHERE vp.part_id = ANY(%s)
+            ORDER BY vp.part_id, y.year_car DESC
+        """, (missing_ids,))
+        for row in cur.fetchall():
+            info = f"{row[1]} {row[2]} {row[3]}"
+            vehicle_info_map[row[0]] = info
+            if r:
+                try:
+                    r.setex(f'nexus:vehicle:{row[0]}', _VEHICLE_TTL_SECONDS, info)
+                except Exception:
+                    pass
     cur.close()
 
     # Local stock enrichment