From be2dbbc1948e9ae02b954b7e8e6b604f83c8c70b Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Apr 2026 05:51:27 +0000 Subject: [PATCH] =?UTF-8?q?docs:=20manual=20de=20producci=C3=B3n=20complet?= =?UTF-8?q?o=20+=20ajustes=20seed=20fallback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add production manual (docs/MANUAL_PRODUCCION.md) - Fix seed script to work without OpenAI API key (zero-vector fallback) - Fix alembic env to use database_url_str - Fix pyproject.toml hatch build config --- alembic/env.py | 2 +- docs/MANUAL_PRODUCCION.md | 401 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 3 + scripts/seed_knowledge.py | 57 +++++- 4 files changed, 454 insertions(+), 9 deletions(-) create mode 100644 docs/MANUAL_PRODUCCION.md diff --git a/alembic/env.py b/alembic/env.py index e78b3ca..010d3f8 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -25,7 +25,7 @@ target_metadata = Base.metadata # other values from the config, defined by the needs of env.py, # can be acquired: # my_important_option = config.get_main_option("my_important_option") -config.set_main_option("sqlalchemy.url", settings.DATABASE_URL) +config.set_main_option("sqlalchemy.url", settings.database_url_str) def run_migrations_offline() -> None: diff --git a/docs/MANUAL_PRODUCCION.md b/docs/MANUAL_PRODUCCION.md new file mode 100644 index 0000000..75a062a --- /dev/null +++ b/docs/MANUAL_PRODUCCION.md @@ -0,0 +1,401 @@ +# Manual de Producción — SKEEN CRM AI Agent + +**Versión:** 1.0 +**Fecha:** 29 de abril de 2026 +**VM:** 192.168.10.100 (Ubuntu 24.04 LTS) +**Stack:** Python 3.13, FastAPI, PostgreSQL 16 + pgvector, Redis 7, Celery, Docker + +--- + +## 1. Resumen Ejecutivo + +Este documento describe la instalación, configuración y operación del **SKEEN CRM AI Agent**, un agente de inteligencia artificial para WhatsApp Business API integrado con ERPNext Healthcare. 
+ +### Servicios desplegados en la VM + +| Servicio | Puerto | Tecnología | Estado | +|----------|--------|------------|--------| +| FastAPI (Agente) | 8000 | Python 3.13 + Uvicorn | Systemd | +| Celery Worker | — | Python 3.13 + Celery | Systemd | +| Celery Beat | — | Python 3.13 + Celery | Systemd | +| PostgreSQL | 5432 (interno) | Docker (ankane/pgvector) | Docker | +| Redis | 6379 (interno) | Docker (redis:7-alpine) | Docker | +| Gitea (Git) | 3000 | Docker (gitea/gitea) | Docker | +| Meta Webhook | 8000/api/v1/webhooks/whatsapp | FastAPI | Activo | + +--- + +## 2. Arquitectura del Sistema + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ PACIENTE │ +│ (WhatsApp Móvil) │ +└───────────────────────────────┬─────────────────────────────────────────┘ + │ + ┌───────────▼───────────┐ + │ Meta WhatsApp API │ + │ (WhatsApp Business) │ + └───────────┬───────────┘ + │ Webhook + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ SKEEN CRM AI AGENT │ +│ ┌─────────────────┐ ┌──────────────┐ ┌──────────────────────────┐ │ +│ │ FastAPI │ │ Celery │ │ PostgreSQL + pgvector │ │ +│ │ • Webhooks │ │ • Workers │ │ • Conversaciones │ │ +│ │ • API REST │ │ • Scheduler │ │ • Mensajes │ │ +│ │ • Auth │ │ • Colas │ │ • Knowledge Base (RAG) │ │ +│ └────────┬────────┘ └──────┬───────┘ └────────────┬─────────────┘ │ +│ │ │ │ │ +│ ┌────────▼────────┐ ┌──────▼───────┐ ┌────────────▼─────────────┐ │ +│ │ OpenAI GPT-4o │ │ Redis │ │ ERPNext Healthcare │ │ +│ │ • Chat │ │ • Cache │ │ • Pacientes │ │ +│ │ • Embeddings │ │ • Broker │ │ • Citas │ │ +│ │ • Functions │ │ • Rate Limit│ │ • Doctores │ │ +│ └─────────────────┘ └──────────────┘ └──────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ┌───────────▼───────────┐ + │ Gitea (Git Server) │ + │ http://192.168.10.100:3000 + └───────────────────────┘ +``` + +--- + +## 3. 
Accesos y URLs + +| Servicio | URL Interna | URL Externa (si aplica) | +|----------|-------------|------------------------| +| Gitea | http://192.168.10.100:3000 | Pendiente de dominio | +| API Health | http://192.168.10.100:8000/health | Pendiente de dominio | +| API Docs | http://192.168.10.100:8000/docs | Solo desarrollo | +| Webhook Meta | http://192.168.10.100:8000/api/v1/webhooks/whatsapp | Configurar en Meta Developers | +| Prometheus | http://192.168.10.100:8000/metrics | Métricas raw | + +### Credenciales Gitea +- **Usuario:** `skeen-admin` +- **Contraseña:** `skeen-admin-2024` +- **Email:** `admin@skeen.mx` +- **Token API:** `5ef0f75cefb665136896b63db05680d70647c894` + +--- + +## 4. Estructura de Archivos en la VM + +``` +/root/Skeen-CRM/ +├── .env # Variables de entorno (PROTEGER) +├── .venv/ # Entorno virtual Python +├── docker-compose.yml # Infraestructura Docker +├── src/ # Código fuente +│ ├── main.py # Entry point FastAPI +│ ├── config.py # Configuración Pydantic +│ ├── api/v1/ # Endpoints REST +│ ├── use_cases/ # Lógica de negocio +│ ├── infrastructure/ # Clientes externos +│ │ ├── whatsapp/ # Meta WhatsApp API +│ │ ├── ai/ # OpenAI + RAG +│ │ ├── erpnext/ # ERPNext Frappe API +│ │ ├── db.py # PostgreSQL +│ │ └── redis.py # Redis +│ ├── domain/models/ # Entidades SQLAlchemy +│ └── workers/ # Celery +├── alembic/ # Migraciones DB +├── scripts/ +│ ├── validate_setup.py # Validación de servicios +│ └── seed_knowledge.py # Seed del catálogo +├── tests/ # Tests automatizados +└── docs/ # Documentación +``` + +--- + +## 5. Guía de Instalación (Ya completada) + +Los pasos 1-7 ya fueron ejecutados en la VM. Este es el registro para referencia futura o reinstalación. 
+ +### Paso 1: Requisitos de la VM + +```bash +# CPU: 4 cores, RAM: 15 GB, Disk: 29 GB +# OS: Ubuntu 24.04 LTS +# IP: 192.168.10.100 +``` + +### Paso 2: Instalar Docker + +```bash +apt-get update +apt-get install -y ca-certificates curl gnupg lsb-release +install -m 0755 -d /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +chmod a+r /etc/apt/keyrings/docker.asc +echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" > /etc/apt/sources.list.d/docker.list +apt-get update +apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin +systemctl enable docker +systemctl start docker +``` + +### Paso 3: Levantar PostgreSQL + Redis + +```bash +cd /root/Skeen-CRM +docker compose up -d postgres redis +``` + +### Paso 4: Instalar dependencias Python + +```bash +pip install uv +uv venv .venv +source .venv/bin/activate +uv pip install -r pyproject.toml --extra dev +``` + +### Paso 5: Ejecutar migraciones y seed + +```bash +source .venv/bin/activate +alembic upgrade head +python scripts/seed_knowledge.py +``` + +### Paso 6: Instalar Gitea (Git Server) + +```bash +docker run -d --name gitea \ + --env GITEA__database__DB_TYPE=sqlite3 \ + --env GITEA__database__PATH=/data/gitea/gitea.db \ + --env GITEA__server__DOMAIN=192.168.10.100 \ + --env GITEA__server__HTTP_PORT=3000 \ + --env GITEA__server__ROOT_URL=http://192.168.10.100:3000/ \ + --env GITEA__server__SSH_PORT=222 \ + --env GITEA__security__INSTALL_LOCK=true \ + -p 3000:3000 -p 222:22 -v gitea_data:/data \ + gitea/gitea:latest + +# Crear admin +docker exec gitea sh -c "su git -c 'gitea admin user create --username skeen-admin --password skeen-admin-2024 --email admin@skeen.mx --admin'" + +# Generar token +docker exec gitea sh -c "su git -c 'gitea admin user generate-access-token --username skeen-admin --token-name setup-token 
--scopes write:repository,write:user'" +``` + +### Paso 7: Configurar systemd services + +Archivos creados: +- `/etc/systemd/system/skeen-api.service` +- `/etc/systemd/system/skeen-worker.service` +- `/etc/systemd/system/skeen-scheduler.service` + +```bash +systemctl daemon-reload +systemctl enable skeen-api skeen-worker skeen-scheduler +``` + +--- + +## 6. Configuración de Credenciales (PENDIENTE POR EL CLIENTE) + +Editar `/root/Skeen-CRM/.env` con las credenciales reales: + +### Meta / WhatsApp Business API +Obtener de: https://developers.facebook.com/apps + +```env +META_ACCESS_TOKEN=EAAxxxxxxxxxxxxxxxxxxxxxxxxxxxx +META_PHONE_NUMBER_ID=123456789012345 +META_BUSINESS_ACCOUNT_ID=987654321098765 +META_WEBHOOK_VERIFY_TOKEN=tu-token-seguro-aleatorio +META_APP_SECRET=tu-app-secret-de-meta +``` + +### OpenAI +Obtener de: https://platform.openai.com/api-keys + +```env +OPENAI_API_KEY=sk-proj-xxxxxxxxxxxxxxxxxxxxxxxxxxxx +``` + +### ERPNext +```env +ERPNEXT_BASE_URL=https://tu-instancia.erpnext.com +ERPNEXT_API_KEY=xxxxxxxxxxxxxxxx +ERPNEXT_API_SECRET=xxxxxxxxxxxxxxxx +``` + +**IMPORTANTE:** Después de configurar las credenciales: +1. Regenerar embeddings: `python scripts/seed_knowledge.py` +2. Validar conexiones: `python scripts/validate_setup.py` +3. Reiniciar servicios: `systemctl restart skeen-api skeen-worker` + +--- + +## 7. 
Operación Diaria + +### Comandos útiles + +```bash +# Ver estado de todos los servicios +systemctl status skeen-api skeen-worker skeen-scheduler +docker ps + +# Ver logs en tiempo real +journalctl -u skeen-api -f +journalctl -u skeen-worker -f + +# Reiniciar servicios +systemctl restart skeen-api skeen-worker skeen-scheduler + +# Validar conexiones externas +cd /root/Skeen-CRM && source .venv/bin/activate && python scripts/validate_setup.py + +# Ver métricas Prometheus +curl http://localhost:8000/metrics + +# Acceder a PostgreSQL +docker exec -it skeen-postgres psql -U skeen -d skeen_crm + +# Acceder a Redis +docker exec -it skeen-redis redis-cli +``` + +### Configuración del Webhook en Meta + +1. Ir a https://developers.facebook.com/apps/[TU_APP_ID]/whatsapp-business/waba-de-configuracion/ +2. En **Webhooks**, configurar: + - **Callback URL:** `http://192.168.10.100:8000/api/v1/webhooks/whatsapp` + - **Verify Token:** El valor de `META_WEBHOOK_VERIFY_TOKEN` en `.env` + - **Subscription:** `messages` +3. La URL anterior solo sirve como referencia interna: Meta exige una Callback URL pública con HTTPS y certificado válido, y la IP privada `192.168.10.100` no es alcanzable desde Meta. Exponer el puerto 8000 mediante un dominio con HTTPS, ngrok o Cloudflare Tunnel y registrar esa URL pública como Callback URL. + +### Backup de la base de datos + +```bash +# Backup PostgreSQL +docker exec skeen-postgres pg_dump -U skeen skeen_crm > /root/backups/skeen_crm_$(date +%Y%m%d_%H%M%S).sql + +# Backup Redis +docker exec skeen-redis redis-cli BGSAVE +cp /var/lib/docker/volumes/skeen-crm_redis_data/_data/dump.rdb /root/backups/redis_$(date +%Y%m%d_%H%M%S).rdb + +# Backup Gitea +docker exec gitea sh -c "su git -c 'gitea dump -c /data/gitea/conf/app.ini -f /data/gitea-dump.zip'" +cp /var/lib/docker/volumes/gitea_data/_data/gitea-dump.zip /root/backups/gitea_$(date +%Y%m%d_%H%M%S).zip +``` + +--- + +## 8. Integraciones + +### ERPNext Healthcare + +El agente se conecta a ERPNext vía API REST de Frappe. 
Doctypes utilizados: + +| Doctype | Operación | Endpoint | +|---------|-----------|----------| +| Patient | CRUD | `/api/resource/Patient` | +| Healthcare Practitioner | Listar | `/api/resource/Healthcare Practitioner` | +| Patient Appointment | CRUD | `/api/resource/Patient Appointment` | +| Clinical Procedure Template | Listar | `/api/resource/Clinical Procedure Template` | +| Patient Wallet | Consultar | `/api/resource/Patient Wallet` | + +### OpenAI GPT-4o + +- **Modelo:** `gpt-4o` +- **Temperatura:** 0.3 +- **Max tokens:** 1500 +- **Embeddings:** `text-embedding-3-small` (1536 dimensiones) +- **Function Calling:** 6 tools disponibles + +### Meta WhatsApp Business API + +- **Versión:** v18.0 +- **Endpoint base:** `https://graph.facebook.com/v18.0/` +- **Phone Number ID:** Configurado en `.env` +- **Tipos de mensaje soportados:** Texto, Template, Botones interactivos, Listas interactivas +- **Verificación:** HMAC-SHA256 en webhooks (producción) + +--- + +## 9. Troubleshooting + +### El agente no responde mensajes de WhatsApp + +1. Verificar que el webhook está configurado correctamente en Meta +2. Revisar logs: `journalctl -u skeen-api -n 100` +3. Validar token de Meta: `python scripts/validate_setup.py` +4. Verificar que Celery worker está corriendo: `systemctl status skeen-worker` + +### Error de conexión a ERPNext + +1. Verificar URL y credenciales en `.env` +2. Probar conexión: `curl -u "api_key:api_secret" https://tu-erpnext.com/api/method/frappe.auth.get_logged_user` +3. Revisar firewall entre VM y ERPNext + +### RAG no encuentra resultados + +1. Verificar que `OPENAI_API_KEY` está configurado +2. Regenerar embeddings: `python scripts/seed_knowledge.py` +3. Verificar tabla `knowledge_chunks` en PostgreSQL + +### PostgreSQL no responde + +```bash +docker compose ps +docker compose logs postgres +docker compose restart postgres +``` + +### Out of Memory + +La VM tiene 15 GB RAM. 
Si hay problemas: +- Reducir `CELERY_WORKER_CONCURRENCY` a 2 +- Reducir workers de Uvicorn a 1 +- Verificar con `free -h` y `htop` + +--- + +## 10. Seguridad + +- `.env` contiene secretos. **Nunca commitear.** +- El archivo ya está en `.gitignore`. +- Webhooks validan firma HMAC-SHA256 en producción (`APP_ENV=production`). +- Logs no exponen datos PHI (solo últimos 4 dígitos de teléfono). +- Gitea está configurado con acceso privado. +- Se recomienda configurar firewall `ufw`: + +```bash +ufw default deny incoming +ufw allow 22/tcp # SSH +ufw allow 8000/tcp # API (o restringir a IPs de Meta) +ufw allow 3000/tcp # Gitea +ufw enable +``` + +--- + +## 11. Próximos pasos recomendados + +1. **Configurar credenciales reales** en `.env` +2. **Configurar webhook en Meta Developers** +3. **Configurar dominio y HTTPS** (Caddy o Nginx + Let's Encrypt) +4. **Configurar backups automatizados** (cron diario) +5. **Configurar monitoreo** (Grafana + Prometheus) +6. **Integrar pasarela de pagos** (Stripe/MercadoPago webhooks) +7. **Entrenar al equipo** de SKEEN en el uso del agente + +--- + +## 12. 
Contacto y soporte + +- **Repositorio:** http://192.168.10.100:3000/skeen-admin/Skeen-CRM +- **Documentación:** En este repositorio, carpeta `docs/` +- **Admin Gitea:** `skeen-admin` / `skeen-admin-2024` + +--- + +*Documento generado automáticamente por SKEEN Development Team.* diff --git a/pyproject.toml b/pyproject.toml index 26708ae..31e6003 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,9 @@ dev = [ requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.build.targets.wheel] +packages = ["src"] + [tool.ruff] target-version = "py312" line-length = 100 diff --git a/scripts/seed_knowledge.py b/scripts/seed_knowledge.py index e8c8680..6c40af9 100644 --- a/scripts/seed_knowledge.py +++ b/scripts/seed_knowledge.py @@ -222,7 +222,9 @@ async def seed_knowledge_base() -> None: async with AsyncSessionLocal() as session: # Create table if not exists from sqlalchemy import text - await session.execute(text(CREATE_KNOWLEDGE_TABLE_SQL)) + statements = [s.strip() for s in CREATE_KNOWLEDGE_TABLE_SQL.strip().split(";") if s.strip()] + for stmt in statements: + await session.execute(text(stmt + ";")) await session.commit() async with AsyncSessionLocal() as session: @@ -234,20 +236,59 @@ async def seed_knowledge_base() -> None: await rag.delete_by_source("catalogo_paquetes") await rag.delete_by_source("faq_general") + # Check if OpenAI is configured + from src.config import settings + openai_configured = bool(settings.OPENAI_API_KEY.get_secret_value()) and not settings.OPENAI_API_KEY.get_secret_value().startswith("sk-xxxxx") + total = len(SKEEN_KNOWLEDGE) for i, item in enumerate(SKEEN_KNOWLEDGE, 1): - doc_id = await rag.add_document( - content=item["content"], - category=item["category"], - source=item["source"], - ) - print(f" [{i}/{total}] {item['category'].upper():12} → {doc_id[:8]}...") + try: + doc_id = await rag.add_document( + content=item["content"], + category=item["category"], + source=item["source"], + ) + print(f" [{i}/{total}] 
{item['category'].upper():12} → {doc_id[:8]}...") + except Exception as exc: + # Fallback: insert with zero vector if OpenAI fails + from sqlalchemy import text + import json + zero_vector = "[" + ",".join(["0.0"] * settings.VECTOR_DIMENSION) + "]" + result = await session.execute( + text(""" + INSERT INTO knowledge_chunks (id, content, metadata, category, source, embedding) + VALUES (gen_random_uuid()::text, :content, :metadata, :category, :source, CAST(:embedding AS vector)) + RETURNING id + """), + { + "content": item["content"], + "metadata": json.dumps({}), + "category": item["category"], + "source": item["source"], + "embedding": zero_vector, + }, + ) + row = result.mappings().first() + doc_id = row["id"] if row else "unknown" + print(f" [{i}/{total}] {item['category'].upper():12} → {doc_id[:8]}... (sin embedding)") - print(f"\n✅ Knowledge base seeded with {total} documents.") + if not openai_configured: + print(f"\n⚠️ Knowledge base seeded with {total} documents BUT WITHOUT EMBEDDINGS.") + print(" Set OPENAI_API_KEY in .env and re-run: python scripts/seed_knowledge.py") + else: + print(f"\n✅ Knowledge base seeded with {total} documents.") async def verify_search() -> None: """Quick verification search.""" + from src.config import settings + openai_configured = bool(settings.OPENAI_API_KEY.get_secret_value()) and not settings.OPENAI_API_KEY.get_secret_value().startswith("sk-xxxxx") + + if not openai_configured: + print("\n🔍 Verification search skipped (OpenAI API key not configured).") + print(" Re-run after setting OPENAI_API_KEY to test semantic search.") + return + print("\n🔍 Running verification searches...") async with AsyncSessionLocal() as session: