docs: manual de producción completo + ajustes seed fallback

- Add production manual (docs/MANUAL_PRODUCCION.md)
- Fix seed script to work without an OpenAI API key (when embedding fails, rows are inserted with an all-zero vector and must be re-seeded once a key is set)
- Fix alembic env to use database_url_str
- Fix pyproject.toml hatch build config
This commit is contained in:
root
2026-04-29 05:51:27 +00:00
parent 5740d94295
commit be2dbbc194
4 changed files with 454 additions and 9 deletions

View File

@@ -222,7 +222,9 @@ async def seed_knowledge_base() -> None:
async with AsyncSessionLocal() as session:
# Create table if not exists
from sqlalchemy import text
await session.execute(text(CREATE_KNOWLEDGE_TABLE_SQL))
statements = [s.strip() for s in CREATE_KNOWLEDGE_TABLE_SQL.strip().split(";") if s.strip()]
for stmt in statements:
await session.execute(text(stmt + ";"))
await session.commit()
async with AsyncSessionLocal() as session:
@@ -234,20 +236,59 @@ async def seed_knowledge_base() -> None:
await rag.delete_by_source("catalogo_paquetes")
await rag.delete_by_source("faq_general")
# Check if OpenAI is configured
from src.config import settings
openai_configured = bool(settings.OPENAI_API_KEY.get_secret_value()) and not settings.OPENAI_API_KEY.get_secret_value().startswith("sk-xxxxx")
total = len(SKEEN_KNOWLEDGE)
for i, item in enumerate(SKEEN_KNOWLEDGE, 1):
doc_id = await rag.add_document(
content=item["content"],
category=item["category"],
source=item["source"],
)
print(f" [{i}/{total}] {item['category'].upper():12}{doc_id[:8]}...")
try:
doc_id = await rag.add_document(
content=item["content"],
category=item["category"],
source=item["source"],
)
print(f" [{i}/{total}] {item['category'].upper():12}{doc_id[:8]}...")
except Exception as exc:
# Fallback: insert with zero vector if OpenAI fails
from sqlalchemy import text
import json
zero_vector = "[" + ",".join(["0.0"] * settings.VECTOR_DIMENSION) + "]"
result = await session.execute(
text("""
INSERT INTO knowledge_chunks (id, content, metadata, category, source, embedding)
VALUES (gen_random_uuid()::text, :content, :metadata, :category, :source, CAST(:embedding AS vector))
RETURNING id
"""),
{
"content": item["content"],
"metadata": json.dumps({}),
"category": item["category"],
"source": item["source"],
"embedding": zero_vector,
},
)
row = result.mappings().first()
doc_id = row["id"] if row else "unknown"
print(f" [{i}/{total}] {item['category'].upper():12}{doc_id[:8]}... (sin embedding)")
print(f"\n✅ Knowledge base seeded with {total} documents.")
if not openai_configured:
print(f"\n⚠️ Knowledge base seeded with {total} documents BUT WITHOUT EMBEDDINGS.")
print(" Set OPENAI_API_KEY in .env and re-run: python scripts/seed_knowledge.py")
else:
print(f"\n✅ Knowledge base seeded with {total} documents.")
async def verify_search() -> None:
"""Quick verification search."""
from src.config import settings
openai_configured = bool(settings.OPENAI_API_KEY.get_secret_value()) and not settings.OPENAI_API_KEY.get_secret_value().startswith("sk-xxxxx")
if not openai_configured:
print("\n🔍 Verification search skipped (OpenAI API key not configured).")
print(" Re-run after setting OPENAI_API_KEY to test semantic search.")
return
print("\n🔍 Running verification searches...")
async with AsyncSessionLocal() as session: