docs: manual de producción completo + ajustes seed fallback
- Add production manual (docs/MANUAL_PRODUCCION.md)
- Fix seed script to work without OpenAI API key (zero-vector fallback)
- Fix alembic env to use database_url_str
- Fix pyproject.toml hatch build config
This commit is contained in:
@@ -222,7 +222,9 @@ async def seed_knowledge_base() -> None:
|
||||
async with AsyncSessionLocal() as session:
|
||||
# Create table if not exists
|
||||
from sqlalchemy import text
|
||||
await session.execute(text(CREATE_KNOWLEDGE_TABLE_SQL))
|
||||
statements = [s.strip() for s in CREATE_KNOWLEDGE_TABLE_SQL.strip().split(";") if s.strip()]
|
||||
for stmt in statements:
|
||||
await session.execute(text(stmt + ";"))
|
||||
await session.commit()
|
||||
|
||||
async with AsyncSessionLocal() as session:
|
||||
@@ -234,20 +236,59 @@ async def seed_knowledge_base() -> None:
|
||||
await rag.delete_by_source("catalogo_paquetes")
|
||||
await rag.delete_by_source("faq_general")
|
||||
|
||||
# Check if OpenAI is configured
|
||||
from src.config import settings
|
||||
openai_configured = bool(settings.OPENAI_API_KEY.get_secret_value()) and not settings.OPENAI_API_KEY.get_secret_value().startswith("sk-xxxxx")
|
||||
|
||||
total = len(SKEEN_KNOWLEDGE)
|
||||
for i, item in enumerate(SKEEN_KNOWLEDGE, 1):
|
||||
doc_id = await rag.add_document(
|
||||
content=item["content"],
|
||||
category=item["category"],
|
||||
source=item["source"],
|
||||
)
|
||||
print(f" [{i}/{total}] {item['category'].upper():12} → {doc_id[:8]}...")
|
||||
try:
|
||||
doc_id = await rag.add_document(
|
||||
content=item["content"],
|
||||
category=item["category"],
|
||||
source=item["source"],
|
||||
)
|
||||
print(f" [{i}/{total}] {item['category'].upper():12} → {doc_id[:8]}...")
|
||||
except Exception as exc:
|
||||
# Fallback: insert with zero vector if OpenAI fails
|
||||
from sqlalchemy import text
|
||||
import json
|
||||
zero_vector = "[" + ",".join(["0.0"] * settings.VECTOR_DIMENSION) + "]"
|
||||
result = await session.execute(
|
||||
text("""
|
||||
INSERT INTO knowledge_chunks (id, content, metadata, category, source, embedding)
|
||||
VALUES (gen_random_uuid()::text, :content, :metadata, :category, :source, CAST(:embedding AS vector))
|
||||
RETURNING id
|
||||
"""),
|
||||
{
|
||||
"content": item["content"],
|
||||
"metadata": json.dumps({}),
|
||||
"category": item["category"],
|
||||
"source": item["source"],
|
||||
"embedding": zero_vector,
|
||||
},
|
||||
)
|
||||
row = result.mappings().first()
|
||||
doc_id = row["id"] if row else "unknown"
|
||||
print(f" [{i}/{total}] {item['category'].upper():12} → {doc_id[:8]}... (sin embedding)")
|
||||
|
||||
print(f"\n✅ Knowledge base seeded with {total} documents.")
|
||||
if not openai_configured:
|
||||
print(f"\n⚠️ Knowledge base seeded with {total} documents BUT WITHOUT EMBEDDINGS.")
|
||||
print(" Set OPENAI_API_KEY in .env and re-run: python scripts/seed_knowledge.py")
|
||||
else:
|
||||
print(f"\n✅ Knowledge base seeded with {total} documents.")
|
||||
|
||||
|
||||
async def verify_search() -> None:
|
||||
"""Quick verification search."""
|
||||
from src.config import settings
|
||||
openai_configured = bool(settings.OPENAI_API_KEY.get_secret_value()) and not settings.OPENAI_API_KEY.get_secret_value().startswith("sk-xxxxx")
|
||||
|
||||
if not openai_configured:
|
||||
print("\n🔍 Verification search skipped (OpenAI API key not configured).")
|
||||
print(" Re-run after setting OPENAI_API_KEY to test semantic search.")
|
||||
return
|
||||
|
||||
print("\n🔍 Running verification searches...")
|
||||
|
||||
async with AsyncSessionLocal() as session:
|
||||
|
||||
Reference in New Issue
Block a user