feat: initial Skeen-CRM AI Agent architecture

- FastAPI + Python 3.12 backend
- Meta WhatsApp Business API client (official)
- OpenAI GPT-4o with function calling
- RAG vector store with pgvector
- ERPNext Frappe REST client
- Celery + Redis async task queue
- PostgreSQL with migrations (Alembic)
- Docker Compose full stack
- Enterprise logging, metrics, health checks
2026-04-29 05:30:59 +00:00
commit d30b22b50c
44 changed files with 3603 additions and 0 deletions
--- a/alembic/README
+++ b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration with an async dbapi.
--- a/alembic/env.py
+++ b/alembic/env.py
@@ -0,0 +1,76 @@
+import asyncio
+from logging.config import fileConfig
+
+from sqlalchemy import pool
+from sqlalchemy.engine import Connection
+from sqlalchemy.ext.asyncio import async_engine_from_config
+
+from alembic import context
+from src.config import settings
+from src.infrastructure.db import Base
+
+# Alembic Config object: gives access to the values in the .ini file in use.
+config = context.config
+
+# Configure Python logging from the .ini file, when one was supplied.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# MetaData object that 'autogenerate' compares against.
+target_metadata = Base.metadata
+
+# Override the .ini URL with the application's DATABASE_URL setting.
+# set_main_option() stores the value through ConfigParser, which treats '%'
+# as the start of an interpolation token, so a URL-encoded password such as
+# "p%40ss" is rejected as invalid interpolation syntax. Doubling every '%'
+# keeps it literal; get_main_option()/get_section() return the single-'%' form.
+_database_url = str(settings.DATABASE_URL).replace("%", "%%")
+config.set_main_option("sqlalchemy.url", _database_url)
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode, configuring the context from the URL only."""
+    offline_options = {
+        "url": config.get_main_option("sqlalchemy.url"),
+        "target_metadata": target_metadata,
+        "literal_binds": True,
+        "dialect_opts": {"paramstyle": "named"},
+    }
+    context.configure(**offline_options)
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def do_run_migrations(connection: Connection) -> None:
+    """Bind the migration context to ``connection`` and run the migrations."""
+    context.configure(target_metadata=target_metadata, connection=connection)
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+async def run_async_migrations() -> None:
+    """Build an async Engine from the ini section and run the migrations on it.
+
+    The engine uses ``NullPool`` and is disposed once the migrations finish.
+    """
+    ini_section = config.get_section(config.config_ini_section, {})
+    engine = async_engine_from_config(
+        ini_section, prefix="sqlalchemy.", poolclass=pool.NullPool
+    )
+
+    async with engine.connect() as conn:
+        await conn.run_sync(do_run_migrations)
+
+    await engine.dispose()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode by running run_async_migrations() via asyncio.run()."""
+    asyncio.run(run_async_migrations())
+
+
+if not context.is_offline_mode():
+    run_migrations_online()
+else:
+    run_migrations_offline()
--- a/alembic/script.py.mako
+++ b/alembic/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
--- a/alembic/versions/20260428_init.py
+++ b/alembic/versions/20260428_init.py
@@ -0,0 +1,100 @@
+"""Initial migration: conversations, messages, knowledge_chunks.
+
+Revision ID: 001
+Revises:
+Create Date: 2026-04-28 00:00:00.000000
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# Revision identifiers, used by Alembic. down_revision is None: no parent revision.
+revision: str = "001"
+down_revision: Union[str, None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the pgvector extension, the three initial tables and their indexes."""
+    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+
+    # Conversations table
+    op.create_table(
+        "conversations",
+        sa.Column("id", sa.String(36), primary_key=True),
+        sa.Column("phone_number", sa.String(20), nullable=False, index=True),
+        sa.Column("patient_id", sa.String(100), nullable=True, index=True),
+        sa.Column("patient_name", sa.String(255), nullable=True),
+        sa.Column(
+            "status",
+            sa.Enum("active", "paused", "resolved", "escalated", "appointment_confirmed", name="conversationstatus"),
+            nullable=False,
+            server_default="active",
+        ),
+        sa.Column("context", postgresql.JSONB(astext_type=sa.Text()), server_default="{}"),
+        sa.Column("last_message_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
+        # No onupdate= here: it is a client-side default and emits nothing in DDL.
+        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
+    )
+
+    # Messages table
+    op.create_table(
+        "messages",
+        sa.Column("id", sa.String(36), primary_key=True),
+        sa.Column("conversation_id", sa.String(36), nullable=False, index=True),
+        sa.Column("direction", sa.String(10), nullable=False),
+        sa.Column("role", sa.String(20), nullable=False),
+        sa.Column("message_type", sa.String(50), server_default="text"),
+        sa.Column("content", sa.Text(), nullable=False),
+        sa.Column("whatsapp_message_id", sa.String(100), nullable=True),
+        sa.Column("tool_calls", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("tool_results", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+        sa.Column("tokens_used", sa.Integer(), server_default="0"),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}"),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
+    )
+
+    # Knowledge chunks table (for RAG)
+    op.create_table(
+        "knowledge_chunks",
+        sa.Column("id", sa.String(36), primary_key=True, server_default=sa.text("gen_random_uuid()::text")),
+        sa.Column("content", sa.Text(), nullable=False),
+        sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), server_default="{}"),
+        sa.Column("category", sa.String(50), server_default="general"),
+        sa.Column("source", sa.String(255), server_default=""),
+        sa.Column("embedding", sa.String(), nullable=True),  # placeholder type; retyped to vector(1536) below
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
+        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
+    )
+
+    # SQLAlchemy core has no vector type, so retype the placeholder column with raw SQL.
+    # The table is still empty, so the USING cast has no rows to convert.
+    op.execute("ALTER TABLE knowledge_chunks ALTER COLUMN embedding TYPE vector(1536) USING embedding::vector(1536)")
+
+    # Indexes
+    op.create_index("idx_knowledge_category", "knowledge_chunks", ["category"])
+    op.create_index("idx_knowledge_source", "knowledge_chunks", ["source"])
+    # HNSW instead of IVFFlat: IVFFlat derives its lists from the rows present at
+    # build time, so building it on this empty table gives poor recall later.
+    # HNSW has no training step; it needs pgvector >= 0.5.0.
+    op.execute("""
+        CREATE INDEX idx_knowledge_embedding
+        ON knowledge_chunks
+        USING hnsw (embedding vector_cosine_ops)
+    """)
+
+
+def downgrade() -> None:
+    """Undo ``upgrade()``: drop the indexes, tables, enum type and extension."""
+    # Reverse order of creation: indexes, then tables, then the enum type and extension.
+    for index_name in ("idx_knowledge_embedding", "idx_knowledge_source", "idx_knowledge_category"):
+        op.drop_index(index_name, table_name="knowledge_chunks")
+    for table_name in ("knowledge_chunks", "messages", "conversations"):
+        op.drop_table(table_name)
+    for statement in ("DROP TYPE IF EXISTS conversationstatus", "DROP EXTENSION IF EXISTS vector"):
+        op.execute(statement)