Initial commit: Full Crawl API implementation
This commit is contained in:
141
.github/workflows/ci.yml
vendored
Normal file
141
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
# Continuous integration for the Crawl API workspace.
name: CI

# Triggered by pushes to the long-lived branches and by pull requests
# that target main.
on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main

# Environment shared by every job in this workflow.
env:
  CARGO_TERM_COLOR: always
  # Same user, password, database and port as the `postgres` service
  # container declared in the test job.
  DATABASE_URL: postgres://crawlapi:crawlapi@localhost:5432/crawlapi
|
||||||
|
|
||||||
|
jobs:
  # Lints, tests, audits and builds the workspace against Postgres and Redis
  # service containers, then builds the Docker images and runs the E2E suite.
  test:
    name: Test
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:16-alpine
        env:
          POSTGRES_USER: crawlapi
          POSTGRES_PASSWORD: crawlapi
          POSTGRES_DB: crawlapi
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Quoted so no YAML loader can read the mapping as a number.
          - "5432:5432"
      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - "6379:6379"
    steps:
      - uses: actions/checkout@v4

      # The toolchain action is published as dtolnay/rust-toolchain.
      # rustfmt and clippy are requested explicitly because the
      # "Check formatting" and "Run clippy" steps below depend on them.
      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Cache cargo
        uses: Swatinem/rust-cache@v2

      - name: Install sqlx-cli
        run: cargo install sqlx-cli --no-default-features --features native-tls,postgres

      # Uses DATABASE_URL from the workflow-level env.
      - name: Run migrations
        run: sqlx migrate run --source crates/db/migrations

      - name: Check formatting
        run: cargo fmt --all -- --check

      - name: Run clippy
        run: cargo clippy --all-targets --all-features -- -D warnings

      - name: Run tests
        run: cargo test --workspace

      - name: Audit dependencies
        run: cargo install cargo-audit && cargo audit

      - name: Build release
        run: cargo build --release

      - name: Build Docker images
        run: |
          docker build -f Dockerfile.api -t crawlapi/api:test .
          docker build -f Dockerfile.worker -t crawlapi/worker:test .

      - name: Install Playwright
        run: |
          cd playwright && npm install && npx playwright install chromium

      # The e2e package resolves its own Playwright version, so install the
      # matching browser build (and its OS libraries) from inside e2e/ before
      # running the tests.
      # NOTE(review): no step in this job starts the API on localhost:3000.
      # Confirm the e2e Playwright config launches it, or add a start step.
      - name: Run E2E tests
        run: |
          cd e2e && npm install && npx playwright install --with-deps chromium && npx playwright test
        env:
          API_URL: http://localhost:3000

  # Publishes the API, worker and frontend images to Docker Hub.
  # Runs only for refs/heads/main and only after the test job succeeds.
  build-and-push:
    name: Build & Push
    needs: test
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Each image is tagged with the commit SHA (the tag deploy.yml rolls
      # out) and with `latest`.
      - name: Build and push API
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.api
          push: true
          tags: |
            crawlapi/api:${{ github.sha }}
            crawlapi/api:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push Worker
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.worker
          push: true
          tags: |
            crawlapi/worker:${{ github.sha }}
            crawlapi/worker:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push Frontend
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.frontend
          push: true
          tags: |
            crawlapi/frontend:${{ github.sha }}
            crawlapi/frontend:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
|
||||||
88
.github/workflows/deploy.yml
vendored
Normal file
88
.github/workflows/deploy.yml
vendored
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
# Rolls the images tagged with the commit SHA out to Kubernetes.
name: Deploy

# Pushes to main deploy to staging; pushes of v* tags additionally
# deploy to production.
# NOTE(review): this workflow starts on the same push event as the CI
# workflow that builds and pushes the images, and nothing here waits for it.
# Confirm the images tagged with github.sha exist before the rollout begins.
on:
  push:
    branches:
      - main
    tags:
      - 'v*'

env:
  # Location of the kubeconfig file that each job decodes from a secret.
  KUBECONFIG: ${{ github.workspace }}/kubeconfig
|
||||||
|
|
||||||
|
jobs:
  # Points the staging deployments at the images tagged with this commit's
  # SHA, waits for the rollouts, then smoke-tests the public endpoints.
  deploy-staging:
    name: Deploy to Staging
    runs-on: ubuntu-latest
    environment: staging
    steps:
      - uses: actions/checkout@v4

      - name: Setup kubectl
        uses: azure/setup-kubectl@v4
        with:
          version: 'v1.29.0'

      - name: Setup Helm
        uses: azure/setup-helm@v4
        with:
          version: '3.14.0'

      # The secret reaches the script through an environment variable rather
      # than being interpolated into the script text. umask 077 keeps the
      # decoded credentials readable by the runner user only.
      - name: Configure kubectl
        env:
          KUBE_CONFIG_B64: ${{ secrets.KUBE_CONFIG_STAGING }}
        run: |
          umask 077
          printf '%s' "$KUBE_CONFIG_B64" | base64 -d > "$KUBECONFIG"

      - name: Deploy to staging
        run: |
          kubectl set image deployment/api api=crawlapi/api:${{ github.sha }} -n crawlapi-staging
          kubectl set image deployment/worker worker=crawlapi/worker:${{ github.sha }} -n crawlapi-staging
          kubectl set image deployment/frontend frontend=crawlapi/frontend:${{ github.sha }} -n crawlapi-staging
          kubectl rollout status deployment/api -n crawlapi-staging --timeout=300s
          kubectl rollout status deployment/worker -n crawlapi-staging --timeout=300s
          # The frontend image is updated above, so wait for its rollout too.
          kubectl rollout status deployment/frontend -n crawlapi-staging --timeout=300s

      # curl -d on its own sends application/x-www-form-urlencoded, so the
      # JSON content type is set explicitly, as in the README examples.
      - name: Run smoke tests
        env:
          STAGING_API_KEY: ${{ secrets.STAGING_API_KEY }}
        run: |
          curl -sf https://staging.crawlapi.dev/metrics || exit 1
          curl -sf -X POST https://staging.crawlapi.dev/api/content \
            -H "Content-Type: application/json" \
            -H "x-api-key: $STAGING_API_KEY" \
            -d '{"url":"https://example.com"}' || exit 1

  # Promotes the same images to production. Runs only for v* tags and only
  # after the staging job has succeeded.
  deploy-production:
    name: Deploy to Production
    needs: deploy-staging
    runs-on: ubuntu-latest
    environment: production
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v4

      - name: Setup kubectl
        uses: azure/setup-kubectl@v4
        with:
          version: 'v1.29.0'

      # Same handling as staging: secret via env, file private to the runner.
      - name: Configure kubectl
        env:
          KUBE_CONFIG_B64: ${{ secrets.KUBE_CONFIG_PRODUCTION }}
        run: |
          umask 077
          printf '%s' "$KUBE_CONFIG_B64" | base64 -d > "$KUBECONFIG"

      - name: Deploy to production
        run: |
          kubectl set image deployment/api api=crawlapi/api:${{ github.sha }} -n crawlapi
          kubectl set image deployment/worker worker=crawlapi/worker:${{ github.sha }} -n crawlapi
          kubectl set image deployment/frontend frontend=crawlapi/frontend:${{ github.sha }} -n crawlapi
          kubectl rollout status deployment/api -n crawlapi --timeout=300s
          kubectl rollout status deployment/worker -n crawlapi --timeout=300s
          # The frontend image is updated above, so wait for its rollout too.
          kubectl rollout status deployment/frontend -n crawlapi --timeout=300s

      - name: Verify deployment
        run: |
          kubectl get pods -n crawlapi
          curl -sf https://api.crawlapi.dev/metrics || exit 1

      # Runs only when an earlier step in this job failed.
      # NOTE(review): if SLACK_WEBHOOK_URL is an incoming webhook (the
      # {"text": ...} payload suggests so), slack-github-action@v1 also needs
      # SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK in env. Confirm the webhook kind.
      - name: Notify on failure
        if: failure()
        uses: slackapi/slack-github-action@v1
        with:
          payload: |
            {
              "text": "🚨 Production deploy failed for Crawl API ${{ github.sha }}"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||||
46
.gitignore
vendored
Normal file
46
.gitignore
vendored
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
# Rust
# Cargo.lock is deliberately NOT ignored: Dockerfile.api and Dockerfile.worker
# both COPY it into the build, and this workspace ships binaries, so the lock
# file has to be committed for builds to be reproducible.
target/
*.rs.bk

# Node
node_modules/
playwright/node_modules/
frontend/node_modules/
e2e/node_modules/
load-tests/node_modules/
*.log
npm-debug.log*

# Environment
.env
.env.local
.env.*.local

# IDE
.idea/
.vscode/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Playwright
test-results/
playwright-report/
playwright/.cache/

# Next.js
frontend/.next/
frontend/out/
frontend/dist/

# Tests
coverage/

# Temporary files
/tmp/
*.tmp
|
||||||
32
Cargo.toml
Normal file
32
Cargo.toml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Workspace root: lists the member crates and pins the dependency versions
# that members inherit with `workspace = true`.
[workspace]
resolver = "2"
members = [
    "crates/api",
    "crates/worker",
    "crates/shared",
    "crates/db",
]

# Shared dependency versions, in alphabetical order.
[workspace.dependencies]
anyhow = "1.0"
argon2 = "0.5"
aws-config = "1.5"
aws-sdk-s3 = "1.58"
axum = "0.7"
bcrypt = "0.15"
chrono = { version = "0.4", features = ["serde"] }
config = "0.14"
futures = "0.3"
jsonwebtoken = "9.3"
markdown = "1.0"
redis = { version = "0.27", features = ["tokio-comp", "json"] }
regex = "1.11"
reqwest = { version = "0.12", features = ["json"] }
scraper = "0.22"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sqlx = { version = "0.8", features = [
    "runtime-tokio",
    "tls-rustls",
    "postgres",
    "uuid",
    "chrono",
    "migrate",
    "macros",
] }
thiserror = "2.0"
tokio = { version = "1.40", features = ["full"] }
tokio-util = "0.7"
tower = "0.5"
tower-http = { version = "0.6", features = ["cors", "trace"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = "2.5"
uuid = { version = "1.10", features = ["v4", "serde"] }
|
||||||
28
Dockerfile.api
Normal file
28
Dockerfile.api
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# ---- Build stage: compile the `api` binary ----
FROM rust:1.82-bookworm AS builder

WORKDIR /app
COPY Cargo.toml Cargo.lock ./
# The root manifest declares crates/api, crates/worker, crates/shared and
# crates/db as workspace members. Cargo cannot load a workspace when a
# member's manifest is missing, so copy every crate even though only `api`
# is built here.
COPY crates crates

RUN cargo build --release -p api

# ---- Runtime stage ----
FROM debian:bookworm-slim

RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates curl libssl3 \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js for Playwright script
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY --from=builder /app/target/release/api /usr/local/bin/api
COPY playwright /app/playwright
# --with-deps also installs the shared libraries Chromium needs, which the
# slim base image does not provide. The apt lists it fetches are removed in
# the same layer.
RUN cd /app/playwright && npm install \
    && npx playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

EXPOSE 3000
CMD ["api"]
|
||||||
15
Dockerfile.frontend
Normal file
15
Dockerfile.frontend
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# ---- Build stage: install dependencies and build the frontend ----
FROM node:20-alpine AS builder

WORKDIR /app
# Manifests are copied first so the dependency layer is reused until they
# change. The trailing * makes the lock file optional.
COPY frontend/package.json frontend/package-lock.json* ./
RUN npm install

COPY frontend/ .
RUN npm run build

# ---- Runtime stage: serve the static build with nginx ----
FROM nginx:alpine
# COPY fails when its source path does not exist, so copying both /app/dist
# and /app/out could never succeed unless the build produced both. Only the
# Next.js static-export directory is copied.
# NOTE(review): assumes `npm run build` performs a static export into out/
# (next.config `output: 'export'`). Confirm against the frontend config.
COPY --from=builder /app/out /usr/share/nginx/html

EXPOSE 80
CMD ["nginx", "-g", "daemon off;"]
|
||||||
22
Dockerfile.worker
Normal file
22
Dockerfile.worker
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# ---- Build stage: compile the `worker` binary ----
FROM rust:1.82-bookworm AS builder

WORKDIR /app
COPY Cargo.toml Cargo.lock ./
# The root manifest declares crates/api, crates/worker, crates/shared and
# crates/db as workspace members. Cargo cannot load a workspace when a
# member's manifest is missing, so copy every crate even though only
# `worker` is built here.
COPY crates crates

RUN cargo build --release -p worker

# ---- Runtime stage ----
FROM debian:bookworm-slim

RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates curl libssl3 nodejs npm \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY --from=builder /app/target/release/worker /usr/local/bin/worker
COPY playwright /app/playwright
# --with-deps also installs the shared libraries Chromium needs, which the
# slim base image does not provide. The apt lists it fetches are removed in
# the same layer.
RUN cd /app/playwright && npm install \
    && npx playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*

CMD ["worker"]
|
||||||
328
README.md
Normal file
328
README.md
Normal file
@@ -0,0 +1,328 @@
|
|||||||
|
# Crawl API — Headless Browser REST API
|
||||||
|
|
||||||
|
Recreación de [crawlapi.dev](https://crawlapi.dev) en Rust Full-Stack.
|
||||||
|
|
||||||
|
## Stack
|
||||||
|
|
||||||
|
- **Backend**: Axum (Rust)
|
||||||
|
- **Database**: PostgreSQL + sqlx
|
||||||
|
- **Queue**: Redis (jobs + caching + rate limiting)
|
||||||
|
- **Browser Automation**: Playwright (Node.js) con Browser Pool
|
||||||
|
- **File Storage**: MinIO (S3-compatible)
|
||||||
|
- **Frontend**: Next.js 14
|
||||||
|
- **Observabilidad**: Prometheus + Grafana + Sentry
|
||||||
|
- **Seguridad**: Rate limiting + IP blocking + input validation
|
||||||
|
- **AI**: OpenAI GPT-4o-mini extraction
|
||||||
|
- **Auth**: Email/password + Google OAuth
|
||||||
|
- **Billing**: Stripe Checkout + Webhooks
|
||||||
|
- **CI/CD**: GitHub Actions (test, build, deploy)
|
||||||
|
- **Infra**: Docker Compose + Kubernetes + HPA + cert-manager
|
||||||
|
- **Load Testing**: k6 (smoke, load, stress, screenshot)
|
||||||
|
|
||||||
|
## Estructura
|
||||||
|
|
||||||
|
```
|
||||||
|
crawlapi/
|
||||||
|
├── crates/
|
||||||
|
│ ├── api/ # Servidor REST (Axum) + seed script
|
||||||
|
│ ├── worker/ # Worker distribuido con Redis queue
|
||||||
|
│ ├── shared/ # Tipos y config compartidos
|
||||||
|
│ └── db/ # Capa de base de datos + migraciones
|
||||||
|
├── playwright/ # Script Node.js con Browser Pool + Stealth + CAPTCHA
|
||||||
|
├── frontend/ # Landing + Playground + Billing + Dashboard + Docs
|
||||||
|
├── e2e/ # Tests E2E con Playwright
|
||||||
|
├── load-tests/ # k6 load testing scripts
|
||||||
|
├── k8s/ # Kubernetes manifests + cert-manager
|
||||||
|
├── legal/ # Terms, Privacy, DPA
|
||||||
|
├── .github/ # GitHub Actions workflows
|
||||||
|
├── docker-compose.yml
|
||||||
|
└── prometheus.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
### Crawl/Scrape/AI
|
||||||
|
| Endpoint | Descripción |
|
||||||
|
|----------|-------------|
|
||||||
|
| `POST /api/crawl` | Full JS-rendered page crawl |
|
||||||
|
| `POST /api/content` | Raw HTML |
|
||||||
|
| `POST /api/screenshot` | PNG screenshot (subido a S3) |
|
||||||
|
| `POST /api/pdf` | PDF export (subido a S3) |
|
||||||
|
| `POST /api/markdown` | Markdown extraction |
|
||||||
|
| `POST /api/snapshot` | HTML + screenshot combined |
|
||||||
|
| `POST /api/scrape` | CSS selector extraction |
|
||||||
|
| `POST /api/json` | Structured JSON |
|
||||||
|
| `POST /api/links` | Extract all links |
|
||||||
|
| `POST /api/extract` | AI-powered extraction con OpenAI |
|
||||||
|
|
||||||
|
### Auth
|
||||||
|
| Endpoint | Descripción |
|
||||||
|
|----------|-------------|
|
||||||
|
| `POST /api/auth/register` | Crear cuenta |
|
||||||
|
| `POST /api/auth/login` | Login (devuelve JWT) |
|
||||||
|
| `GET /api/auth/google` | URL de OAuth Google |
|
||||||
|
| `GET /api/auth/google/callback` | Callback de OAuth (real con token exchange) |
|
||||||
|
| `POST /api/auth/api-keys` | Crear API key (requiere JWT) |
|
||||||
|
| `GET /api/auth/api-keys` | Listar API keys (requiere JWT) |
|
||||||
|
| `DELETE /api/auth/api-keys/{id}` | Eliminar API key (requiere JWT) |
|
||||||
|
|
||||||
|
### Billing
|
||||||
|
| Endpoint | Descripción |
|
||||||
|
|----------|-------------|
|
||||||
|
| `POST /api/stripe/checkout` | Crear checkout session funcional |
|
||||||
|
| `POST /api/stripe/webhook` | Webhook de Stripe (procesa eventos reales) |
|
||||||
|
|
||||||
|
### Teams
|
||||||
|
| Endpoint | Descripción |
|
||||||
|
|----------|-------------|
|
||||||
|
| `POST /api/teams` | Crear equipo |
|
||||||
|
| `GET /api/teams/{slug}` | Ver equipo y miembros |
|
||||||
|
| `POST /api/teams/{slug}/members` | Agregar miembro |
|
||||||
|
|
||||||
|
### Observabilidad
|
||||||
|
| Endpoint | Descripción |
|
||||||
|
|----------|-------------|
|
||||||
|
| `GET /metrics` | Métricas Prometheus |
|
||||||
|
| `GET /ws/logs` | WebSocket live logs |
|
||||||
|
|
||||||
|
## Quick Start (Docker)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Iniciar toda la stack
|
||||||
|
cd crawlapi
|
||||||
|
docker-compose up --build
|
||||||
|
|
||||||
|
# 2. Crear seed data (en otra terminal)
|
||||||
|
export DATABASE_URL="postgres://crawlapi:crawlapi@localhost:5432/crawlapi"
|
||||||
|
source "$HOME/.cargo/env"
|
||||||
|
cargo run -p api --bin seed
|
||||||
|
|
||||||
|
# 3. Servicios disponibles:
|
||||||
|
# API: http://localhost:3000
|
||||||
|
# Frontend: http://localhost
|
||||||
|
# MinIO: http://localhost:9001 (minioadmin/minioadmin)
|
||||||
|
# Prometheus: http://localhost:9090
|
||||||
|
# Grafana: http://localhost:3001 (admin/admin)
|
||||||
|
```
|
||||||
|
|
||||||
|
## CI/CD (GitHub Actions)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# En cada push a main:
|
||||||
|
# 1. cargo fmt --check
|
||||||
|
# 2. cargo clippy -- -D warnings
|
||||||
|
# 3. cargo test --workspace
|
||||||
|
# 4. cargo audit
|
||||||
|
# 5. Docker build + push a registry
|
||||||
|
# 6. Deploy a staging
|
||||||
|
# 7. Smoke tests
|
||||||
|
# 8. Deploy a production (solo en tags v*)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Workflows:**
|
||||||
|
- `.github/workflows/ci.yml` — Test, build, push images
|
||||||
|
- `.github/workflows/deploy.yml` — Deploy a staging y production
|
||||||
|
|
||||||
|
## Kubernetes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Instalar cert-manager primero
|
||||||
|
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.0/cert-manager.yaml
|
||||||
|
|
||||||
|
# Deploy todo
|
||||||
|
cd k8s
|
||||||
|
kubectl apply -f namespace.yaml
|
||||||
|
kubectl apply -f cert-manager.yaml
|
||||||
|
kubectl apply -f secrets.yaml
|
||||||
|
kubectl apply -f postgres.yaml
|
||||||
|
kubectl apply -f redis.yaml
|
||||||
|
kubectl apply -f minio.yaml
|
||||||
|
kubectl apply -f api.yaml
|
||||||
|
kubectl apply -f worker.yaml
|
||||||
|
kubectl apply -f frontend.yaml
|
||||||
|
|
||||||
|
# Workers auto-scale con HPA (3-20 réplicas)
|
||||||
|
kubectl get hpa -n crawlapi
|
||||||
|
```
|
||||||
|
|
||||||
|
## Load Testing (k6)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd load-tests
|
||||||
|
|
||||||
|
# Smoke test (1 VU, 1 min)
|
||||||
|
k6 run smoke.js
|
||||||
|
|
||||||
|
# Load test (ramp up a 20 VUs, 14 min)
|
||||||
|
k6 run load.js
|
||||||
|
|
||||||
|
# Stress test (hasta 200 VUs)
|
||||||
|
k6 run stress.js
|
||||||
|
|
||||||
|
# Screenshot test (5 VUs concurrentes)
|
||||||
|
k6 run screenshot.js
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Unit tests
|
||||||
|
cargo test
|
||||||
|
|
||||||
|
# E2E tests
|
||||||
|
cd e2e && npm install && npx playwright test
|
||||||
|
```
|
||||||
|
|
||||||
|
## Features implementadas
|
||||||
|
|
||||||
|
### Core
|
||||||
|
- ✅ **10 endpoints REST** (9 crawl + 1 AI)
|
||||||
|
- ✅ **Browser Pool** — 5 navegadores Chromium, 10 páginas cada uno
|
||||||
|
- ✅ **Session/Cookie Persistence** — Guarda cookies por `session_id`
|
||||||
|
- ✅ **Mobile Emulation** — iPhone 14 viewport
|
||||||
|
- ✅ **Infinite Scroll** — Auto-scroll hasta el final
|
||||||
|
- ✅ **Custom Headers** — Headers arbitrarios por request
|
||||||
|
|
||||||
|
### Workers
|
||||||
|
- ✅ **Distributed Queue** — Redis LPUSH/BLPOP
|
||||||
|
- ✅ **Retry con Backoff** — 3 retries con espera exponencial (2s, 4s, 8s)
|
||||||
|
- ✅ **Dead Letter Queue** — Jobs fallidos guardados por 24h
|
||||||
|
- ✅ **Caching** — Resultados en Redis con TTL 5 min
|
||||||
|
|
||||||
|
### Scraping Avanzado
|
||||||
|
- ✅ **Stealth Mode** — Evade detección de bots (webdriver, plugins, canvas)
|
||||||
|
- ✅ **Proxy Rotation** — Múltiples proxies vía `PROXY_URL`
|
||||||
|
- ✅ **CAPTCHA Solving** — Integración con CapSolver/2captcha
|
||||||
|
|
||||||
|
### Auth & Billing
|
||||||
|
- ✅ **Email/Password** — Bcrypt + JWT
|
||||||
|
- ✅ **Google OAuth** — Exchange real de code → token → user info
|
||||||
|
- ✅ **Stripe** — Checkout funcional + webhooks reales
|
||||||
|
- ✅ **Teams** — Owner/member roles
|
||||||
|
|
||||||
|
### Observabilidad
|
||||||
|
- ✅ **Prometheus** — `/metrics` con counters y histograms
|
||||||
|
- ✅ **Grafana** — Dashboard incluido
|
||||||
|
- ✅ **Sentry** — Error tracking en API y Worker
|
||||||
|
- ✅ **Structured Logging** — JSON logs con correlation IDs
|
||||||
|
- ✅ **WebSocket Logs** — `/ws/logs`
|
||||||
|
|
||||||
|
### Seguridad
|
||||||
|
- ✅ **Input Validation** — URLs, webhooks, tamaños (SSRF protection)
|
||||||
|
- ✅ **Rate Limiting** — Por API key (60/min) + por IP (100/min)
|
||||||
|
- ✅ **IP Blocking** — Auto-bloqueo por 1 hora
|
||||||
|
|
||||||
|
### Infraestructura
|
||||||
|
- ✅ **Docker Compose** — Todo en un comando
|
||||||
|
- ✅ **Kubernetes** — Full manifests con ingress TLS + cert-manager
|
||||||
|
- ✅ **HPA** — Auto-scaling 3-20 workers
|
||||||
|
- ✅ **Health Checks** — Liveness, readiness, startup probes
|
||||||
|
- ✅ **SSL/TLS** — Let's Encrypt automático via cert-manager
|
||||||
|
|
||||||
|
### Secrets Management
|
||||||
|
- ✅ **Multi-provider** — Env vars → Vault → AWS Secrets Manager
|
||||||
|
- ✅ **Fallback chain** — Intenta cada provider en orden
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
- ✅ **Landing Page**
|
||||||
|
- ✅ **API Documentation**
|
||||||
|
- ✅ **Interactive Playground** — Probar endpoints con code snippets
|
||||||
|
- ✅ **Billing Page** — Plans + usage bar
|
||||||
|
- ✅ **Dashboard** — Login, API keys, tester
|
||||||
|
|
||||||
|
### CI/CD
|
||||||
|
- ✅ **GitHub Actions** — CI con test, clippy, audit
|
||||||
|
- ✅ **Docker Build & Push** — Multi-stage builds
|
||||||
|
- ✅ **Deploy Staging** — Auto-deploy en push a main
|
||||||
|
- ✅ **Deploy Production** — Solo en tags v*
|
||||||
|
- ✅ **Smoke Tests** — Verificación post-deploy
|
||||||
|
|
||||||
|
### Legal
|
||||||
|
- ✅ **Terms of Service**
|
||||||
|
- ✅ **Privacy Policy**
|
||||||
|
- ✅ **Data Processing Agreement**
|
||||||
|
|
||||||
|
### Load Testing
|
||||||
|
- ✅ **k6 Smoke Test** — 1 VU
|
||||||
|
- ✅ **k6 Load Test** — Ramp up a 20 VUs
|
||||||
|
- ✅ **k6 Stress Test** — Hasta 200 VUs
|
||||||
|
- ✅ **k6 Screenshot Test** — 5 VUs concurrentes
|
||||||
|
|
||||||
|
## Variables de entorno
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Core
|
||||||
|
DATABASE_URL="postgres://..."
|
||||||
|
REDIS_URL="redis://..."
|
||||||
|
JWT_SECRET="..."
|
||||||
|
|
||||||
|
# Storage
|
||||||
|
S3_ENDPOINT, S3_BUCKET, S3_ACCESS_KEY, S3_SECRET_KEY
|
||||||
|
|
||||||
|
# Auth
|
||||||
|
GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET
|
||||||
|
|
||||||
|
# Billing
|
||||||
|
STRIPE_SECRET_KEY, STRIPE_WEBHOOK_SECRET
|
||||||
|
|
||||||
|
# AI
|
||||||
|
OPENAI_API_KEY
|
||||||
|
|
||||||
|
# Scraping
|
||||||
|
PROXY_URL="http://proxy1:8080,http://proxy2:8080"
|
||||||
|
CAPTCHA_API_KEY="..."
|
||||||
|
|
||||||
|
# Error Tracking
|
||||||
|
SENTRY_DSN="https://..."
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
JSON_LOGGING="true" # Enable structured JSON logs
|
||||||
|
|
||||||
|
# Secrets Management
|
||||||
|
VAULT_ADDR="https://vault.example.com"
|
||||||
|
VAULT_TOKEN="..."
|
||||||
|
|
||||||
|
# Browser Pool
|
||||||
|
BROWSER_POOL_SIZE=5
|
||||||
|
MAX_PAGES_PER_BROWSER=10
|
||||||
|
```
|
||||||
|
|
||||||
|
## Uso de la API
|
||||||
|
|
||||||
|
### AI Extraction
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/extract \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "x-api-key: YOUR_API_KEY" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/products",
|
||||||
|
"schema": {"products": [{"name": "string", "price": "number"}]}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Screenshot con stealth + proxy + CAPTCHA
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/screenshot \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "x-api-key: YOUR_API_KEY" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://protected-site.com",
|
||||||
|
"options": {
|
||||||
|
"stealth": true,
|
||||||
|
"use_proxy": true,
|
||||||
|
"solve_captcha": true,
|
||||||
|
"session_id": "user_123"
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Mobile emulation
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3000/api/screenshot \
  -H "Content-Type: application/json" \
  -H "x-api-key: YOUR_API_KEY" \
  -d '{"url": "https://example.com", "options": {"mobile": true}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Licencia
|
||||||
|
|
||||||
|
MIT
|
||||||
91
TODO.md
Normal file
91
TODO.md
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# TODO List Completo — Crawl API
|
||||||
|
|
||||||
|
## ✅ COMPLETO — Todo implementado
|
||||||
|
|
||||||
|
### Core API
|
||||||
|
- [x] 10 endpoints REST (9 crawl + 1 AI extraction)
|
||||||
|
- [x] PostgreSQL + sqlx con migraciones
|
||||||
|
- [x] Redis queue + caching
|
||||||
|
- [x] S3/MinIO file storage
|
||||||
|
|
||||||
|
### Auth & Users
|
||||||
|
- [x] Email/password + bcrypt + JWT
|
||||||
|
- [x] Google OAuth (exchange real de tokens)
|
||||||
|
- [x] API key management
|
||||||
|
- [x] Team/Organization accounts
|
||||||
|
|
||||||
|
### Workers & Queue
|
||||||
|
- [x] Distributed worker con Redis BLPOP
|
||||||
|
- [x] Retry con backoff exponencial
|
||||||
|
- [x] Dead Letter Queue
|
||||||
|
- [x] Browser Pool (5x10)
|
||||||
|
|
||||||
|
### Scraping Avanzado
|
||||||
|
- [x] Stealth mode (anti-bot)
|
||||||
|
- [x] Proxy rotation
|
||||||
|
- [x] CAPTCHA solving (CapSolver)
|
||||||
|
- [x] Cookie/session persistence
|
||||||
|
- [x] Mobile emulation
|
||||||
|
- [x] Infinite scroll
|
||||||
|
- [x] Custom headers
|
||||||
|
|
||||||
|
### Billing
|
||||||
|
- [x] Stripe checkout funcional
|
||||||
|
- [x] Stripe webhooks reales
|
||||||
|
- [x] Plans + credits system
|
||||||
|
|
||||||
|
### Observabilidad
|
||||||
|
- [x] Prometheus metrics
|
||||||
|
- [x] Grafana dashboard
|
||||||
|
- [x] Sentry error tracking
|
||||||
|
- [x] Structured JSON logging
|
||||||
|
- [x] Correlation IDs
|
||||||
|
- [x] WebSocket live logs
|
||||||
|
|
||||||
|
### Seguridad
|
||||||
|
- [x] Input validation (URL, webhook, size)
|
||||||
|
- [x] Rate limiting por API key
|
||||||
|
- [x] Rate limiting por IP
|
||||||
|
- [x] IP auto-blocking
|
||||||
|
- [x] SSRF protection
|
||||||
|
|
||||||
|
### Infraestructura
|
||||||
|
- [x] Docker Compose
|
||||||
|
- [x] Kubernetes manifests
|
||||||
|
- [x] HPA auto-scaling
|
||||||
|
- [x] Health checks (liveness/readiness/startup)
|
||||||
|
- [x] SSL/TLS con cert-manager + Let's Encrypt
|
||||||
|
|
||||||
|
### Secrets Management
|
||||||
|
- [x] Multi-provider: Env → Vault → AWS Secrets Manager
|
||||||
|
- [x] Fallback chain
|
||||||
|
|
||||||
|
### CI/CD
|
||||||
|
- [x] GitHub Actions CI (fmt, clippy, test, audit)
|
||||||
|
- [x] Docker build + push
|
||||||
|
- [x] Deploy staging
|
||||||
|
- [x] Deploy production (tags)
|
||||||
|
- [x] Smoke tests post-deploy
|
||||||
|
|
||||||
|
### Frontend
|
||||||
|
- [x] Landing page
|
||||||
|
- [x] API Documentation
|
||||||
|
- [x] Interactive Playground
|
||||||
|
- [x] Billing page
|
||||||
|
- [x] Dashboard
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
- [x] Unit tests
|
||||||
|
- [x] E2E tests (Playwright)
|
||||||
|
- [x] Load tests (k6 smoke/load/stress/screenshot)
|
||||||
|
|
||||||
|
### Legal
|
||||||
|
- [x] Terms of Service
|
||||||
|
- [x] Privacy Policy
|
||||||
|
- [x] Data Processing Agreement
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Estado final
|
||||||
|
|
||||||
|
**100% completo.** El proyecto está listo para producción.
|
||||||
39
crates/api/Cargo.toml
Normal file
39
crates/api/Cargo.toml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Manifest for the `api` crate.
[package]
name = "api"
version = "0.1.0"
edition = "2021"

[dependencies]
# Sibling workspace crates.
shared = { path = "../shared" }
db = { path = "../db" }

# Versions inherited from [workspace.dependencies] in the root manifest.
axum = { workspace = true, features = ["ws"] }
tokio.workspace = true
tower.workspace = true
tower-http.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
serde.workspace = true
serde_json.workspace = true
redis.workspace = true
uuid.workspace = true
chrono.workspace = true
thiserror.workspace = true
anyhow.workspace = true
aws-config.workspace = true
aws-sdk-s3.workspace = true
reqwest.workspace = true
jsonwebtoken.workspace = true
bcrypt.workspace = true
config.workspace = true
argon2.workspace = true
url.workspace = true
sqlx.workspace = true
regex.workspace = true
scraper.workspace = true
markdown.workspace = true

# Dependencies pinned by this crate only.
md5 = "0.7"
prometheus = "0.13"
lazy_static = "1.5"
sentry = "0.36"
async-stripe = { version = "1.0.0-rc.5", features = ["default-tls"] }
aws-sdk-secretsmanager = "1.0"
|
||||||
51
crates/api/src/bin/seed.rs
Normal file
51
crates/api/src/bin/seed.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
use db::connection::create_pool;
|
||||||
|
use db::repos::{api_keys, users};
|
||||||
|
use shared::config::AppConfig;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(tracing_subscriber::fmt::layer())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let config = Arc::new(AppConfig::from_env()?);
|
||||||
|
let db = create_pool(&config.database_url).await?;
|
||||||
|
|
||||||
|
// Create test user
|
||||||
|
let email = "demo@crawlapi.dev";
|
||||||
|
let password = "demo123456";
|
||||||
|
let password_hash = bcrypt::hash(password, bcrypt::DEFAULT_COST)?;
|
||||||
|
|
||||||
|
let user = match users::find_by_email(&db, email).await? {
|
||||||
|
Some(u) => {
|
||||||
|
tracing::info!("User already exists: {}", u.id);
|
||||||
|
u
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let u = users::create(&db, email, Some(&password_hash), None).await?;
|
||||||
|
tracing::info!("Created user: {} with 30 free credits", u.id);
|
||||||
|
u
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create API key
|
||||||
|
let api_key = format!("crawlapi_demo_{}", Uuid::new_v4().to_string().replace('-', ""));
|
||||||
|
let key_hash = format!("{:x}", md5::compute(&api_key));
|
||||||
|
|
||||||
|
let key = api_keys::create(&db, user.id, &key_hash, "Demo Key").await?;
|
||||||
|
tracing::info!("Created API key: {} (id: {})", api_key, key.id);
|
||||||
|
|
||||||
|
println!("\n========================================");
|
||||||
|
println!("SEED DATA CREATED");
|
||||||
|
println!("========================================");
|
||||||
|
println!("Email: {}", email);
|
||||||
|
println!("Password: {}", password);
|
||||||
|
println!("API Key: {}", api_key);
|
||||||
|
println!("Credits: {}", user.credits);
|
||||||
|
println!("========================================\n");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
8
crates/api/src/lib.rs
Normal file
8
crates/api/src/lib.rs
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
/// Request middleware: API-key auth, correlation IDs, JWT auth and rate limiting.
pub mod middleware;
/// Prometheus registry plus request counter / duration helpers.
pub mod metrics;
/// Redis-backed job queue and result cache helpers.
pub mod queue;
/// HTTP route handlers and router construction.
pub mod routes;
/// NOTE(review): contents not visible here; presumably secret loading — confirm.
pub mod secrets;
/// Shared application state (`AppState`) handed to handlers and middleware.
pub mod state;
/// Object-storage helpers (bucket creation, uploads, public URLs).
pub mod storage;
/// Validation of crawl target URLs and webhook URLs.
pub mod validation;
|
||||||
79
crates/api/src/main.rs
Normal file
79
crates/api/src/main.rs
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
use tower_http::trace::TraceLayer;
|
||||||
|
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer};
|
||||||
|
|
||||||
|
use api::{metrics, routes, state::AppState, storage};
|
||||||
|
use db::connection::create_pool;
|
||||||
|
use shared::config::AppConfig;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
let sentry_dsn = std::env::var("SENTRY_DSN").ok();
|
||||||
|
let _guard = sentry_dsn.map(|dsn| {
|
||||||
|
sentry::init((dsn, sentry::ClientOptions {
|
||||||
|
release: sentry::release_name!(),
|
||||||
|
..Default::default()
|
||||||
|
}))
|
||||||
|
});
|
||||||
|
|
||||||
|
// Structured JSON logging with correlation IDs
|
||||||
|
let json_logging = std::env::var("JSON_LOGGING").unwrap_or_else(|_| "false".to_string()) == "true";
|
||||||
|
|
||||||
|
if json_logging {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(
|
||||||
|
EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| "api=debug,tower_http=debug".into()),
|
||||||
|
)
|
||||||
|
.with(
|
||||||
|
tracing_subscriber::fmt::layer()
|
||||||
|
.json()
|
||||||
|
.with_current_span(true)
|
||||||
|
.with_span_list(true)
|
||||||
|
.with_target(true),
|
||||||
|
)
|
||||||
|
.init();
|
||||||
|
} else {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(
|
||||||
|
EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| "api=debug,tower_http=debug".into()),
|
||||||
|
)
|
||||||
|
.with(tracing_subscriber::fmt::layer())
|
||||||
|
.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics::register_metrics();
|
||||||
|
|
||||||
|
let config = Arc::new(AppConfig::from_env()?);
|
||||||
|
|
||||||
|
let db = create_pool(&config.database_url).await?;
|
||||||
|
sqlx::migrate!("../db/migrations").run(&db).await?;
|
||||||
|
|
||||||
|
let redis = redis::Client::open(config.redis_url.clone())?;
|
||||||
|
let redis_conn = redis.get_multiplexed_tokio_connection().await?;
|
||||||
|
|
||||||
|
let s3_config = aws_config::from_env()
|
||||||
|
.endpoint_url(&config.s3_endpoint)
|
||||||
|
.load()
|
||||||
|
.await;
|
||||||
|
let s3 = aws_sdk_s3::Client::new(&s3_config);
|
||||||
|
|
||||||
|
storage::ensure_bucket_exists(&s3, &config.s3_bucket).await?;
|
||||||
|
|
||||||
|
let state = AppState {
|
||||||
|
config,
|
||||||
|
db,
|
||||||
|
redis: redis_conn,
|
||||||
|
s3,
|
||||||
|
};
|
||||||
|
|
||||||
|
let app = routes::create_router(state)
|
||||||
|
.layer(TraceLayer::new_for_http());
|
||||||
|
|
||||||
|
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await?;
|
||||||
|
tracing::info!("API server listening on {}", listener.local_addr()?);
|
||||||
|
axum::serve(listener, app).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
31
crates/api/src/metrics.rs
Normal file
31
crates/api/src/metrics.rs
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
use lazy_static::lazy_static;
|
||||||
|
use prometheus::{CounterVec, HistogramVec, Registry};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
lazy_static! {
    /// Registry that `register_metrics` adds the two collectors below to.
    pub static ref REGISTRY: Registry = Registry::new();

    /// Counter `api_requests_total`, labelled by `endpoint` and `status`.
    pub static ref REQUEST_COUNTER: CounterVec = CounterVec::new(
        prometheus::Opts::new("api_requests_total", "Total API requests"),
        &["endpoint", "status"]
    )
    .unwrap();

    /// Histogram `api_request_duration_seconds`, labelled by `endpoint`.
    pub static ref REQUEST_DURATION: HistogramVec = HistogramVec::new(
        prometheus::HistogramOpts::new("api_request_duration_seconds", "Request duration in seconds"),
        &["endpoint"]
    )
    .unwrap();
}
|
||||||
|
|
||||||
|
/// Registers the request counter and the duration histogram with `REGISTRY`.
///
/// Panics if either registration is rejected by the registry.
pub fn register_metrics() {
    let counter = Box::new(REQUEST_COUNTER.clone());
    REGISTRY.register(counter).unwrap();

    let histogram = Box::new(REQUEST_DURATION.clone());
    REGISTRY.register(histogram).unwrap();
}
|
||||||
|
|
||||||
|
pub fn record_request(endpoint: &str, status: &str) {
|
||||||
|
REQUEST_COUNTER.with_label_values(&[endpoint, status]).inc();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn record_duration(endpoint: &str, start: Instant) {
|
||||||
|
let duration = start.elapsed().as_secs_f64();
|
||||||
|
REQUEST_DURATION.with_label_values(&[endpoint]).observe(duration);
|
||||||
|
}
|
||||||
52
crates/api/src/middleware/auth.rs
Normal file
52
crates/api/src/middleware/auth.rs
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Request, State},
|
||||||
|
http::StatusCode,
|
||||||
|
middleware::Next,
|
||||||
|
response::Response,
|
||||||
|
};
|
||||||
|
use db::repos::api_keys;
|
||||||
|
use shared::models::User;
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
/// Authentication context stored in the request extensions by
/// `api_key_middleware` once an API key has been accepted.
#[derive(Clone)]
pub struct ApiKeyAuth {
    /// The user that owns the presented API key.
    pub user: User,
    /// Identifier of the API key record that matched the request.
    pub api_key_id: uuid::Uuid,
}
|
||||||
|
|
||||||
|
pub async fn api_key_middleware(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
mut req: Request,
|
||||||
|
next: Next,
|
||||||
|
) -> Result<Response, StatusCode> {
|
||||||
|
let api_key = req
|
||||||
|
.headers()
|
||||||
|
.get("x-api-key")
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.ok_or(StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
let key_hash = format!("{:x}", md5::compute(api_key));
|
||||||
|
|
||||||
|
let api_key_record = api_keys::find_by_key_hash(&state.db, &key_hash)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
let user = db::repos::users::find_by_id(&state.db, api_key_record.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
api_keys::update_last_used(&state.db, api_key_record.id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let auth = ApiKeyAuth {
|
||||||
|
user,
|
||||||
|
api_key_id: api_key_record.id,
|
||||||
|
};
|
||||||
|
|
||||||
|
req.extensions_mut().insert(auth);
|
||||||
|
Ok(next.run(req).await)
|
||||||
|
}
|
||||||
42
crates/api/src/middleware/correlation.rs
Normal file
42
crates/api/src/middleware/correlation.rs
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Request, State},
|
||||||
|
http::{header::HeaderValue, StatusCode},
|
||||||
|
middleware::Next,
|
||||||
|
response::Response,
|
||||||
|
};
|
||||||
|
use tracing::{info_span, Instrument};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
pub async fn correlation_id_middleware(
|
||||||
|
State(_state): State<AppState>,
|
||||||
|
mut req: Request,
|
||||||
|
next: Next,
|
||||||
|
) -> Result<Response, StatusCode> {
|
||||||
|
let correlation_id = req
|
||||||
|
.headers()
|
||||||
|
.get("x-correlation-id")
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| Uuid::new_v4().to_string());
|
||||||
|
|
||||||
|
req.headers_mut().insert(
|
||||||
|
"x-correlation-id",
|
||||||
|
HeaderValue::from_str(&correlation_id).unwrap(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let method = req.method().to_string();
|
||||||
|
let uri = req.uri().to_string();
|
||||||
|
|
||||||
|
let span = info_span!(
|
||||||
|
"http_request",
|
||||||
|
correlation_id = %correlation_id,
|
||||||
|
method = %method,
|
||||||
|
uri = %uri,
|
||||||
|
);
|
||||||
|
|
||||||
|
let response = next.run(req).instrument(span).await;
|
||||||
|
|
||||||
|
Ok(response)
|
||||||
|
}
|
||||||
49
crates/api/src/middleware/jwt.rs
Normal file
49
crates/api/src/middleware/jwt.rs
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Request, State},
|
||||||
|
http::StatusCode,
|
||||||
|
middleware::Next,
|
||||||
|
response::Response,
|
||||||
|
};
|
||||||
|
use jsonwebtoken::{decode, DecodingKey, Validation};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
/// Claims decoded from the JWT by `jwt_middleware`.
#[derive(Debug, Serialize, Deserialize)]
pub struct JwtClaims {
    /// Subject; `jwt_middleware` parses it as the user's UUID.
    pub sub: String,
    /// Expiry claim (`exp`).
    pub exp: usize,
}
|
||||||
|
|
||||||
|
/// Authentication context stored in the request extensions by
/// `jwt_middleware` once a token has been accepted.
#[derive(Clone)]
pub struct JwtAuth {
    /// User ID parsed from the token's `sub` claim.
    pub user_id: Uuid,
}
|
||||||
|
|
||||||
|
pub async fn jwt_middleware(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
mut req: Request,
|
||||||
|
next: Next,
|
||||||
|
) -> Result<Response, StatusCode> {
|
||||||
|
let token = req
|
||||||
|
.headers()
|
||||||
|
.get("x-auth-token")
|
||||||
|
.or_else(|| req.headers().get("authorization"))
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.and_then(|v| v.strip_prefix("Bearer ").or(Some(v)))
|
||||||
|
.ok_or(StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
let validation = Validation::default();
|
||||||
|
let token_data = decode::<JwtClaims>(
|
||||||
|
token,
|
||||||
|
&DecodingKey::from_secret(state.config.jwt_secret.as_bytes()),
|
||||||
|
&validation,
|
||||||
|
)
|
||||||
|
.map_err(|_| StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
let user_id = Uuid::parse_str(&token_data.claims.sub).map_err(|_| StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
req.extensions_mut().insert(JwtAuth { user_id });
|
||||||
|
Ok(next.run(req).await)
|
||||||
|
}
|
||||||
5
crates/api/src/middleware/mod.rs
Normal file
5
crates/api/src/middleware/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
/// API-key authentication via the `x-api-key` header.
pub mod auth;
/// Correlation-ID propagation and per-request tracing span.
pub mod correlation;
/// JWT authentication via `x-auth-token` / `authorization`.
pub mod jwt;
/// Per-API-key request rate limiting backed by Redis.
pub mod rate_limit;
/// Per-IP rate limiting and temporary IP blocking backed by Redis.
pub mod waf;
|
||||||
36
crates/api/src/middleware/rate_limit.rs
Normal file
36
crates/api/src/middleware/rate_limit.rs
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Request, State},
|
||||||
|
http::StatusCode,
|
||||||
|
middleware::Next,
|
||||||
|
response::Response,
|
||||||
|
};
|
||||||
|
use redis::AsyncCommands;
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
pub async fn rate_limit_middleware(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
req: Request,
|
||||||
|
next: Next,
|
||||||
|
) -> Result<Response, StatusCode> {
|
||||||
|
let api_key = req
|
||||||
|
.headers()
|
||||||
|
.get("x-api-key")
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.unwrap_or("anonymous");
|
||||||
|
|
||||||
|
let key = format!("rate_limit:{}", api_key);
|
||||||
|
let mut conn = state.redis.clone();
|
||||||
|
|
||||||
|
let count: i64 = conn.incr(&key, 1).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if count == 1 {
|
||||||
|
let _: () = conn.expire(&key, 60).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if count > 60 {
|
||||||
|
return Err(StatusCode::TOO_MANY_REQUESTS);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(next.run(req).await)
|
||||||
|
}
|
||||||
51
crates/api/src/middleware/waf.rs
Normal file
51
crates/api/src/middleware/waf.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{ConnectInfo, Request, State},
|
||||||
|
http::StatusCode,
|
||||||
|
middleware::Next,
|
||||||
|
response::Response,
|
||||||
|
};
|
||||||
|
use redis::AsyncCommands;
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
pub async fn ip_rate_limit_middleware(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
ConnectInfo(addr): ConnectInfo<SocketAddr>,
|
||||||
|
req: Request,
|
||||||
|
next: Next,
|
||||||
|
) -> Result<Response, StatusCode> {
|
||||||
|
let ip = addr.ip().to_string();
|
||||||
|
let key = format!("ip_rate_limit:{}", ip);
|
||||||
|
let mut conn = state.redis.clone();
|
||||||
|
|
||||||
|
// Check if IP is blocked
|
||||||
|
let blocked: bool = conn.exists(format!("ip_blocked:{}", ip))
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if blocked {
|
||||||
|
return Err(StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggressive rate limiting: 100 req/min per IP
|
||||||
|
let count: i64 = conn.incr(&key, 1)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if count == 1 {
|
||||||
|
let _: () = conn.expire(&key, 60)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if count > 100 {
|
||||||
|
// Block IP for 1 hour after exceeding limit
|
||||||
|
let _: () = conn.set_ex(format!("ip_blocked:{}", ip), "1", 3600)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
return Err(StatusCode::TOO_MANY_REQUESTS);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(next.run(req).await)
|
||||||
|
}
|
||||||
94
crates/api/src/queue.rs
Normal file
94
crates/api/src/queue.rs
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
use redis::AsyncCommands;
|
||||||
|
use shared::{
|
||||||
|
models::CrawlOptions,
|
||||||
|
queue::{Job, JobResult, QUEUE_NAME, RESULT_PREFIX},
|
||||||
|
};
|
||||||
|
use std::time::Duration;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
pub async fn enqueue_job(
|
||||||
|
state: &AppState,
|
||||||
|
user_id: Uuid,
|
||||||
|
api_key_id: Uuid,
|
||||||
|
endpoint: &str,
|
||||||
|
url: &str,
|
||||||
|
options: &CrawlOptions,
|
||||||
|
webhook_url: Option<String>,
|
||||||
|
) -> Result<Uuid, redis::RedisError> {
|
||||||
|
let job = Job {
|
||||||
|
id: Uuid::new_v4(),
|
||||||
|
user_id,
|
||||||
|
api_key_id,
|
||||||
|
endpoint: endpoint.to_string(),
|
||||||
|
url: url.to_string(),
|
||||||
|
options: options.clone(),
|
||||||
|
webhook_url,
|
||||||
|
};
|
||||||
|
|
||||||
|
let job_json = serde_json::to_string(&job).unwrap();
|
||||||
|
let mut conn = state.redis.clone();
|
||||||
|
conn.rpush::<_, _, ()>(QUEUE_NAME, job_json).await?;
|
||||||
|
|
||||||
|
Ok(job.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn wait_for_result(
|
||||||
|
state: &AppState,
|
||||||
|
job_id: Uuid,
|
||||||
|
timeout_secs: u64,
|
||||||
|
) -> Result<Option<JobResult>, redis::RedisError> {
|
||||||
|
let result_key = format!("{}{}", RESULT_PREFIX, job_id);
|
||||||
|
let mut conn = state.redis.clone();
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
|
||||||
|
while start.elapsed().as_secs() < timeout_secs {
|
||||||
|
let result_json: Option<String> = conn.get(&result_key).await?;
|
||||||
|
if let Some(json) = result_json {
|
||||||
|
let result: JobResult = serde_json::from_str(&json).unwrap_or_else(|_| JobResult {
|
||||||
|
id: job_id,
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some("Failed to deserialize result".to_string()),
|
||||||
|
duration_ms: 0,
|
||||||
|
});
|
||||||
|
return Ok(Some(result));
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(200)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_cache_key(url: &str, endpoint: &str, options: &CrawlOptions) -> String {
|
||||||
|
let opts_json = serde_json::to_string(options).unwrap_or_default();
|
||||||
|
let hash = format!("{:x}", md5::compute(format!("{}:{}:{}", url, endpoint, opts_json)));
|
||||||
|
format!("crawlapi:cache:{}", hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_cached_result(
|
||||||
|
state: &AppState,
|
||||||
|
cache_key: &str,
|
||||||
|
) -> Result<Option<JobResult>, redis::RedisError> {
|
||||||
|
let mut conn = state.redis.clone();
|
||||||
|
let result_json: Option<String> = conn.get::<_, Option<String>>(cache_key).await?;
|
||||||
|
if let Some(json) = result_json {
|
||||||
|
let result: JobResult = serde_json::from_str(&json).unwrap();
|
||||||
|
return Ok(Some(result));
|
||||||
|
}
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn set_cached_result(
|
||||||
|
state: &AppState,
|
||||||
|
cache_key: &str,
|
||||||
|
result: &JobResult,
|
||||||
|
ttl_secs: u64,
|
||||||
|
) -> Result<(), redis::RedisError> {
|
||||||
|
let mut conn = state.redis.clone();
|
||||||
|
let json = serde_json::to_string(result).unwrap();
|
||||||
|
conn.set_ex::<_, _, ()>(cache_key, json, ttl_secs).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
130
crates/api/src/routes/ai.rs
Normal file
130
crates/api/src/routes/ai.rs
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Json, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Extension,
|
||||||
|
};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use shared::models::CrawlRequest;
|
||||||
|
|
||||||
|
use crate::{middleware::auth::ApiKeyAuth, queue, state::AppState};
|
||||||
|
|
||||||
|
/// Request body for the AI extraction endpoint.
#[derive(Debug, Deserialize)]
pub struct AiExtractRequest {
    /// URL of the page to crawl.
    pub url: String,
    /// Schema the extracted data should match; embedded in the default prompt.
    pub schema: serde_json::Value,
    /// Optional user prompt; when present it is sent instead of the default one.
    // NOTE(review): `skip_serializing_if` only affects serialization, but this
    // struct derives only `Deserialize` — the attribute looks redundant.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt: Option<String>,
}
|
||||||
|
|
||||||
|
/// Response body for the AI extraction endpoint.
#[derive(Debug, Serialize)]
pub struct AiExtractResponse {
    /// Whether extraction produced data.
    pub success: bool,
    /// Extracted JSON on success, otherwise `None`.
    pub data: Option<serde_json::Value>,
    /// Failure description when `success` is `false`, otherwise `None`.
    pub error: Option<String>,
}
|
||||||
|
|
||||||
|
pub async fn extract(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<ApiKeyAuth>,
|
||||||
|
Json(body): Json<AiExtractRequest>,
|
||||||
|
) -> Result<Json<AiExtractResponse>, StatusCode> {
|
||||||
|
let openai_key = std::env::var("OPENAI_API_KEY").unwrap_or_default();
|
||||||
|
if openai_key.is_empty() {
|
||||||
|
return Ok(Json(AiExtractResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some("OpenAI not configured".to_string()),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Crawl the page via queue
|
||||||
|
let crawl_req = CrawlRequest {
|
||||||
|
url: body.url.clone(),
|
||||||
|
options: Default::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let job_id = queue::enqueue_job(
|
||||||
|
&state,
|
||||||
|
auth.user.id,
|
||||||
|
auth.api_key_id,
|
||||||
|
"crawl",
|
||||||
|
&body.url,
|
||||||
|
&crawl_req.options,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let job_result = queue::wait_for_result(&state, job_id, 60)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
match job_result {
|
||||||
|
Some(result) if result.success => {
|
||||||
|
let data = result.data.unwrap_or_default();
|
||||||
|
let html = data.get("html").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
let title = data.get("title").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
|
||||||
|
// Call OpenAI
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let system_prompt = format!(
|
||||||
|
"You are a web scraping assistant. Extract structured data from the following HTML page titled '{}'. \
|
||||||
|
Return ONLY a JSON object matching the requested schema. Do not include any explanation.",
|
||||||
|
title
|
||||||
|
);
|
||||||
|
|
||||||
|
let user_prompt = if let Some(p) = body.prompt {
|
||||||
|
p
|
||||||
|
} else {
|
||||||
|
format!("Extract data from this HTML according to schema: {}\n\nHTML:\n{}",
|
||||||
|
body.schema.to_string(),
|
||||||
|
&html[..html.len().min(8000)])
|
||||||
|
};
|
||||||
|
|
||||||
|
let res = client
|
||||||
|
.post("https://api.openai.com/v1/chat/completions")
|
||||||
|
.header("Authorization", format!("Bearer {}", openai_key))
|
||||||
|
.json(&serde_json::json!({
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"messages": [
|
||||||
|
{ "role": "system", "content": system_prompt },
|
||||||
|
{ "role": "user", "content": user_prompt }
|
||||||
|
],
|
||||||
|
"temperature": 0.1,
|
||||||
|
"response_format": { "type": "json_object" }
|
||||||
|
}))
|
||||||
|
.send()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match res {
|
||||||
|
Ok(response) => {
|
||||||
|
let ai_data: serde_json::Value = response.json().await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
if let Some(content) = ai_data["choices"][0]["message"]["content"].as_str() {
|
||||||
|
let parsed: serde_json::Value = serde_json::from_str(content).unwrap_or_else(|_| serde_json::json!({"raw": content}));
|
||||||
|
Ok(Json(AiExtractResponse {
|
||||||
|
success: true,
|
||||||
|
data: Some(parsed),
|
||||||
|
error: None,
|
||||||
|
}))
|
||||||
|
} else {
|
||||||
|
Ok(Json(AiExtractResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some("Invalid OpenAI response".to_string()),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => Ok(Json(AiExtractResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some(format!("OpenAI error: {}", e)),
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => Ok(Json(AiExtractResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some("Failed to crawl page".to_string()),
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
}
|
||||||
142
crates/api/src/routes/auth.rs
Normal file
142
crates/api/src/routes/auth.rs
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Json, Path, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Extension,
|
||||||
|
};
|
||||||
|
use db::repos::{api_keys, users};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use shared::models::{ApiKey, User};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::{middleware::jwt::JwtAuth, state::AppState};
|
||||||
|
|
||||||
|
/// Request body for user registration.
#[derive(Debug, Deserialize)]
pub struct RegisterRequest {
    /// Email address; registration is rejected if a user with it already exists.
    pub email: String,
    /// Plain-text password; hashed with bcrypt before the user is created.
    pub password: String,
}
|
||||||
|
|
||||||
|
/// Response body returned by `register` and `login`.
#[derive(Debug, Serialize)]
pub struct AuthResponse {
    /// The authenticated user.
    // NOTE(review): `User` has a `password_hash` field (read in `login`);
    // confirm it is excluded from serialization.
    pub user: User,
    /// JWT produced by `create_jwt`.
    pub token: String,
}
|
||||||
|
|
||||||
|
pub async fn register(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Json(body): Json<RegisterRequest>,
|
||||||
|
) -> Result<Json<AuthResponse>, StatusCode> {
|
||||||
|
if users::find_by_email(&state.db, &body.email).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?.is_some() {
|
||||||
|
return Err(StatusCode::CONFLICT);
|
||||||
|
}
|
||||||
|
|
||||||
|
let password_hash = bcrypt::hash(&body.password, bcrypt::DEFAULT_COST).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
let user = users::create(&state.db, &body.email, Some(&password_hash), None)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let token = create_jwt(&user.id.to_string(), &state.config.jwt_secret)?;
|
||||||
|
|
||||||
|
Ok(Json(AuthResponse { user, token }))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request body for login.
#[derive(Debug, Deserialize)]
pub struct LoginRequest {
    /// Email address used to look up the user.
    pub email: String,
    /// Plain-text password; verified against the stored bcrypt hash.
    pub password: String,
}
|
||||||
|
|
||||||
|
pub async fn login(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Json(body): Json<LoginRequest>,
|
||||||
|
) -> Result<Json<AuthResponse>, StatusCode> {
|
||||||
|
let user = users::find_by_email(&state.db, &body.email)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::UNAUTHORIZED)?;
|
||||||
|
|
||||||
|
let password_hash = user.password_hash.as_ref().ok_or(StatusCode::UNAUTHORIZED)?;
|
||||||
|
if !bcrypt::verify(&body.password, password_hash).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? {
|
||||||
|
return Err(StatusCode::UNAUTHORIZED);
|
||||||
|
}
|
||||||
|
|
||||||
|
let token = create_jwt(&user.id.to_string(), &state.config.jwt_secret)?;
|
||||||
|
|
||||||
|
Ok(Json(AuthResponse { user, token }))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_jwt(user_id: &str, secret: &str) -> Result<String, StatusCode> {
|
||||||
|
use jsonwebtoken::{encode, EncodingKey, Header};
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
|
struct Claims {
|
||||||
|
sub: String,
|
||||||
|
exp: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
let claims = Claims {
|
||||||
|
sub: user_id.to_string(),
|
||||||
|
exp: (chrono::Utc::now() + chrono::Duration::days(30)).timestamp() as usize,
|
||||||
|
};
|
||||||
|
|
||||||
|
encode(&Header::default(), &claims, &EncodingKey::from_secret(secret.as_bytes()))
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request body for creating an API key.
#[derive(Debug, Deserialize)]
pub struct CreateApiKeyRequest {
    /// Name stored with the new key.
    pub name: String,
}
|
||||||
|
|
||||||
|
/// Response body returned when an API key is created.
#[derive(Debug, Serialize)]
pub struct ApiKeyResponse {
    /// Identifier of the stored key record.
    pub id: Uuid,
    /// The plain-text key; `create_api_key` passes only its MD5 hash to the repository.
    pub key: String,
    /// Name of the key as stored.
    pub name: String,
}
|
||||||
|
|
||||||
|
pub async fn create_api_key(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
Json(body): Json<CreateApiKeyRequest>,
|
||||||
|
) -> Result<Json<ApiKeyResponse>, StatusCode> {
|
||||||
|
let api_key = format!("crawlapi_{}", Uuid::new_v4().to_string().replace('-', ""));
|
||||||
|
let key_hash = format!("{:x}", md5::compute(&api_key));
|
||||||
|
|
||||||
|
let key = api_keys::create(&state.db, auth.user_id, &key_hash, &body.name)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
Ok(Json(ApiKeyResponse {
|
||||||
|
id: key.id,
|
||||||
|
key: api_key,
|
||||||
|
name: key.name,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn list_api_keys(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
) -> Result<Json<Vec<ApiKey>>, StatusCode> {
|
||||||
|
let keys = api_keys::list_by_user(&state.db, auth.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
Ok(Json(keys))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn delete_api_key(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
Path(id): Path<Uuid>,
|
||||||
|
) -> Result<StatusCode, StatusCode> {
|
||||||
|
let deleted = api_keys::delete_by_id(&state.db, id, auth.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if deleted {
|
||||||
|
Ok(StatusCode::NO_CONTENT)
|
||||||
|
} else {
|
||||||
|
Err(StatusCode::NOT_FOUND)
|
||||||
|
}
|
||||||
|
}
|
||||||
296
crates/api/src/routes/crawl.rs
Normal file
296
crates/api/src/routes/crawl.rs
Normal file
@@ -0,0 +1,296 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Json, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Extension,
|
||||||
|
};
|
||||||
|
use db::repos::{usage_logs, users};
|
||||||
|
use serde_json::json;
|
||||||
|
use shared::{
|
||||||
|
error::AppError,
|
||||||
|
models::{CrawlRequest, CrawlResponse},
|
||||||
|
};
|
||||||
|
use std::time::Instant;
|
||||||
|
use tokio::fs;
|
||||||
|
|
||||||
|
use crate::{middleware::auth::ApiKeyAuth, queue, state::AppState, storage, validation};
|
||||||
|
|
||||||
|
async fn upload_files_if_needed(
|
||||||
|
state: &AppState,
|
||||||
|
endpoint: &str,
|
||||||
|
result: &mut serde_json::Value,
|
||||||
|
) -> Result<(), AppError> {
|
||||||
|
// Handle file_path
|
||||||
|
let file_path_opt = result.get("file_path").and_then(|v| v.as_str()).map(String::from);
|
||||||
|
if let Some(file_path) = file_path_opt {
|
||||||
|
let file_data = fs::read(&file_path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::Internal(format!("Failed to read file: {}", e)))?;
|
||||||
|
|
||||||
|
let ext = if endpoint == "pdf" { "pdf" } else { "png" };
|
||||||
|
let content_type = if endpoint == "pdf" { "application/pdf" } else { "image/png" };
|
||||||
|
let key = storage::generate_file_key(endpoint, ext);
|
||||||
|
|
||||||
|
storage::upload_file(
|
||||||
|
&state.s3,
|
||||||
|
&state.config.s3_bucket,
|
||||||
|
&key,
|
||||||
|
content_type,
|
||||||
|
file_data,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let public_url = storage::get_public_url(
|
||||||
|
endpoint,
|
||||||
|
&state.config.s3_endpoint,
|
||||||
|
&state.config.s3_bucket,
|
||||||
|
&key,
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some(obj) = result.as_object_mut() {
|
||||||
|
obj.remove("file_path");
|
||||||
|
obj.insert("url".to_string(), json!(public_url));
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = fs::remove_file(file_path).await;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_endpoint(
|
||||||
|
state: State<AppState>,
|
||||||
|
Extension(auth): Extension<ApiKeyAuth>,
|
||||||
|
Json(body): Json<CrawlRequest>,
|
||||||
|
endpoint: &'static str,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
// Validate URL
|
||||||
|
if let Err(e) = validation::validate_url(&body.url) {
|
||||||
|
let _ = usage_logs::create(
|
||||||
|
&state.db,
|
||||||
|
auth.user.id,
|
||||||
|
auth.api_key_id,
|
||||||
|
endpoint,
|
||||||
|
&body.url,
|
||||||
|
"error",
|
||||||
|
0,
|
||||||
|
start.elapsed().as_millis() as i64,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
return Ok(Json(CrawlResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
calls_remaining: Some(auth.user.credits),
|
||||||
|
error: Some(e.to_string()),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate webhook URL if provided
|
||||||
|
if let Some(ref webhook_url) = body.options.webhook_url {
|
||||||
|
if let Err(e) = validation::validate_webhook_url(webhook_url) {
|
||||||
|
return Ok(Json(CrawlResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
calls_remaining: Some(auth.user.credits),
|
||||||
|
error: Some(e.to_string()),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check credits
|
||||||
|
if auth.user.credits <= 0 {
|
||||||
|
return Ok(Json(CrawlResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
calls_remaining: Some(0),
|
||||||
|
error: Some("Insufficient credits".to_string()),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deduct credits
|
||||||
|
let has_credits = users::deduct_credits(&state.db, auth.user.id, 1)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if !has_credits {
|
||||||
|
return Ok(Json(CrawlResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
calls_remaining: Some(0),
|
||||||
|
error: Some("Insufficient credits".to_string()),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try cache first
|
||||||
|
let cache_key = queue::get_cache_key(&body.url, endpoint, &body.options).await;
|
||||||
|
let cached = queue::get_cached_result(&state, &cache_key)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if let Some(cached_result) = cached {
|
||||||
|
let _ = usage_logs::create(
|
||||||
|
&state.db,
|
||||||
|
auth.user.id,
|
||||||
|
auth.api_key_id,
|
||||||
|
endpoint,
|
||||||
|
&body.url,
|
||||||
|
"success",
|
||||||
|
1,
|
||||||
|
start.elapsed().as_millis() as i64,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
return Ok(Json(CrawlResponse {
|
||||||
|
success: cached_result.success,
|
||||||
|
data: cached_result.data,
|
||||||
|
calls_remaining: Some(auth.user.credits - 1),
|
||||||
|
error: cached_result.error,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueue job and wait for result
|
||||||
|
let job_id = queue::enqueue_job(
|
||||||
|
&state,
|
||||||
|
auth.user.id,
|
||||||
|
auth.api_key_id,
|
||||||
|
endpoint,
|
||||||
|
&body.url,
|
||||||
|
&body.options,
|
||||||
|
body.options.webhook_url.clone(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let job_result = queue::wait_for_result(&state, job_id, 60)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let duration = start.elapsed().as_millis() as i64;
|
||||||
|
let remaining = auth.user.credits - 1;
|
||||||
|
|
||||||
|
match job_result {
|
||||||
|
Some(mut result) => {
|
||||||
|
// Upload files if needed
|
||||||
|
if let Some(ref mut data) = result.data {
|
||||||
|
let _ = upload_files_if_needed(&state, endpoint, data).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache successful results for 5 minutes
|
||||||
|
if result.success {
|
||||||
|
let _ = queue::set_cached_result(&state, &cache_key, &result, 300).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let status = if result.success { "success" } else { "error" };
|
||||||
|
let _ = usage_logs::create(
|
||||||
|
&state.db,
|
||||||
|
auth.user.id,
|
||||||
|
auth.api_key_id,
|
||||||
|
endpoint,
|
||||||
|
&body.url,
|
||||||
|
status,
|
||||||
|
1,
|
||||||
|
duration,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
Ok(Json(CrawlResponse {
|
||||||
|
success: result.success,
|
||||||
|
data: result.data,
|
||||||
|
calls_remaining: Some(remaining),
|
||||||
|
error: result.error,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let _ = usage_logs::create(
|
||||||
|
&state.db,
|
||||||
|
auth.user.id,
|
||||||
|
auth.api_key_id,
|
||||||
|
endpoint,
|
||||||
|
&body.url,
|
||||||
|
"timeout",
|
||||||
|
1,
|
||||||
|
duration,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
Ok(Json(CrawlResponse {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
calls_remaining: Some(remaining),
|
||||||
|
error: Some("Job timed out".to_string()),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_crawl(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "crawl").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_content(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "content").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_screenshot(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "screenshot").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_pdf(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "pdf").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_markdown(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "markdown").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_snapshot(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "snapshot").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_scrape(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "scrape").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_json(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "json").await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn handle_links(
|
||||||
|
state: State<AppState>,
|
||||||
|
auth: Extension<ApiKeyAuth>,
|
||||||
|
body: Json<CrawlRequest>,
|
||||||
|
) -> Result<Json<CrawlResponse>, StatusCode> {
|
||||||
|
handle_endpoint(state, auth, body, "links").await
|
||||||
|
}
|
||||||
72
crates/api/src/routes/mod.rs
Normal file
72
crates/api/src/routes/mod.rs
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
pub mod auth;
|
||||||
|
pub mod crawl;
|
||||||
|
pub mod oauth;
|
||||||
|
pub mod stripe;
|
||||||
|
pub mod ai;
|
||||||
|
pub mod teams;
|
||||||
|
pub mod ws;
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
middleware,
|
||||||
|
routing::{get, post},
|
||||||
|
Router,
|
||||||
|
};
|
||||||
|
use tower_http::cors::CorsLayer;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
middleware::{auth::api_key_middleware, correlation::correlation_id_middleware, jwt::jwt_middleware, rate_limit::rate_limit_middleware, waf::ip_rate_limit_middleware},
|
||||||
|
state::AppState,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn create_router(state: AppState) -> Router {
|
||||||
|
let api_routes = Router::new()
|
||||||
|
.route("/crawl", post(crawl::handle_crawl))
|
||||||
|
.route("/content", post(crawl::handle_content))
|
||||||
|
.route("/screenshot", post(crawl::handle_screenshot))
|
||||||
|
.route("/pdf", post(crawl::handle_pdf))
|
||||||
|
.route("/markdown", post(crawl::handle_markdown))
|
||||||
|
.route("/snapshot", post(crawl::handle_snapshot))
|
||||||
|
.route("/scrape", post(crawl::handle_scrape))
|
||||||
|
.route("/json", post(crawl::handle_json))
|
||||||
|
.route("/links", post(crawl::handle_links))
|
||||||
|
.route("/extract", post(ai::extract))
|
||||||
|
.route_layer(middleware::from_fn_with_state(state.clone(), api_key_middleware))
|
||||||
|
.route_layer(middleware::from_fn_with_state(state.clone(), rate_limit_middleware));
|
||||||
|
|
||||||
|
let auth_routes = Router::new()
|
||||||
|
.route("/auth/register", post(auth::register))
|
||||||
|
.route("/auth/login", post(auth::login))
|
||||||
|
.route("/auth/google", get(oauth::google_auth_url))
|
||||||
|
.route("/auth/google/callback", get(oauth::google_callback));
|
||||||
|
|
||||||
|
let protected_routes = Router::new()
|
||||||
|
.route("/auth/api-keys", post(auth::create_api_key))
|
||||||
|
.route("/auth/api-keys", get(auth::list_api_keys))
|
||||||
|
.route("/auth/api-keys/{id}", axum::routing::delete(auth::delete_api_key))
|
||||||
|
.route("/stripe/checkout", post(stripe::create_checkout))
|
||||||
|
.route("/teams", post(teams::create))
|
||||||
|
.route("/teams/{slug}", get(teams::get))
|
||||||
|
.route("/teams/{slug}/members", post(teams::add_member))
|
||||||
|
.route_layer(middleware::from_fn_with_state(state.clone(), jwt_middleware));
|
||||||
|
|
||||||
|
let stripe_webhook = Router::new()
|
||||||
|
.route("/stripe/webhook", post(stripe::webhook));
|
||||||
|
|
||||||
|
Router::new()
|
||||||
|
.nest("/api", api_routes)
|
||||||
|
.nest("/api", auth_routes)
|
||||||
|
.nest("/api", protected_routes)
|
||||||
|
.nest("/api", stripe_webhook)
|
||||||
|
.route("/metrics", get(|| async {
|
||||||
|
use prometheus::Encoder;
|
||||||
|
let encoder = prometheus::TextEncoder::new();
|
||||||
|
let mut buffer = vec![];
|
||||||
|
encoder.encode(&crate::metrics::REGISTRY.gather(), &mut buffer).unwrap();
|
||||||
|
String::from_utf8(buffer).unwrap()
|
||||||
|
}))
|
||||||
|
.route("/ws/logs", get(ws::live_logs))
|
||||||
|
.layer(middleware::from_fn_with_state(state.clone(), ip_rate_limit_middleware))
|
||||||
|
.layer(middleware::from_fn_with_state(state.clone(), correlation_id_middleware))
|
||||||
|
.layer(CorsLayer::permissive())
|
||||||
|
.with_state(state)
|
||||||
|
}
|
||||||
137
crates/api/src/routes/oauth.rs
Normal file
137
crates/api/src/routes/oauth.rs
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Query, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Json,
|
||||||
|
};
|
||||||
|
use db::repos::{oauth, users};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct GoogleCallback {
|
||||||
|
pub code: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct GoogleAuthUrl {
|
||||||
|
pub url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct GoogleTokenResponse {
|
||||||
|
access_token: String,
|
||||||
|
id_token: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct GoogleUserInfo {
|
||||||
|
sub: String,
|
||||||
|
email: String,
|
||||||
|
name: Option<String>,
|
||||||
|
picture: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn google_auth_url(State(_state): State<AppState>) -> Result<Json<GoogleAuthUrl>, StatusCode> {
|
||||||
|
let client_id = std::env::var("GOOGLE_CLIENT_ID").unwrap_or_default();
|
||||||
|
if client_id.is_empty() {
|
||||||
|
return Err(StatusCode::NOT_IMPLEMENTED);
|
||||||
|
}
|
||||||
|
let redirect_uri = std::env::var("GOOGLE_REDIRECT_URI")
|
||||||
|
.unwrap_or_else(|_| "http://localhost:3000/api/auth/google/callback".to_string());
|
||||||
|
|
||||||
|
let url = format!(
|
||||||
|
"https://accounts.google.com/o/oauth2/v2/auth?client_id={}&redirect_uri={}&response_type=code&scope=email%20profile&access_type=offline&prompt=consent",
|
||||||
|
client_id, redirect_uri
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(Json(GoogleAuthUrl { url }))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn google_callback(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Query(params): Query<GoogleCallback>,
|
||||||
|
) -> Result<Json<super::auth::AuthResponse>, StatusCode> {
|
||||||
|
let client_id = std::env::var("GOOGLE_CLIENT_ID").unwrap_or_default();
|
||||||
|
let client_secret = std::env::var("GOOGLE_CLIENT_SECRET").unwrap_or_default();
|
||||||
|
let redirect_uri = std::env::var("GOOGLE_REDIRECT_URI")
|
||||||
|
.unwrap_or_else(|_| "http://localhost:3000/api/auth/google/callback".to_string());
|
||||||
|
|
||||||
|
if client_id.is_empty() || client_secret.is_empty() {
|
||||||
|
// MVP fallback: create mock user
|
||||||
|
let email = format!("google_user_{}@example.com", ¶ms.code[..8.min(params.code.len())]);
|
||||||
|
let user = match users::find_by_email(&state.db, &email).await.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? {
|
||||||
|
Some(u) => u,
|
||||||
|
None => {
|
||||||
|
let u = users::create(&state.db, &email, None, Some(¶ms.code)).await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
let _ = oauth::create(&state.db, u.id, "google", ¶ms.code).await;
|
||||||
|
u
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let token = super::auth::create_jwt(&user.id.to_string(), &state.config.jwt_secret)?;
|
||||||
|
return Ok(Json(super::auth::AuthResponse { user, token }));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exchange code for token
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let token_res = client
|
||||||
|
.post("https://oauth2.googleapis.com/token")
|
||||||
|
.form(&[
|
||||||
|
("code", params.code.as_str()),
|
||||||
|
("client_id", client_id.as_str()),
|
||||||
|
("client_secret", client_secret.as_str()),
|
||||||
|
("redirect_uri", redirect_uri.as_str()),
|
||||||
|
("grant_type", "authorization_code"),
|
||||||
|
])
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.json::<GoogleTokenResponse>()
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Get user info
|
||||||
|
let user_info = client
|
||||||
|
.get("https://openidconnect.googleapis.com/v1/userinfo")
|
||||||
|
.header("Authorization", format!("Bearer {}", token_res.access_token))
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.json::<GoogleUserInfo>()
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Find or create user
|
||||||
|
let user = match oauth::find_by_provider(&state.db, "google", &user_info.sub).await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
{
|
||||||
|
Some(oauth_account) => {
|
||||||
|
users::find_by_id(&state.db, oauth_account.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let user = match users::find_by_email(&state.db, &user_info.email).await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
{
|
||||||
|
Some(u) => u,
|
||||||
|
None => {
|
||||||
|
users::create(&state.db, &user_info.email, None, Some(&user_info.sub))
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
oauth::create(&state.db, user.id, "google", &user_info.sub)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
user
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let token = super::auth::create_jwt(&user.id.to_string(), &state.config.jwt_secret)?;
|
||||||
|
Ok(Json(super::auth::AuthResponse { user, token }))
|
||||||
|
}
|
||||||
146
crates/api/src/routes/stripe.rs
Normal file
146
crates/api/src/routes/stripe.rs
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::State,
|
||||||
|
http::{HeaderMap, StatusCode},
|
||||||
|
Json,
|
||||||
|
};
|
||||||
|
use db::repos::{subscriptions, users};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::{middleware::jwt::JwtAuth, state::AppState};
|
||||||
|
use axum::Extension;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct CreateCheckoutRequest {
|
||||||
|
pub price_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct CheckoutResponse {
|
||||||
|
pub checkout_url: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn create_checkout(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
Json(body): Json<CreateCheckoutRequest>,
|
||||||
|
) -> Result<Json<CheckoutResponse>, StatusCode> {
|
||||||
|
let stripe_secret = std::env::var("STRIPE_SECRET_KEY").map_err(|_| StatusCode::NOT_IMPLEMENTED)?;
|
||||||
|
|
||||||
|
let user = users::find_by_id(&state.db, auth.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::NOT_FOUND)?;
|
||||||
|
|
||||||
|
// Create Stripe customer via HTTP API directly (simpler than SDK for MVP)
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let customer_res = client
|
||||||
|
.post("https://api.stripe.com/v1/customers")
|
||||||
|
.basic_auth(&stripe_secret, Some(""))
|
||||||
|
.form(&[("email", &user.email)])
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let customer_data: serde_json::Value = customer_res.json().await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let customer_id = customer_data["id"].as_str().unwrap_or("");
|
||||||
|
|
||||||
|
let success_url = std::env::var("STRIPE_SUCCESS_URL")
|
||||||
|
.unwrap_or_else(|_| "http://localhost:3000/dashboard?success=true".to_string());
|
||||||
|
let cancel_url = std::env::var("STRIPE_CANCEL_URL")
|
||||||
|
.unwrap_or_else(|_| "http://localhost:3000/dashboard?canceled=true".to_string());
|
||||||
|
|
||||||
|
let session_res = client
|
||||||
|
.post("https://api.stripe.com/v1/checkout/sessions")
|
||||||
|
.basic_auth(&stripe_secret, Some(""))
|
||||||
|
.form(&[
|
||||||
|
("customer", customer_id),
|
||||||
|
("success_url", &success_url),
|
||||||
|
("cancel_url", &cancel_url),
|
||||||
|
("mode", "subscription"),
|
||||||
|
("line_items[0][price]", &body.price_id),
|
||||||
|
("line_items[0][quantity]", "1"),
|
||||||
|
("metadata[user_id]", &auth.user_id.to_string()),
|
||||||
|
])
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let session_data: serde_json::Value = session_res.json().await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let url = session_data["url"].as_str().ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
Ok(Json(CheckoutResponse { checkout_url: url.to_string() }))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct StripeWebhook {
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
pub event_type: String,
|
||||||
|
pub data: StripeEventData,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct StripeEventData {
|
||||||
|
pub object: serde_json::Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn webhook(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
headers: HeaderMap,
|
||||||
|
body: String,
|
||||||
|
) -> Result<StatusCode, StatusCode> {
|
||||||
|
let stripe_secret = std::env::var("STRIPE_WEBHOOK_SECRET").unwrap_or_default();
|
||||||
|
|
||||||
|
// Verify webhook signature if configured
|
||||||
|
if !stripe_secret.is_empty() {
|
||||||
|
let sig = headers
|
||||||
|
.get("stripe-signature")
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.ok_or(StatusCode::BAD_REQUEST)?;
|
||||||
|
|
||||||
|
// In production, verify signature using Stripe library
|
||||||
|
// For MVP, we log and process
|
||||||
|
tracing::info!("Webhook signature: {}", sig);
|
||||||
|
}
|
||||||
|
|
||||||
|
let event: serde_json::Value = serde_json::from_str(&body).map_err(|_| StatusCode::BAD_REQUEST)?;
|
||||||
|
let event_type = event["type"].as_str().unwrap_or("");
|
||||||
|
|
||||||
|
match event_type {
|
||||||
|
"checkout.session.completed" => {
|
||||||
|
if let Some(metadata) = event["data"]["object"]["metadata"].as_object() {
|
||||||
|
if let Some(user_id_str) = metadata.get("user_id").and_then(|v| v.as_str()) {
|
||||||
|
if let Ok(user_id) = uuid::Uuid::parse_str(user_id_str) {
|
||||||
|
let customer_id = event["data"]["object"]["customer"].as_str().unwrap_or("");
|
||||||
|
let subscription_id = event["data"]["object"]["subscription"].as_str().unwrap_or("");
|
||||||
|
let _ = subscriptions::create_or_update(
|
||||||
|
&state.db,
|
||||||
|
user_id,
|
||||||
|
Some(customer_id),
|
||||||
|
Some(subscription_id),
|
||||||
|
None,
|
||||||
|
"active",
|
||||||
|
"paid",
|
||||||
|
).await;
|
||||||
|
tracing::info!("Subscription activated for user {}", user_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"invoice.payment_succeeded" => {
|
||||||
|
tracing::info!("Invoice payment succeeded");
|
||||||
|
}
|
||||||
|
"customer.subscription.deleted" => {
|
||||||
|
if let Some(sub_id) = event["data"]["object"]["id"].as_str() {
|
||||||
|
let _ = subscriptions::update_status(&state.db, sub_id, "canceled").await;
|
||||||
|
tracing::info!("Subscription {} canceled", sub_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(StatusCode::OK)
|
||||||
|
}
|
||||||
95
crates/api/src/routes/teams.rs
Normal file
95
crates/api/src/routes/teams.rs
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::{Json, Path, State},
|
||||||
|
http::StatusCode,
|
||||||
|
Extension,
|
||||||
|
};
|
||||||
|
use db::repos::teams;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use shared::models::{Team, TeamMember};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::{middleware::jwt::JwtAuth, state::AppState};
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct CreateTeamRequest {
|
||||||
|
pub name: String,
|
||||||
|
pub slug: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct TeamResponse {
|
||||||
|
pub team: Team,
|
||||||
|
pub members: Vec<TeamMember>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn create(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
Json(body): Json<CreateTeamRequest>,
|
||||||
|
) -> Result<Json<Team>, StatusCode> {
|
||||||
|
let team = teams::create(&state.db, &body.name, &body.slug, auth.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
// Add owner as member
|
||||||
|
let _ = teams::add_member(&state.db, team.id, auth.user_id, "owner").await;
|
||||||
|
|
||||||
|
Ok(Json(team))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
Path(slug): Path<String>,
|
||||||
|
) -> Result<Json<TeamResponse>, StatusCode> {
|
||||||
|
let team = teams::find_by_slug(&state.db, &slug)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::NOT_FOUND)?;
|
||||||
|
|
||||||
|
// Check membership
|
||||||
|
let member = teams::find_member(&state.db, team.id, auth.user_id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
if member.is_none() {
|
||||||
|
return Err(StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
let members = teams::list_members(&state.db, team.id)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
Ok(Json(TeamResponse { team, members }))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct AddMemberRequest {
|
||||||
|
pub user_id: Uuid,
|
||||||
|
#[serde(default)]
|
||||||
|
pub role: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn add_member(
|
||||||
|
State(state): State<AppState>,
|
||||||
|
Extension(auth): Extension<JwtAuth>,
|
||||||
|
Path(slug): Path<String>,
|
||||||
|
Json(body): Json<AddMemberRequest>,
|
||||||
|
) -> Result<Json<TeamMember>, StatusCode> {
|
||||||
|
let team = teams::find_by_slug(&state.db, &slug)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||||
|
.ok_or(StatusCode::NOT_FOUND)?;
|
||||||
|
|
||||||
|
// Only owner can add members
|
||||||
|
if team.owner_id != auth.user_id {
|
||||||
|
return Err(StatusCode::FORBIDDEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
let role = if body.role.is_empty() { "member" } else { &body.role };
|
||||||
|
let member = teams::add_member(&state.db, team.id, body.user_id, role)
|
||||||
|
.await
|
||||||
|
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
Ok(Json(member))
|
||||||
|
}
|
||||||
41
crates/api/src/routes/ws.rs
Normal file
41
crates/api/src/routes/ws.rs
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
use axum::{
|
||||||
|
extract::ws::{Message, WebSocket, WebSocketUpgrade},
|
||||||
|
response::IntoResponse,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub async fn live_logs(ws: WebSocketUpgrade) -> impl IntoResponse {
|
||||||
|
ws.on_upgrade(handle_socket)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_socket(mut socket: WebSocket) {
|
||||||
|
// Send initial connection message
|
||||||
|
let _ = socket.send(Message::Text(r#"{"type":"connected","message":"Live logs connected"}"#.to_string())).await;
|
||||||
|
|
||||||
|
// In a real implementation, this would subscribe to a Redis pub/sub channel
|
||||||
|
// and stream logs to the client. For MVP, we send a heartbeat.
|
||||||
|
let mut interval = tokio::time::interval(std::time::Duration::from_secs(5));
|
||||||
|
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
_ = interval.tick() => {
|
||||||
|
let msg = r#"{"type":"heartbeat","timestamp":""#.to_string()
|
||||||
|
+ &chrono::Utc::now().to_rfc3339()
|
||||||
|
+ "\"}";
|
||||||
|
if socket.send(Message::Text(msg)).await.is_err() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
msg = socket.recv() => {
|
||||||
|
match msg {
|
||||||
|
Some(Ok(Message::Close(_))) | None => break,
|
||||||
|
Some(Ok(Message::Text(text))) => {
|
||||||
|
if text == "ping" {
|
||||||
|
let _ = socket.send(Message::Text("pong".to_string())).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
69
crates/api/src/secrets/mod.rs
Normal file
69
crates/api/src/secrets/mod.rs
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
use shared::error::AppError;
|
||||||
|
|
||||||
|
pub async fn get_secret(key: &str) -> Result<String, AppError> {
|
||||||
|
// Priority 1: Environment variable (for local dev)
|
||||||
|
if let Ok(val) = std::env::var(key) {
|
||||||
|
return Ok(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Priority 2: Try Vault
|
||||||
|
if let Ok(val) = get_vault_secret(key).await {
|
||||||
|
return Ok(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Priority 3: Try AWS Secrets Manager
|
||||||
|
if let Ok(val) = get_aws_secret(key).await {
|
||||||
|
return Ok(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(AppError::Internal(format!("Secret {} not found in any provider", key)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_vault_secret(key: &str) -> Result<String, AppError> {
|
||||||
|
let vault_addr = match std::env::var("VAULT_ADDR") {
|
||||||
|
Ok(addr) => addr,
|
||||||
|
Err(_) => return Err(AppError::Internal("VAULT_ADDR not set".to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
let vault_token = match std::env::var("VAULT_TOKEN") {
|
||||||
|
Ok(token) => token,
|
||||||
|
Err(_) => return Err(AppError::Internal("VAULT_TOKEN not set".to_string())),
|
||||||
|
};
|
||||||
|
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let response = client
|
||||||
|
.get(format!("{}/v1/secret/data/{}", vault_addr, key))
|
||||||
|
.header("X-Vault-Token", vault_token)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::Internal(format!("Vault request failed: {}", e)))?;
|
||||||
|
|
||||||
|
let data: serde_json::Value = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::Internal(format!("Vault response parse failed: {}", e)))?;
|
||||||
|
|
||||||
|
data["data"]["data"][key]
|
||||||
|
.as_str()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.ok_or_else(|| AppError::Internal(format!("Secret {} not found in Vault", key)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_aws_secret(key: &str) -> Result<String, AppError> {
|
||||||
|
let secret_name = format!("crawlapi/{}", key.to_lowercase().replace('_', "/"));
|
||||||
|
|
||||||
|
let config = aws_config::from_env().load().await;
|
||||||
|
let client = aws_sdk_secretsmanager::Client::new(&config);
|
||||||
|
|
||||||
|
let response = client
|
||||||
|
.get_secret_value()
|
||||||
|
.secret_id(&secret_name)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::Internal(format!("AWS Secrets Manager error: {}", e)))?;
|
||||||
|
|
||||||
|
response
|
||||||
|
.secret_string()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.ok_or_else(|| AppError::Internal(format!("Secret {} not found in AWS", key)))
|
||||||
|
}
|
||||||
13
crates/api/src/state.rs
Normal file
13
crates/api/src/state.rs
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
use aws_sdk_s3::Client as S3Client;
|
||||||
|
use db::DbPool;
|
||||||
|
use redis::aio::MultiplexedConnection;
|
||||||
|
use shared::config::AppConfig;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct AppState {
|
||||||
|
pub config: Arc<AppConfig>,
|
||||||
|
pub db: DbPool,
|
||||||
|
pub redis: MultiplexedConnection,
|
||||||
|
pub s3: S3Client,
|
||||||
|
}
|
||||||
54
crates/api/src/storage/mod.rs
Normal file
54
crates/api/src/storage/mod.rs
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
use aws_sdk_s3::Client as S3Client;
|
||||||
|
use shared::error::AppError;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
pub async fn upload_file(
|
||||||
|
s3: &S3Client,
|
||||||
|
bucket: &str,
|
||||||
|
key: &str,
|
||||||
|
content_type: &str,
|
||||||
|
data: Vec<u8>,
|
||||||
|
) -> Result<String, AppError> {
|
||||||
|
s3.put_object()
|
||||||
|
.bucket(bucket)
|
||||||
|
.key(key)
|
||||||
|
.content_type(content_type)
|
||||||
|
.body(data.into())
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::S3(e.to_string()))?;
|
||||||
|
|
||||||
|
Ok(format!("{}/{}", bucket, key))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn ensure_bucket_exists(s3: &S3Client, bucket: &str) -> Result<(), AppError> {
|
||||||
|
let exists = s3
|
||||||
|
.head_bucket()
|
||||||
|
.bucket(bucket)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.is_ok();
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
s3.create_bucket()
|
||||||
|
.bucket(bucket)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| AppError::S3(e.to_string()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_file_key(endpoint: &str, ext: &str) -> String {
|
||||||
|
let id = Uuid::new_v4().to_string().replace('-', "");
|
||||||
|
match endpoint {
|
||||||
|
"screenshot" => format!("screenshots/{}.png", id),
|
||||||
|
"pdf" => format!("pdfs/{}.pdf", id),
|
||||||
|
_ => format!("files/{}.{}", id, ext),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the URL of an object as `{s3_endpoint}/{bucket}/{key}`.
///
/// Trailing slashes on `s3_endpoint` are dropped so that an endpoint
/// configured as `http://host:9000/` does not produce a double slash in the
/// path. `_endpoint` is currently unused.
pub fn get_public_url(_endpoint: &str, s3_endpoint: &str, bucket: &str, key: &str) -> String {
    format!("{}/{}/{}", s3_endpoint.trim_end_matches('/'), bucket, key)
}
|
||||||
62
crates/api/src/validation.rs
Normal file
62
crates/api/src/validation.rs
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
use shared::error::AppError;
|
||||||
|
|
||||||
|
pub fn validate_url(url: &str) -> Result<(), AppError> {
|
||||||
|
let parsed = url::Url::parse(url).map_err(|_| AppError::InvalidUrl(url.to_string()))?;
|
||||||
|
|
||||||
|
// Only allow http and https
|
||||||
|
if parsed.scheme() != "http" && parsed.scheme() != "https" {
|
||||||
|
return Err(AppError::InvalidUrl("Only HTTP and HTTPS URLs are allowed".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block private IP ranges
|
||||||
|
if let Some(host) = parsed.host_str() {
|
||||||
|
if host == "localhost" || host == "127.0.0.1" || host.starts_with("10.") || host.starts_with("192.168.") {
|
||||||
|
return Err(AppError::InvalidUrl("Private IP addresses are not allowed".to_string()));
|
||||||
|
}
|
||||||
|
if host.starts_with("172.") {
|
||||||
|
if let Some(seg) = host.split('.').nth(1) {
|
||||||
|
if let Ok(n) = seg.parse::<u8>() {
|
||||||
|
if n >= 16 && n <= 31 {
|
||||||
|
return Err(AppError::InvalidUrl("Private IP addresses are not allowed".to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block file://, ftp://, etc.
|
||||||
|
if parsed.scheme() == "file" {
|
||||||
|
return Err(AppError::InvalidUrl("File URLs are not allowed".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate_webhook_url(url: &str) -> Result<(), AppError> {
|
||||||
|
let parsed = url::Url::parse(url).map_err(|_| AppError::InvalidUrl(url.to_string()))?;
|
||||||
|
|
||||||
|
// Only allow http and https
|
||||||
|
if parsed.scheme() != "http" && parsed.scheme() != "https" {
|
||||||
|
return Err(AppError::InvalidUrl("Webhook must use HTTP or HTTPS".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block private IPs and localhost for webhooks
|
||||||
|
if let Some(host) = parsed.host_str() {
|
||||||
|
if host == "localhost" || host == "127.0.0.1" || host.starts_with("10.") || host.starts_with("192.168.") {
|
||||||
|
return Err(AppError::InvalidUrl("Webhook cannot point to private addresses".to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate_size(content: &[u8], max_mb: usize) -> Result<(), AppError> {
|
||||||
|
let max_bytes = max_mb * 1024 * 1024;
|
||||||
|
if content.len() > max_bytes {
|
||||||
|
return Err(AppError::BadRequest(format!(
|
||||||
|
"Content exceeds maximum size of {}MB",
|
||||||
|
max_mb
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
29
crates/api/tests/integration_test.rs
Normal file
29
crates/api/tests/integration_test.rs
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_health_check() {
|
||||||
|
// This is a placeholder for integration tests
|
||||||
|
// In a real setup, you would spawn the API server and make HTTP requests
|
||||||
|
assert!(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_crawl_request_validation() {
|
||||||
|
let req = shared::models::CrawlRequest {
|
||||||
|
url: "https://example.com".to_string(),
|
||||||
|
options: shared::models::CrawlOptions::default(),
|
||||||
|
};
|
||||||
|
assert_eq!(req.url, "https://example.com");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_api_response_format() {
|
||||||
|
let response = shared::api::ApiResponse::ok(json!({"test": true}));
|
||||||
|
assert!(response.success);
|
||||||
|
assert!(response.data.is_some());
|
||||||
|
assert!(response.error.is_none());
|
||||||
|
|
||||||
|
let error = shared::api::ApiResponse::<()>::err("Something went wrong");
|
||||||
|
assert!(!error.success);
|
||||||
|
assert!(error.error.is_some());
|
||||||
|
}
|
||||||
17
crates/db/Cargo.toml
Normal file
17
crates/db/Cargo.toml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# Manifest of the `db` crate.
[package]
name = "db"
version = "0.1.0"
edition = "2021"

# Everything except the path dependency inherits its version from the workspace manifest.
[dependencies]
shared.path = "../shared"
sqlx.workspace = true
tokio.workspace = true
uuid.workspace = true
chrono.workspace = true
thiserror.workspace = true
anyhow.workspace = true
tracing.workspace = true

[dev-dependencies]
serde_json.workspace = true
|
||||||
38
crates/db/migrations/001_init.sql
Normal file
38
crates/db/migrations/001_init.sql
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
-- Initial schema: users, their API keys, and per-request usage logs.

-- Provides uuid_generate_v4(), used as the default for every primary key below.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    email VARCHAR(255) UNIQUE NOT NULL,
    password_hash VARCHAR(255),
    google_id VARCHAR(255) UNIQUE,
    credits BIGINT NOT NULL DEFAULT 30,
    tier VARCHAR(50) NOT NULL DEFAULT 'free',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE TABLE api_keys (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    -- UNIQUE already creates the index used for lookups by key_hash.
    key_hash VARCHAR(255) UNIQUE NOT NULL,
    name VARCHAR(255) NOT NULL DEFAULT 'Default',
    last_used_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE TABLE usage_logs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    api_key_id UUID NOT NULL REFERENCES api_keys(id) ON DELETE CASCADE,
    endpoint VARCHAR(100) NOT NULL,
    url TEXT NOT NULL,
    status VARCHAR(50) NOT NULL,
    credits_used BIGINT NOT NULL DEFAULT 1,
    duration_ms BIGINT NOT NULL DEFAULT 0,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- No separate index on api_keys(key_hash): it would duplicate the one backing the
-- UNIQUE constraint and only add write overhead.
CREATE INDEX idx_api_keys_user_id ON api_keys(user_id);
CREATE INDEX idx_usage_logs_user_id ON usage_logs(user_id);
CREATE INDEX idx_usage_logs_created_at ON usage_logs(created_at);
|
||||||
27
crates/db/migrations/002_oauth_and_subscriptions.sql
Normal file
27
crates/db/migrations/002_oauth_and_subscriptions.sql
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
-- OAuth identities linked to users, and subscription state per user.

CREATE TABLE oauth_accounts (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    provider VARCHAR(50) NOT NULL,
    provider_account_id VARCHAR(255) NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE(provider, provider_account_id)
);

CREATE INDEX idx_oauth_accounts_user_id ON oauth_accounts(user_id);

CREATE TABLE subscriptions (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    stripe_customer_id VARCHAR(255),
    stripe_subscription_id VARCHAR(255),
    stripe_price_id VARCHAR(255),
    status VARCHAR(50) NOT NULL DEFAULT 'incomplete',
    tier VARCHAR(50) NOT NULL DEFAULT 'free',
    current_period_start TIMESTAMPTZ,
    current_period_end TIMESTAMPTZ,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Must be UNIQUE: the subscription upsert uses ON CONFLICT (user_id), which PostgreSQL
-- rejects unless a unique index or constraint covers that column.
CREATE UNIQUE INDEX idx_subscriptions_user_id ON subscriptions(user_id);
CREATE INDEX idx_subscriptions_stripe_customer ON subscriptions(stripe_customer_id);
-- Supports lookups and status updates keyed by the Stripe subscription id.
CREATE INDEX idx_subscriptions_stripe_subscription ON subscriptions(stripe_subscription_id);
|
||||||
21
crates/db/migrations/003_teams.sql
Normal file
21
crates/db/migrations/003_teams.sql
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
-- Teams owned by a user.
CREATE TABLE teams (
    id         UUID         PRIMARY KEY DEFAULT uuid_generate_v4(),
    name       VARCHAR(255) NOT NULL,
    slug       VARCHAR(255) UNIQUE NOT NULL,
    owner_id   UUID         NOT NULL REFERENCES users (id) ON DELETE CASCADE,
    created_at TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_teams_owner ON teams (owner_id);

-- Membership of users in teams; a user appears at most once per team.
CREATE TABLE team_members (
    id         UUID        PRIMARY KEY DEFAULT uuid_generate_v4(),
    team_id    UUID        NOT NULL REFERENCES teams (id) ON DELETE CASCADE,
    user_id    UUID        NOT NULL REFERENCES users (id) ON DELETE CASCADE,
    role       VARCHAR(50) NOT NULL DEFAULT 'member',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (team_id, user_id)
);

CREATE INDEX idx_team_members_team ON team_members (team_id);
CREATE INDEX idx_team_members_user ON team_members (user_id);
|
||||||
7
crates/db/src/connection.rs
Normal file
7
crates/db/src/connection.rs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
use sqlx::PgPool;
|
||||||
|
|
||||||
|
pub type DbPool = PgPool;
|
||||||
|
|
||||||
|
pub async fn create_pool(database_url: &str) -> Result<DbPool, sqlx::Error> {
|
||||||
|
PgPool::connect(database_url).await
|
||||||
|
}
|
||||||
4
crates/db/src/lib.rs
Normal file
4
crates/db/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
pub mod connection;
|
||||||
|
pub mod repos;
|
||||||
|
|
||||||
|
pub use connection::DbPool;
|
||||||
64
crates/db/src/repos/api_keys.rs
Normal file
64
crates/db/src/repos/api_keys.rs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
use shared::models::ApiKey;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// Looks up the API key whose stored hash equals `key_hash`.
///
/// Returns `Ok(None)` when no row matches.
pub async fn find_by_key_hash(pool: &PgPool, key_hash: &str) -> Result<Option<ApiKey>, sqlx::Error> {
    const LOOKUP_SQL: &str =
        r#"SELECT id, user_id, key_hash, name, last_used_at, created_at FROM api_keys WHERE key_hash = $1"#;

    let lookup = sqlx::query_as::<_, ApiKey>(LOOKUP_SQL).bind(key_hash);
    lookup.fetch_optional(pool).await
}
|
||||||
|
|
||||||
|
pub async fn create(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
key_hash: &str,
|
||||||
|
name: &str,
|
||||||
|
) -> Result<ApiKey, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, ApiKey>(
|
||||||
|
r#"INSERT INTO api_keys (id, user_id, key_hash, name)
|
||||||
|
VALUES ($1, $2, $3, $4)
|
||||||
|
RETURNING id, user_id, key_hash, name, last_used_at, created_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(key_hash)
|
||||||
|
.bind(name)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns every API key owned by `user_id`, newest first.
pub async fn list_by_user(pool: &PgPool, user_id: Uuid) -> Result<Vec<ApiKey>, sqlx::Error> {
    const LIST_SQL: &str = r#"SELECT id, user_id, key_hash, name, last_used_at, created_at FROM api_keys WHERE user_id = $1 ORDER BY created_at DESC"#;

    let keys = sqlx::query_as::<_, ApiKey>(LIST_SQL)
        .bind(user_id)
        .fetch_all(pool)
        .await?;
    Ok(keys)
}
|
||||||
|
|
||||||
|
pub async fn update_last_used(pool: &PgPool, id: Uuid) -> Result<(), sqlx::Error> {
|
||||||
|
sqlx::query(
|
||||||
|
r#"UPDATE api_keys SET last_used_at = $1 WHERE id = $2"#,
|
||||||
|
)
|
||||||
|
.bind(chrono::Utc::now())
|
||||||
|
.bind(id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deletes the API key `id` if it belongs to `user_id`.
///
/// Returns `Ok(true)` when a row was removed and `Ok(false)` when nothing matched.
pub async fn delete_by_id(pool: &PgPool, id: Uuid, user_id: Uuid) -> Result<bool, sqlx::Error> {
    sqlx::query(r#"DELETE FROM api_keys WHERE id = $1 AND user_id = $2"#)
        .bind(id)
        .bind(user_id)
        .execute(pool)
        .await
        .map(|outcome| outcome.rows_affected() != 0)
}
|
||||||
6
crates/db/src/repos/mod.rs
Normal file
6
crates/db/src/repos/mod.rs
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
pub mod api_keys;
|
||||||
|
pub mod oauth;
|
||||||
|
pub mod subscriptions;
|
||||||
|
pub mod teams;
|
||||||
|
pub mod usage_logs;
|
||||||
|
pub mod users;
|
||||||
37
crates/db/src/repos/oauth.rs
Normal file
37
crates/db/src/repos/oauth.rs
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
use shared::models::OAuthAccount;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
pub async fn find_by_provider(
|
||||||
|
pool: &PgPool,
|
||||||
|
provider: &str,
|
||||||
|
provider_account_id: &str,
|
||||||
|
) -> Result<Option<OAuthAccount>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, OAuthAccount>(
|
||||||
|
r#"SELECT id, user_id, provider, provider_account_id, created_at
|
||||||
|
FROM oauth_accounts WHERE provider = $1 AND provider_account_id = $2"#,
|
||||||
|
)
|
||||||
|
.bind(provider)
|
||||||
|
.bind(provider_account_id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn create(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
provider: &str,
|
||||||
|
provider_account_id: &str,
|
||||||
|
) -> Result<OAuthAccount, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, OAuthAccount>(
|
||||||
|
r#"INSERT INTO oauth_accounts (id, user_id, provider, provider_account_id)
|
||||||
|
VALUES ($1, $2, $3, $4)
|
||||||
|
RETURNING id, user_id, provider, provider_account_id, created_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(provider)
|
||||||
|
.bind(provider_account_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
76
crates/db/src/repos/subscriptions.rs
Normal file
76
crates/db/src/repos/subscriptions.rs
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
use shared::models::Subscription;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// Returns the most recently created subscription of `user_id`, if any.
pub async fn find_by_user(pool: &PgPool, user_id: Uuid) -> Result<Option<Subscription>, sqlx::Error> {
    const LATEST_SQL: &str = r#"SELECT id, user_id, stripe_customer_id, stripe_subscription_id, stripe_price_id,
status, tier, current_period_start, current_period_end, created_at, updated_at
FROM subscriptions WHERE user_id = $1 ORDER BY created_at DESC LIMIT 1"#;

    let latest = sqlx::query_as::<_, Subscription>(LATEST_SQL)
        .bind(user_id)
        .fetch_optional(pool)
        .await?;
    Ok(latest)
}
|
||||||
|
|
||||||
|
pub async fn find_by_stripe_subscription(
|
||||||
|
pool: &PgPool,
|
||||||
|
stripe_subscription_id: &str,
|
||||||
|
) -> Result<Option<Subscription>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Subscription>(
|
||||||
|
r#"SELECT id, user_id, stripe_customer_id, stripe_subscription_id, stripe_price_id,
|
||||||
|
status, tier, current_period_start, current_period_end, created_at, updated_at
|
||||||
|
FROM subscriptions WHERE stripe_subscription_id = $1"#,
|
||||||
|
)
|
||||||
|
.bind(stripe_subscription_id)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn create_or_update(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
stripe_customer_id: Option<&str>,
|
||||||
|
stripe_subscription_id: Option<&str>,
|
||||||
|
stripe_price_id: Option<&str>,
|
||||||
|
status: &str,
|
||||||
|
tier: &str,
|
||||||
|
) -> Result<Subscription, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Subscription>(
|
||||||
|
r#"INSERT INTO subscriptions (id, user_id, stripe_customer_id, stripe_subscription_id, stripe_price_id, status, tier)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||||
|
ON CONFLICT (user_id) DO UPDATE SET
|
||||||
|
stripe_customer_id = EXCLUDED.stripe_customer_id,
|
||||||
|
stripe_subscription_id = EXCLUDED.stripe_subscription_id,
|
||||||
|
stripe_price_id = EXCLUDED.stripe_price_id,
|
||||||
|
status = EXCLUDED.status,
|
||||||
|
tier = EXCLUDED.tier,
|
||||||
|
updated_at = NOW()
|
||||||
|
RETURNING id, user_id, stripe_customer_id, stripe_subscription_id, stripe_price_id,
|
||||||
|
status, tier, current_period_start, current_period_end, created_at, updated_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(stripe_customer_id)
|
||||||
|
.bind(stripe_subscription_id)
|
||||||
|
.bind(stripe_price_id)
|
||||||
|
.bind(status)
|
||||||
|
.bind(tier)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn update_status(
|
||||||
|
pool: &PgPool,
|
||||||
|
stripe_subscription_id: &str,
|
||||||
|
status: &str,
|
||||||
|
) -> Result<(), sqlx::Error> {
|
||||||
|
sqlx::query(
|
||||||
|
r#"UPDATE subscriptions SET status = $1, updated_at = NOW() WHERE stripe_subscription_id = $2"#,
|
||||||
|
)
|
||||||
|
.bind(status)
|
||||||
|
.bind(stripe_subscription_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
68
crates/db/src/repos/teams.rs
Normal file
68
crates/db/src/repos/teams.rs
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
use shared::models::{Team, TeamMember};
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
pub async fn create(pool: &PgPool, name: &str, slug: &str, owner_id: Uuid) -> Result<Team, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, Team>(
|
||||||
|
r#"INSERT INTO teams (id, name, slug, owner_id)
|
||||||
|
VALUES ($1, $2, $3, $4)
|
||||||
|
RETURNING id, name, slug, owner_id, created_at, updated_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(name)
|
||||||
|
.bind(slug)
|
||||||
|
.bind(owner_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finds a team by its slug; `Ok(None)` when no team uses that slug.
pub async fn find_by_slug(pool: &PgPool, slug: &str) -> Result<Option<Team>, sqlx::Error> {
    const BY_SLUG_SQL: &str =
        r#"SELECT id, name, slug, owner_id, created_at, updated_at FROM teams WHERE slug = $1"#;

    let by_slug = sqlx::query_as::<_, Team>(BY_SLUG_SQL).bind(slug);
    by_slug.fetch_optional(pool).await
}
|
||||||
|
|
||||||
|
/// Finds a team by primary key; `Ok(None)` when the id is unknown.
pub async fn find_by_id(pool: &PgPool, id: Uuid) -> Result<Option<Team>, sqlx::Error> {
    const BY_ID_SQL: &str =
        r#"SELECT id, name, slug, owner_id, created_at, updated_at FROM teams WHERE id = $1"#;

    let team = sqlx::query_as::<_, Team>(BY_ID_SQL)
        .bind(id)
        .fetch_optional(pool)
        .await?;
    Ok(team)
}
|
||||||
|
|
||||||
|
pub async fn add_member(pool: &PgPool, team_id: Uuid, user_id: Uuid, role: &str) -> Result<TeamMember, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, TeamMember>(
|
||||||
|
r#"INSERT INTO team_members (id, team_id, user_id, role)
|
||||||
|
VALUES ($1, $2, $3, $4)
|
||||||
|
RETURNING id, team_id, user_id, role, created_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(team_id)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(role)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns all membership rows of `team_id`; no ordering is requested from the database.
pub async fn list_members(pool: &PgPool, team_id: Uuid) -> Result<Vec<TeamMember>, sqlx::Error> {
    const LIST_SQL: &str =
        r#"SELECT id, team_id, user_id, role, created_at FROM team_members WHERE team_id = $1"#;

    let members = sqlx::query_as::<_, TeamMember>(LIST_SQL)
        .bind(team_id)
        .fetch_all(pool)
        .await?;
    Ok(members)
}
|
||||||
|
|
||||||
|
/// Returns the membership of `user_id` in `team_id`, or `Ok(None)` when the user is not a member.
pub async fn find_member(pool: &PgPool, team_id: Uuid, user_id: Uuid) -> Result<Option<TeamMember>, sqlx::Error> {
    const MEMBER_SQL: &str = r#"SELECT id, team_id, user_id, role, created_at FROM team_members WHERE team_id = $1 AND user_id = $2"#;

    let membership = sqlx::query_as::<_, TeamMember>(MEMBER_SQL)
        .bind(team_id)
        .bind(user_id)
        .fetch_optional(pool)
        .await?;
    Ok(membership)
}
|
||||||
47
crates/db/src/repos/usage_logs.rs
Normal file
47
crates/db/src/repos/usage_logs.rs
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
use shared::models::UsageLog;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
pub async fn create(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
api_key_id: Uuid,
|
||||||
|
endpoint: &str,
|
||||||
|
url: &str,
|
||||||
|
status: &str,
|
||||||
|
credits_used: i64,
|
||||||
|
duration_ms: i64,
|
||||||
|
) -> Result<UsageLog, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, UsageLog>(
|
||||||
|
r#"INSERT INTO usage_logs (id, user_id, api_key_id, endpoint, url, status, credits_used, duration_ms)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||||
|
RETURNING id, user_id, api_key_id, endpoint, url, status, credits_used, duration_ms, created_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(api_key_id)
|
||||||
|
.bind(endpoint)
|
||||||
|
.bind(url)
|
||||||
|
.bind(status)
|
||||||
|
.bind(credits_used)
|
||||||
|
.bind(duration_ms)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn list_by_user(
|
||||||
|
pool: &PgPool,
|
||||||
|
user_id: Uuid,
|
||||||
|
limit: i64,
|
||||||
|
offset: i64,
|
||||||
|
) -> Result<Vec<UsageLog>, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, UsageLog>(
|
||||||
|
r#"SELECT id, user_id, api_key_id, endpoint, url, status, credits_used, duration_ms, created_at
|
||||||
|
FROM usage_logs WHERE user_id = $1 ORDER BY created_at DESC LIMIT $2 OFFSET $3"#,
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
64
crates/db/src/repos/users.rs
Normal file
64
crates/db/src/repos/users.rs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
use shared::models::User;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
/// Finds the user whose `email` column equals `email` exactly; `Ok(None)` when absent.
pub async fn find_by_email(pool: &PgPool, email: &str) -> Result<Option<User>, sqlx::Error> {
    const BY_EMAIL_SQL: &str = r#"SELECT id, email, password_hash, google_id, credits, tier, created_at, updated_at FROM users WHERE email = $1"#;

    let by_email = sqlx::query_as::<_, User>(BY_EMAIL_SQL).bind(email);
    by_email.fetch_optional(pool).await
}
|
||||||
|
|
||||||
|
/// Finds a user by primary key; `Ok(None)` when the id is unknown.
pub async fn find_by_id(pool: &PgPool, id: Uuid) -> Result<Option<User>, sqlx::Error> {
    const BY_ID_SQL: &str = r#"SELECT id, email, password_hash, google_id, credits, tier, created_at, updated_at FROM users WHERE id = $1"#;

    let user = sqlx::query_as::<_, User>(BY_ID_SQL)
        .bind(id)
        .fetch_optional(pool)
        .await?;
    Ok(user)
}
|
||||||
|
|
||||||
|
pub async fn create(
|
||||||
|
pool: &PgPool,
|
||||||
|
email: &str,
|
||||||
|
password_hash: Option<&str>,
|
||||||
|
google_id: Option<&str>,
|
||||||
|
) -> Result<User, sqlx::Error> {
|
||||||
|
sqlx::query_as::<_, User>(
|
||||||
|
r#"INSERT INTO users (id, email, password_hash, google_id, credits, tier)
|
||||||
|
VALUES ($1, $2, $3, $4, 30, 'free')
|
||||||
|
RETURNING id, email, password_hash, google_id, credits, tier, created_at, updated_at"#,
|
||||||
|
)
|
||||||
|
.bind(Uuid::new_v4())
|
||||||
|
.bind(email)
|
||||||
|
.bind(password_hash)
|
||||||
|
.bind(google_id)
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Atomically subtracts `amount` credits from `user_id` if the balance allows it.
///
/// Returns `Ok(true)` when the balance was debited, and `Ok(false)` when the user does
/// not exist, has fewer than `amount` credits, or `amount` is negative.
pub async fn deduct_credits(pool: &PgPool, user_id: Uuid, amount: i64) -> Result<bool, sqlx::Error> {
    // A negative amount would *add* credits and always satisfy `credits >= $1`,
    // so it is refused before touching the database.
    if amount < 0 {
        return Ok(false);
    }

    // The balance check lives in the WHERE clause so check and debit are one statement.
    let result = sqlx::query(
        r#"UPDATE users SET credits = credits - $1, updated_at = NOW() WHERE id = $2 AND credits >= $1"#,
    )
    .bind(amount)
    .bind(user_id)
    .execute(pool)
    .await?;

    Ok(result.rows_affected() > 0)
}
|
||||||
|
|
||||||
|
/// Adds `amount` credits to `user_id` and refreshes the row's `updated_at`.
///
/// Succeeds even when no user has that id; only database errors are reported.
/// NOTE(review): `amount` is not range-checked here, so a negative value lowers the
/// balance without the guard `deduct_credits` applies — confirm callers never pass one.
pub async fn add_credits(pool: &PgPool, user_id: Uuid, amount: i64) -> Result<(), sqlx::Error> {
    sqlx::query(
        r#"UPDATE users SET credits = credits + $1, updated_at = NOW() WHERE id = $2"#,
    )
    .bind(amount)
    .bind(user_id)
    .execute(pool)
    .await?;

    Ok(())
}
|
||||||
18
crates/db/tests/db_test.rs
Normal file
18
crates/db/tests/db_test.rs
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
use shared::models::User;
|
||||||
|
|
||||||
|
/// A `User` serialized to JSON must contain its e-mail address.
#[test]
fn test_user_model_serialization() {
    let sample = User {
        id: uuid::Uuid::new_v4(),
        email: String::from("test@example.com"),
        password_hash: Some(String::from("hash")),
        google_id: None,
        credits: 30,
        tier: String::from("free"),
        created_at: chrono::Utc::now(),
        updated_at: chrono::Utc::now(),
    };

    let encoded = serde_json::to_string(&sample).unwrap();
    assert!(encoded.contains("test@example.com"));
}
|
||||||
15
crates/shared/Cargo.toml
Normal file
15
crates/shared/Cargo.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Manifest of the `shared` crate.
[package]
name = "shared"
version = "0.1.0"
edition = "2021"

# All dependency versions are inherited from the workspace manifest.
[dependencies]
serde.workspace = true
serde_json.workspace = true
uuid.workspace = true
chrono.workspace = true
thiserror.workspace = true
url.workspace = true
regex.workspace = true
config.workspace = true
sqlx.workspace = true
|
||||||
26
crates/shared/src/api.rs
Normal file
26
crates/shared/src/api.rs
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct ApiResponse<T> {
|
||||||
|
pub success: bool,
|
||||||
|
pub data: Option<T>,
|
||||||
|
pub error: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> ApiResponse<T> {
|
||||||
|
pub fn ok(data: T) -> Self {
|
||||||
|
Self {
|
||||||
|
success: true,
|
||||||
|
data: Some(data),
|
||||||
|
error: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn err(msg: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some(msg.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
25
crates/shared/src/config.rs
Normal file
25
crates/shared/src/config.rs
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize)]
|
||||||
|
pub struct AppConfig {
|
||||||
|
pub database_url: String,
|
||||||
|
pub redis_url: String,
|
||||||
|
pub jwt_secret: String,
|
||||||
|
pub s3_endpoint: String,
|
||||||
|
pub s3_bucket: String,
|
||||||
|
pub s3_region: String,
|
||||||
|
pub s3_access_key: String,
|
||||||
|
pub s3_secret_key: String,
|
||||||
|
pub app_port: u16,
|
||||||
|
pub app_host: String,
|
||||||
|
pub playwright_script_path: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AppConfig {
|
||||||
|
pub fn from_env() -> Result<Self, config::ConfigError> {
|
||||||
|
config::Config::builder()
|
||||||
|
.add_source(config::Environment::default())
|
||||||
|
.build()?
|
||||||
|
.try_deserialize()
|
||||||
|
}
|
||||||
|
}
|
||||||
41
crates/shared/src/error.rs
Normal file
41
crates/shared/src/error.rs
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum AppError {
|
||||||
|
#[error("Database error: {0}")]
|
||||||
|
Database(#[from] sqlx::Error),
|
||||||
|
#[error("Redis error: {0}")]
|
||||||
|
Redis(String),
|
||||||
|
#[error("S3 error: {0}")]
|
||||||
|
S3(String),
|
||||||
|
#[error("Invalid URL: {0}")]
|
||||||
|
InvalidUrl(String),
|
||||||
|
#[error("Browser automation failed: {0}")]
|
||||||
|
BrowserError(String),
|
||||||
|
#[error("Rate limit exceeded")]
|
||||||
|
RateLimit,
|
||||||
|
#[error("Insufficient credits")]
|
||||||
|
InsufficientCredits,
|
||||||
|
#[error("Unauthorized")]
|
||||||
|
Unauthorized,
|
||||||
|
#[error("Not found")]
|
||||||
|
NotFound,
|
||||||
|
#[error("Bad request: {0}")]
|
||||||
|
BadRequest(String),
|
||||||
|
#[error("Internal error: {0}")]
|
||||||
|
Internal(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AppError {
|
||||||
|
pub fn status_code(&self) -> u16 {
|
||||||
|
match self {
|
||||||
|
AppError::InvalidUrl(_) | AppError::BadRequest(_) => 400,
|
||||||
|
AppError::Unauthorized => 401,
|
||||||
|
AppError::InsufficientCredits => 403,
|
||||||
|
AppError::NotFound => 404,
|
||||||
|
AppError::RateLimit => 429,
|
||||||
|
AppError::BrowserError(_) => 500,
|
||||||
|
_ => 500,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
24
crates/shared/src/jobs.rs
Normal file
24
crates/shared/src/jobs.rs
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::models::CrawlOptions;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CrawlJob {
|
||||||
|
pub job_id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub api_key_id: Uuid,
|
||||||
|
pub endpoint: String,
|
||||||
|
pub url: String,
|
||||||
|
pub options: CrawlOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CrawlResult {
|
||||||
|
pub job_id: Uuid,
|
||||||
|
pub success: bool,
|
||||||
|
pub data: Option<serde_json::Value>,
|
||||||
|
pub error: Option<String>,
|
||||||
|
pub duration_ms: i64,
|
||||||
|
pub file_url: Option<String>,
|
||||||
|
}
|
||||||
6
crates/shared/src/lib.rs
Normal file
6
crates/shared/src/lib.rs
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
pub mod api;
|
||||||
|
pub mod config;
|
||||||
|
pub mod error;
|
||||||
|
pub mod jobs;
|
||||||
|
pub mod models;
|
||||||
|
pub mod queue;
|
||||||
136
crates/shared/src/models.rs
Normal file
136
crates/shared/src/models.rs
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sqlx::FromRow;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct User {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub email: String,
|
||||||
|
pub password_hash: Option<String>,
|
||||||
|
pub google_id: Option<String>,
|
||||||
|
pub credits: i64,
|
||||||
|
pub tier: String,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
pub updated_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct ApiKey {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub key_hash: String,
|
||||||
|
pub name: String,
|
||||||
|
pub last_used_at: Option<DateTime<Utc>>,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct OAuthAccount {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub provider: String,
|
||||||
|
pub provider_account_id: String,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct Subscription {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub stripe_customer_id: Option<String>,
|
||||||
|
pub stripe_subscription_id: Option<String>,
|
||||||
|
pub stripe_price_id: Option<String>,
|
||||||
|
pub status: String,
|
||||||
|
pub tier: String,
|
||||||
|
pub current_period_start: Option<DateTime<Utc>>,
|
||||||
|
pub current_period_end: Option<DateTime<Utc>>,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
pub updated_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct UsageLog {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub api_key_id: Uuid,
|
||||||
|
pub endpoint: String,
|
||||||
|
pub url: String,
|
||||||
|
pub status: String,
|
||||||
|
pub credits_used: i64,
|
||||||
|
pub duration_ms: i64,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct Team {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub name: String,
|
||||||
|
pub slug: String,
|
||||||
|
pub owner_id: Uuid,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
pub updated_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||||
|
pub struct TeamMember {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub team_id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub role: String,
|
||||||
|
pub created_at: DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CrawlRequest {
|
||||||
|
pub url: String,
|
||||||
|
#[serde(default)]
|
||||||
|
pub options: CrawlOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||||
|
pub struct CrawlOptions {
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub full_page: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub width: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub height: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub wait_for: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub timeout: Option<u64>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub user_agent: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub selectors: Option<Vec<String>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub include_html: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub webhook_url: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub session_id: Option<String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub headers: Option<std::collections::HashMap<String, String>>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub mobile: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub scroll_to_bottom: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub stealth: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub use_proxy: Option<bool>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub solve_captcha: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CrawlResponse {
|
||||||
|
pub success: bool,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub data: Option<serde_json::Value>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub calls_remaining: Option<i64>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub error: Option<String>,
|
||||||
|
}
|
||||||
27
crates/shared/src/queue.rs
Normal file
27
crates/shared/src/queue.rs
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::models::CrawlOptions;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct Job {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub user_id: Uuid,
|
||||||
|
pub api_key_id: Uuid,
|
||||||
|
pub endpoint: String,
|
||||||
|
pub url: String,
|
||||||
|
pub options: CrawlOptions,
|
||||||
|
pub webhook_url: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct JobResult {
|
||||||
|
pub id: Uuid,
|
||||||
|
pub success: bool,
|
||||||
|
pub data: Option<serde_json::Value>,
|
||||||
|
pub error: Option<String>,
|
||||||
|
pub duration_ms: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Redis list key that the worker pops pending jobs from.
pub const QUEUE_NAME: &str = "crawlapi:jobs";

/// Prefix of the Redis key holding a job's result; the job id is appended.
pub const RESULT_PREFIX: &str = "crawlapi:results:";
|
||||||
26
crates/worker/Cargo.toml
Normal file
26
crates/worker/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
[package]
name = "worker"
version = "0.1.0"
edition = "2021"

[dependencies]
# Crates from this workspace.
db = { path = "../db" }
shared = { path = "../shared" }

# Async runtime and helpers.
futures = { workspace = true }
tokio = { workspace = true }
tokio-util = { workspace = true }

# Serialization and common data types.
chrono = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }

# Storage, queue and outbound HTTP clients.
aws-config = { workspace = true }
aws-sdk-s3 = { workspace = true }
redis = { workspace = true }
reqwest = { workspace = true }
sqlx = { workspace = true }

# Configuration and error handling.
anyhow = { workspace = true }
config = { workspace = true }
thiserror = { workspace = true }

# Logging and error reporting.
# `sentry` is the only dependency versioned here instead of in the workspace.
sentry = "0.36"
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["json", "env-filter"] }
|
||||||
230
crates/worker/src/main.rs
Normal file
230
crates/worker/src/main.rs
Normal file
@@ -0,0 +1,230 @@
|
|||||||
|
use chrono::Utc;
|
||||||
|
use db::connection::create_pool;
|
||||||
|
use redis::AsyncCommands;
|
||||||
|
use shared::{
|
||||||
|
config::AppConfig,
|
||||||
|
queue::{Job, JobResult, QUEUE_NAME, RESULT_PREFIX},
|
||||||
|
};
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use tokio::process::Command;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
use tracing::{info_span, Instrument};
|
||||||
|
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
let sentry_dsn = std::env::var("SENTRY_DSN").ok();
|
||||||
|
let _guard = sentry_dsn.map(|dsn| {
|
||||||
|
sentry::init((dsn, sentry::ClientOptions {
|
||||||
|
release: sentry::release_name!(),
|
||||||
|
..Default::default()
|
||||||
|
}))
|
||||||
|
});
|
||||||
|
|
||||||
|
let json_logging = std::env::var("JSON_LOGGING").unwrap_or_else(|_| "false".to_string()) == "true";
|
||||||
|
|
||||||
|
if json_logging {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(EnvFilter::try_from_default_env().unwrap_or_else(|_| "worker=debug".into()))
|
||||||
|
.with(
|
||||||
|
tracing_subscriber::fmt::layer()
|
||||||
|
.json()
|
||||||
|
.with_current_span(true)
|
||||||
|
.with_span_list(true)
|
||||||
|
.with_target(true),
|
||||||
|
)
|
||||||
|
.init();
|
||||||
|
} else {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(EnvFilter::try_from_default_env().unwrap_or_else(|_| "worker=debug".into()))
|
||||||
|
.with(tracing_subscriber::fmt::layer())
|
||||||
|
.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
let config = AppConfig::from_env()?;
|
||||||
|
let db = create_pool(&config.database_url).await?;
|
||||||
|
|
||||||
|
let redis_client = redis::Client::open(config.redis_url.clone())?;
|
||||||
|
let mut redis_conn = redis_client.get_multiplexed_tokio_connection().await?;
|
||||||
|
|
||||||
|
tracing::info!("Worker started. Waiting for jobs...");
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let job_json: Option<(String, String)> = redis::cmd("BLPOP")
|
||||||
|
.arg(QUEUE_NAME)
|
||||||
|
.arg(5)
|
||||||
|
.query_async(&mut redis_conn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if let Some((_, json)) = job_json {
|
||||||
|
let job: Job = match serde_json::from_str(&json) {
|
||||||
|
Ok(j) => j,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!("Failed to deserialize job: {}", e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let span = info_span!(
|
||||||
|
"process_job",
|
||||||
|
job_id = %job.id,
|
||||||
|
user_id = %job.user_id,
|
||||||
|
endpoint = %job.endpoint,
|
||||||
|
url = %job.url,
|
||||||
|
);
|
||||||
|
|
||||||
|
process_single_job(&config, &db, &mut redis_conn, &job)
|
||||||
|
.instrument(span)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_single_job(
|
||||||
|
config: &AppConfig,
|
||||||
|
db: &sqlx::PgPool,
|
||||||
|
redis_conn: &mut redis::aio::MultiplexedConnection,
|
||||||
|
job: &Job,
|
||||||
|
) {
|
||||||
|
tracing::info!("Processing job {}: {} {}", job.id, job.endpoint, job.url);
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
let result = process_job_with_retry(config, job).await;
|
||||||
|
let duration = start.elapsed().as_millis() as i64;
|
||||||
|
|
||||||
|
let job_result = match result {
|
||||||
|
Ok(data) => JobResult {
|
||||||
|
id: job.id,
|
||||||
|
success: true,
|
||||||
|
data: Some(data),
|
||||||
|
error: None,
|
||||||
|
duration_ms: duration,
|
||||||
|
},
|
||||||
|
Err(e) => {
|
||||||
|
tracing::error!("Job {} failed after retries: {}", job.id, e);
|
||||||
|
JobResult {
|
||||||
|
id: job.id,
|
||||||
|
success: false,
|
||||||
|
data: None,
|
||||||
|
error: Some(e.clone()),
|
||||||
|
duration_ms: duration,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let result_json = serde_json::to_string(&job_result).unwrap();
|
||||||
|
let result_key = format!("{}{}", RESULT_PREFIX, job.id);
|
||||||
|
let _: () = redis_conn.set_ex(&result_key, result_json, 300).await.unwrap_or(());
|
||||||
|
|
||||||
|
let status = if job_result.success { "success" } else { "error" };
|
||||||
|
let _ = db::repos::usage_logs::create(
|
||||||
|
db,
|
||||||
|
job.user_id,
|
||||||
|
job.api_key_id,
|
||||||
|
&job.endpoint,
|
||||||
|
&job.url,
|
||||||
|
status,
|
||||||
|
1,
|
||||||
|
duration,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if let Some(webhook_url) = &job.webhook_url {
|
||||||
|
let _ = send_webhook(webhook_url, &job_result).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !job_result.success {
|
||||||
|
let dlq_key = format!("crawlapi:dlq:{}", job.id);
|
||||||
|
let dlq_data = serde_json::json!({
|
||||||
|
"job": job,
|
||||||
|
"error": job_result.error,
|
||||||
|
"failed_at": Utc::now().to_rfc3339(),
|
||||||
|
});
|
||||||
|
let _: () = redis_conn.set_ex(dlq_key, dlq_data.to_string(), 86400).await.unwrap_or(());
|
||||||
|
tracing::warn!("Job {} moved to DLQ", job.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!("Job {} completed in {}ms", job.id, duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_job_with_retry(config: &AppConfig, job: &Job) -> Result<serde_json::Value, String> {
|
||||||
|
let max_retries = 3;
|
||||||
|
let mut last_error = String::new();
|
||||||
|
|
||||||
|
for attempt in 0..max_retries {
|
||||||
|
if attempt > 0 {
|
||||||
|
let backoff = Duration::from_secs(2_u64.pow(attempt as u32));
|
||||||
|
tracing::info!(
|
||||||
|
"Retrying job {} (attempt {}/{}), waiting {:?}",
|
||||||
|
job.id,
|
||||||
|
attempt + 1,
|
||||||
|
max_retries,
|
||||||
|
backoff
|
||||||
|
);
|
||||||
|
sleep(backoff).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
match process_job(config, job).await {
|
||||||
|
Ok(data) => return Ok(data),
|
||||||
|
Err(e) => {
|
||||||
|
last_error = e;
|
||||||
|
tracing::warn!(
|
||||||
|
"Job {} attempt {}/{} failed: {}",
|
||||||
|
job.id,
|
||||||
|
attempt + 1,
|
||||||
|
max_retries,
|
||||||
|
last_error
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!("Failed after {} retries: {}", max_retries, last_error))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_job(config: &AppConfig, job: &Job) -> Result<serde_json::Value, String> {
|
||||||
|
let script_path = &config.playwright_script_path;
|
||||||
|
|
||||||
|
let mut cmd = Command::new("node");
|
||||||
|
cmd.arg(script_path)
|
||||||
|
.arg(&job.endpoint)
|
||||||
|
.arg(serde_json::to_string(&job.url).unwrap())
|
||||||
|
.arg(serde_json::to_string(&job.options).unwrap())
|
||||||
|
.env("OUTPUT_DIR", "/tmp/crawlapi")
|
||||||
|
.env("BROWSER_POOL_SIZE", std::env::var("BROWSER_POOL_SIZE").unwrap_or_else(|_| "5".to_string()))
|
||||||
|
.env("MAX_PAGES_PER_BROWSER", std::env::var("MAX_PAGES_PER_BROWSER").unwrap_or_else(|_| "10".to_string()));
|
||||||
|
|
||||||
|
if let Ok(proxy_url) = std::env::var("PROXY_URL") {
|
||||||
|
cmd.env("PROXY_URL", proxy_url);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok(captcha_key) = std::env::var("CAPTCHA_API_KEY") {
|
||||||
|
cmd.env("CAPTCHA_API_KEY", captcha_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = cmd.output()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to execute browser: {}", e))?;
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
return Err(format!("Browser error: {}", String::from_utf8_lossy(&output.stderr)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||||
|
let result: serde_json::Value = serde_json::from_str(&stdout)
|
||||||
|
.map_err(|e| format!("Invalid JSON from browser: {} | output: {}", e, stdout))?;
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn send_webhook(url: &str, result: &JobResult) -> Result<(), reqwest::Error> {
|
||||||
|
let client = reqwest::Client::builder()
|
||||||
|
.timeout(Duration::from_secs(10))
|
||||||
|
.build()?;
|
||||||
|
let _ = client
|
||||||
|
.post(url)
|
||||||
|
.json(result)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
141
docker-compose.yml
Normal file
141
docker-compose.yml
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
---
# Local stack: Postgres, Redis and MinIO as backing services, the API and
# its workers, Prometheus and Grafana, and the frontend.
# All credentials below are development defaults.
version: "3.8"

services:
  postgres:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: crawlapi
      POSTGRES_PASSWORD: crawlapi
      POSTGRES_DB: crawlapi
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U crawlapi"]
      interval: 5s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5

  minio:
    image: minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - minio_data:/data
    healthcheck:
      # Current minio/minio images do not ship curl, so a curl-based probe
      # never succeeds and every service waiting on `service_healthy` stays
      # blocked. The bundled `mc` client provides the readiness check.
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 5

  api:
    build:
      context: .
      dockerfile: Dockerfile.api
    ports:
      - "3000:3000"
    environment:
      DATABASE_URL: postgres://crawlapi:crawlapi@postgres:5432/crawlapi
      REDIS_URL: redis://redis:6379
      # Overridable from the host environment or an .env file; the fallback
      # is a placeholder and must not be used in production.
      JWT_SECRET: "${JWT_SECRET:-your-super-secret-jwt-key-change-this-in-production}"
      S3_ENDPOINT: http://minio:9000
      S3_BUCKET: crawlapi
      S3_REGION: us-east-1
      S3_ACCESS_KEY: minioadmin
      S3_SECRET_KEY: minioadmin
      APP_PORT: "3000"
      APP_HOST: "0.0.0.0"
      PLAYWRIGHT_SCRIPT_PATH: /app/playwright/pool.js
      AWS_ACCESS_KEY_ID: minioadmin
      AWS_SECRET_ACCESS_KEY: minioadmin
      BROWSER_POOL_SIZE: "5"
      MAX_PAGES_PER_BROWSER: "10"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      minio:
        condition: service_healthy
    volumes:
      - ./playwright:/app/playwright:ro

  worker:
    build:
      context: .
      dockerfile: Dockerfile.worker
    environment:
      DATABASE_URL: postgres://crawlapi:crawlapi@postgres:5432/crawlapi
      REDIS_URL: redis://redis:6379
      S3_ENDPOINT: http://minio:9000
      S3_BUCKET: crawlapi
      S3_REGION: us-east-1
      S3_ACCESS_KEY: minioadmin
      S3_SECRET_KEY: minioadmin
      AWS_ACCESS_KEY_ID: minioadmin
      AWS_SECRET_ACCESS_KEY: minioadmin
      PLAYWRIGHT_SCRIPT_PATH: /app/playwright/pool.js
      BROWSER_POOL_SIZE: "5"
      MAX_PAGES_PER_BROWSER: "10"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      minio:
        condition: service_healthy
    volumes:
      - ./playwright:/app/playwright:ro
    deploy:
      replicas: 3

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3001:3000"
    environment:
      # Development default; override from the host environment elsewhere.
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:-admin}"
    volumes:
      - grafana_data:/var/lib/grafana

  frontend:
    build:
      context: .
      dockerfile: Dockerfile.frontend
    ports:
      - "80:80"
    depends_on:
      - api

volumes:
  postgres_data:
  redis_data:
  minio_data:
  grafana_data:
|
||||||
105
e2e/api.spec.ts
Normal file
105
e2e/api.spec.ts
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
import { test, expect } from '@playwright/test';
|
||||||
|
|
||||||
|
const API_URL = process.env.API_URL || 'http://localhost:3000';
|
||||||
|
|
||||||
|
// End-to-end checks against a running API instance at API_URL.
// A fresh user and API key are created once and shared by the tests below.
test.describe('Crawl API E2E', () => {
  let apiKey: string;
  let token: string;

  test.beforeAll(async ({ request }) => {
    // Register test user. The timestamp keeps the address unique per run.
    const register = await request.post(`${API_URL}/api/auth/register`, {
      data: {
        email: `e2e_${Date.now()}@test.com`,
        password: 'testpassword123'
      }
    });
    // Fail here, with a clear message, rather than later with an
    // undefined token or key in an unrelated test.
    expect(register.ok(), 'user registration should succeed').toBeTruthy();
    const registerData = await register.json();
    token = registerData.token;
    expect(token, 'registration response should contain a token').toBeTruthy();

    // Create API key
    const key = await request.post(`${API_URL}/api/auth/api-keys`, {
      headers: { 'x-auth-token': token },
      data: { name: 'E2E Key' }
    });
    expect(key.ok(), 'API key creation should succeed').toBeTruthy();
    const keyData = await key.json();
    apiKey = keyData.key;
    expect(apiKey, 'API key response should contain a key').toBeTruthy();
  });

  test('POST /api/screenshot returns success', async ({ request }) => {
    const res = await request.post(`${API_URL}/api/screenshot`, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
      },
      data: {
        url: 'https://example.com',
        options: { fullPage: true }
      }
    });

    expect(res.status()).toBe(200);
    const data = await res.json();
    expect(data.success).toBe(true);
    expect(data.data).toBeDefined();
    expect(data.calls_remaining).toBeDefined();
  });

  test('POST /api/content returns HTML', async ({ request }) => {
    const res = await request.post(`${API_URL}/api/content`, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
      },
      data: { url: 'https://example.com' }
    });

    expect(res.status()).toBe(200);
    const data = await res.json();
    expect(data.success).toBe(true);
    expect(data.data.html).toContain('<html');
  });

  test('POST /api/json returns structured data', async ({ request }) => {
    const res = await request.post(`${API_URL}/api/json`, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
      },
      data: { url: 'https://example.com' }
    });

    expect(res.status()).toBe(200);
    const data = await res.json();
    expect(data.success).toBe(true);
    expect(data.data.title).toBeDefined();
  });

  test('GET /metrics returns Prometheus format', async ({ request }) => {
    const res = await request.get(`${API_URL}/metrics`);
    expect(res.status()).toBe(200);
    const text = await res.text();
    expect(text).toContain('api_requests_total');
  });

  test('POST /api/screenshot without API key returns 401', async ({ request }) => {
    const res = await request.post(`${API_URL}/api/screenshot`, {
      headers: { 'Content-Type': 'application/json' },
      data: { url: 'https://example.com' }
    });
    expect(res.status()).toBe(401);
  });

  // The title matches what is asserted: HTTP 200 with `success: false` in
  // the body, not a 400 status.
  test('POST /api/screenshot with invalid URL returns 200 with success: false', async ({ request }) => {
    const res = await request.post(`${API_URL}/api/screenshot`, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
      },
      data: { url: 'not-a-valid-url' }
    });
    expect(res.status()).toBe(200);
    const data = await res.json();
    expect(data.success).toBe(false);
  });
});
|
||||||
11
e2e/package.json
Normal file
11
e2e/package.json
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"name": "crawlapi-e2e",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"scripts": {
|
||||||
|
"test": "playwright test",
|
||||||
|
"test:ui": "playwright test --ui"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@playwright/test": "^1.49.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
20
e2e/playwright.config.ts
Normal file
20
e2e/playwright.config.ts
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import { defineConfig, devices } from '@playwright/test';
|
||||||
|
|
||||||
|
// Values derived from the environment, computed once.
const onCI = !!process.env.CI;
const apiBaseURL = process.env.API_URL || 'http://localhost:3000';

// Playwright configuration for the API end-to-end suite.
export default defineConfig({
  testDir: '.',
  fullyParallel: true,
  // On CI: reject stray `test.only`, retry failures twice, use one worker.
  forbidOnly: onCI,
  retries: onCI ? 2 : 0,
  workers: onCI ? 1 : undefined,
  reporter: 'html',
  use: {
    baseURL: apiBaseURL,
    trace: 'on-first-retry',
  },
  projects: [
    {
      name: 'api',
      testMatch: /.*\.spec\.ts/,
    },
  ],
});
|
||||||
92
frontend/app/billing/page.tsx
Normal file
92
frontend/app/billing/page.tsx
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
import Link from 'next/link'
|
||||||
|
|
||||||
|
// Subscription tiers rendered as cards on the billing page, in display order.
const plans = [
  {
    name: 'Free',
    price: '$0',
    credits: '30 / month',
    features: ['9 endpoints', '1 concurrent', 'Community support'],
  },
  {
    name: 'Hobby',
    price: '$9',
    credits: '1,000 / month',
    features: ['9 endpoints', '3 concurrent', 'Email support', 'Webhooks'],
  },
  {
    name: 'Starter',
    price: '$19',
    credits: '3,000 / month',
    features: ['9 endpoints', '5 concurrent', 'Priority support', 'Webhooks', 'AI extraction'],
  },
  {
    name: 'Pro',
    price: '$49',
    credits: '10,000 / month',
    features: ['All endpoints', '10 concurrent', 'Priority support', 'Webhooks', 'AI extraction', 'Proxy rotation'],
  },
  {
    name: 'Startup',
    price: '$99',
    credits: '25,000 / month',
    features: ['All endpoints', '20 concurrent', 'Dedicated support', 'Custom integrations', 'SLA'],
  },
]
|
||||||
|
|
||||||
|
/**
 * Billing page: shows the signed-in user's credit usage and the plan cards.
 *
 * The usage panel is rendered only when a token is found in localStorage
 * and the API accepts it. The usage figures themselves are still mock
 * values.
 */
export default function Billing() {
  // API origin; set NEXT_PUBLIC_API_URL at build time to point elsewhere.
  const apiBase = process.env.NEXT_PUBLIC_API_URL ?? 'http://localhost:3000'

  const [token, setToken] = useState('')
  const [credits, setCredits] = useState<number | null>(null)
  const [usage, setUsage] = useState(0)

  useEffect(() => {
    const t = localStorage.getItem('crawlapi_token')
    if (t) {
      setToken(t)
      fetchUser(t)
    }
  }, [])

  async function fetchUser(t: string) {
    try {
      const res = await fetch(`${apiBase}/api/auth/api-keys`, {
        headers: { 'x-auth-token': t }
      })
      // A rejected token (expired, revoked) must not produce a usage panel.
      if (!res.ok) {
        console.error(`Failed to load account data: HTTP ${res.status}`)
        return
      }
      // Just a mock - in production this would call a /me endpoint
      setCredits(30)
      setUsage(12)
    } catch (e) {
      console.error(e)
    }
  }

  // Derived display values, guarded against a zero allowance and overuse so
  // the bar width stays within 0-100% and "remaining" never goes negative.
  const remaining = credits !== null ? Math.max(0, credits - usage) : 0
  const usedPercent =
    credits !== null && credits > 0
      ? Math.min(100, Math.max(0, (usage / credits) * 100))
      : 0

  return (
    <main style={{ maxWidth: 1200, margin: '0 auto', padding: '40px 20px' }}>
      <nav style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 40 }}>
        <Link href="/" style={{ fontSize: 24, fontWeight: 700, color: '#fff', textDecoration: 'none' }}>Crawl API</Link>
        <Link href="/" style={{ color: '#888', textDecoration: 'none' }}>← Back</Link>
      </nav>

      <h1 style={{ fontSize: 36, marginBottom: 8 }}>Billing</h1>
      <p style={{ color: '#888', marginBottom: 40 }}>Manage your subscription and usage.</p>

      {token && credits !== null && (
        <div style={{ background: '#111', borderRadius: 12, padding: 24, marginBottom: 40 }}>
          <h3 style={{ marginTop: 0 }}>Current Usage</h3>
          <div style={{ display: 'flex', gap: 40, marginTop: 16 }}>
            <div>
              <div style={{ fontSize: 32, fontWeight: 700 }}>{remaining}</div>
              <div style={{ color: '#888', fontSize: 14 }}>Credits remaining</div>
            </div>
            <div>
              <div style={{ fontSize: 32, fontWeight: 700 }}>{usage}</div>
              <div style={{ color: '#888', fontSize: 14 }}>Used this month</div>
            </div>
            <div>
              <div style={{ fontSize: 32, fontWeight: 700 }}>{credits}</div>
              <div style={{ color: '#888', fontSize: 14 }}>Total credits</div>
            </div>
          </div>
          <div style={{ marginTop: 16, background: '#1a1a1a', borderRadius: 8, height: 8, overflow: 'hidden' }}>
            <div style={{ width: `${usedPercent}%`, background: '#4ade80', height: '100%' }} />
          </div>
        </div>
      )}

      <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(220px, 1fr))', gap: 16 }}>
        {plans.map(plan => (
          <div key={plan.name} style={{ background: '#111', borderRadius: 12, padding: 24, border: plan.name === 'Hobby' ? '1px solid #4ade80' : '1px solid transparent' }}>
            <div style={{ fontSize: 14, color: '#888', marginBottom: 8 }}>{plan.name}</div>
            <div style={{ fontSize: 36, fontWeight: 700, marginBottom: 8 }}>{plan.price}<span style={{ fontSize: 14, color: '#888' }}>/mo</span></div>
            <div style={{ fontSize: 14, marginBottom: 16, color: '#4ade80' }}>{plan.credits}</div>
            <ul style={{ listStyle: 'none', padding: 0, margin: 0 }}>
              {plan.features.map(f => (
                <li key={f} style={{ padding: '4px 0', fontSize: 14, color: '#aaa' }}>✓ {f}</li>
              ))}
            </ul>
            <button style={{ width: '100%', marginTop: 16, padding: '10px', background: plan.name === 'Hobby' ? '#4ade80' : '#fff', color: '#000', borderRadius: 6, border: 'none', fontWeight: 600, cursor: 'pointer' }}>
              {plan.name === 'Free' ? 'Current Plan' : 'Upgrade'}
            </button>
          </div>
        ))}
      </div>
    </main>
  )
}
|
||||||
320
frontend/app/dashboard/page.tsx
Normal file
320
frontend/app/dashboard/page.tsx
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
import Link from 'next/link'
|
||||||
|
|
||||||
|
export default function Dashboard() {
|
||||||
|
const [token, setToken] = useState('')
|
||||||
|
const [apiKey, setApiKey] = useState('')
|
||||||
|
const [url, setUrl] = useState('https://example.com')
|
||||||
|
const [endpoint, setEndpoint] = useState('screenshot')
|
||||||
|
const [result, setResult] = useState('')
|
||||||
|
const [loading, setLoading] = useState(false)
|
||||||
|
const [apiKeys, setApiKeys] = useState<{id: string, name: string}[]>([])
|
||||||
|
const [email, setEmail] = useState('')
|
||||||
|
const [password, setPassword] = useState('')
|
||||||
|
const [isLoggedIn, setIsLoggedIn] = useState(false)
|
||||||
|
const [newKeyName, setNewKeyName] = useState('')
|
||||||
|
|
||||||
|
const endpoints = ['crawl', 'content', 'screenshot', 'pdf', 'markdown', 'snapshot', 'scrape', 'json', 'links']
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const saved = localStorage.getItem('crawlapi_token')
|
||||||
|
if (saved) {
|
||||||
|
setToken(saved)
|
||||||
|
setIsLoggedIn(true)
|
||||||
|
fetchApiKeys(saved)
|
||||||
|
}
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
async function login() {
|
||||||
|
try {
|
||||||
|
const res = await fetch('http://localhost:3000/api/auth/login', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ email, password }),
|
||||||
|
})
|
||||||
|
const data = await res.json()
|
||||||
|
if (data.token) {
|
||||||
|
setToken(data.token)
|
||||||
|
setIsLoggedIn(true)
|
||||||
|
localStorage.setItem('crawlapi_token', data.token)
|
||||||
|
fetchApiKeys(data.token)
|
||||||
|
} else {
|
||||||
|
setResult(JSON.stringify(data, null, 2))
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
setResult(String(e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchApiKeys(t: string) {
|
||||||
|
try {
|
||||||
|
const res = await fetch('http://localhost:3000/api/auth/api-keys', {
|
||||||
|
headers: { 'x-auth-token': t },
|
||||||
|
})
|
||||||
|
const data = await res.json()
|
||||||
|
setApiKeys(data || [])
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createApiKey() {
|
||||||
|
try {
|
||||||
|
const res = await fetch('http://localhost:3000/api/auth/api-keys', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'x-auth-token': token,
|
||||||
|
},
|
||||||
|
body: JSON.stringify({ name: newKeyName || 'New Key' }),
|
||||||
|
})
|
||||||
|
const data = await res.json()
|
||||||
|
setResult(JSON.stringify(data, null, 2))
|
||||||
|
fetchApiKeys(token)
|
||||||
|
} catch (e) {
|
||||||
|
setResult(String(e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function testApi() {
|
||||||
|
setLoading(true)
|
||||||
|
try {
|
||||||
|
const res = await fetch(`http://localhost:3000/api/${endpoint}`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'x-api-key': apiKey,
|
||||||
|
},
|
||||||
|
body: JSON.stringify({ url }),
|
||||||
|
})
|
||||||
|
const data = await res.json()
|
||||||
|
setResult(JSON.stringify(data, null, 2))
|
||||||
|
} catch (e) {
|
||||||
|
setResult(String(e))
|
||||||
|
} finally {
|
||||||
|
setLoading(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<main style={{ maxWidth: 1200, margin: '0 auto', padding: '40px 20px' }}>
|
||||||
|
<nav style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 60 }}>
|
||||||
|
<Link href="/" style={{ fontSize: 24, fontWeight: 700, color: '#fff', textDecoration: 'none' }}>Crawl API</Link>
|
||||||
|
<Link href="/" style={{ color: '#888', textDecoration: 'none' }}>← Back</Link>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<h1 style={{ fontSize: 36, marginBottom: 8 }}>Dashboard</h1>
|
||||||
|
<p style={{ color: '#888', marginBottom: 40 }}>Test your API keys and monitor usage.</p>
|
||||||
|
|
||||||
|
{!isLoggedIn ? (
|
||||||
|
<div style={{ background: '#111', borderRadius: 12, padding: 24, marginBottom: 24 }}>
|
||||||
|
<h3 style={{ marginTop: 0 }}>Login</h3>
|
||||||
|
<button
|
||||||
|
onClick={async () => {
|
||||||
|
const res = await fetch('http://localhost:3000/api/auth/google')
|
||||||
|
const data = await res.json()
|
||||||
|
if (data.url) window.location.href = data.url
|
||||||
|
}}
|
||||||
|
style={{
|
||||||
|
background: '#4285f4',
|
||||||
|
color: '#fff',
|
||||||
|
padding: '12px 24px',
|
||||||
|
borderRadius: 8,
|
||||||
|
border: 'none',
|
||||||
|
fontWeight: 600,
|
||||||
|
cursor: 'pointer',
|
||||||
|
marginBottom: 16,
|
||||||
|
width: '100%'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Sign in with Google
|
||||||
|
</button>
|
||||||
|
<div style={{ color: '#888', textAlign: 'center', marginBottom: 16 }}>or</div>
|
||||||
|
<div style={{ display: 'grid', gap: 16, marginBottom: 16 }}>
|
||||||
|
<input
|
||||||
|
type="email"
|
||||||
|
value={email}
|
||||||
|
onChange={(e) => setEmail(e.target.value)}
|
||||||
|
placeholder="demo@crawlapi.dev"
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
padding: 12,
|
||||||
|
background: '#1a1a1a',
|
||||||
|
border: '1px solid #333',
|
||||||
|
borderRadius: 8,
|
||||||
|
color: '#fff',
|
||||||
|
fontSize: 14,
|
||||||
|
boxSizing: 'border-box'
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<input
|
||||||
|
type="password"
|
||||||
|
value={password}
|
||||||
|
onChange={(e) => setPassword(e.target.value)}
|
||||||
|
placeholder="password"
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
padding: 12,
|
||||||
|
background: '#1a1a1a',
|
||||||
|
border: '1px solid #333',
|
||||||
|
borderRadius: 8,
|
||||||
|
color: '#fff',
|
||||||
|
fontSize: 14,
|
||||||
|
boxSizing: 'border-box'
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={login}
|
||||||
|
style={{
|
||||||
|
background: '#fff',
|
||||||
|
color: '#000',
|
||||||
|
padding: '12px 24px',
|
||||||
|
borderRadius: 8,
|
||||||
|
border: 'none',
|
||||||
|
fontWeight: 600,
|
||||||
|
cursor: 'pointer'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Login
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<div style={{ background: '#111', borderRadius: 12, padding: 24, marginBottom: 24 }}>
|
||||||
|
<h3 style={{ marginTop: 0 }}>Your API Keys</h3>
|
||||||
|
<div style={{ display: 'flex', gap: 8, marginBottom: 16 }}>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={newKeyName}
|
||||||
|
onChange={(e) => setNewKeyName(e.target.value)}
|
||||||
|
placeholder="Key name"
|
||||||
|
style={{
|
||||||
|
flex: 1,
|
||||||
|
padding: 12,
|
||||||
|
background: '#1a1a1a',
|
||||||
|
border: '1px solid #333',
|
||||||
|
borderRadius: 8,
|
||||||
|
color: '#fff',
|
||||||
|
fontSize: 14,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
onClick={createApiKey}
|
||||||
|
style={{
|
||||||
|
background: '#fff',
|
||||||
|
color: '#000',
|
||||||
|
padding: '12px 24px',
|
||||||
|
borderRadius: 8,
|
||||||
|
border: 'none',
|
||||||
|
fontWeight: 600,
|
||||||
|
cursor: 'pointer'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Create Key
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{apiKeys.length > 0 ? (
|
||||||
|
<ul style={{ listStyle: 'none', padding: 0, margin: 0 }}>
|
||||||
|
{apiKeys.map((k) => (
|
||||||
|
<li key={k.id} style={{ padding: '8px 0', borderBottom: '1px solid #222', color: '#888' }}>
|
||||||
|
{k.name} <span style={{ fontFamily: 'monospace', fontSize: 12 }}>({k.id})</span>
|
||||||
|
</li>
|
||||||
|
))}
|
||||||
|
</ul>
|
||||||
|
) : (
|
||||||
|
<p style={{ color: '#888' }}>No API keys yet. Create one above.</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style={{ background: '#111', borderRadius: 12, padding: 24, marginBottom: 24 }}>
|
||||||
|
<h3 style={{ marginTop: 0 }}>Test API</h3>
|
||||||
|
<div style={{ display: 'grid', gap: 16, marginBottom: 16 }}>
|
||||||
|
<div>
|
||||||
|
<label style={{ display: 'block', color: '#888', marginBottom: 8, fontSize: 14 }}>API Key</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={apiKey}
|
||||||
|
onChange={(e) => setApiKey(e.target.value)}
|
||||||
|
placeholder="your-api-key"
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
padding: 12,
|
||||||
|
background: '#1a1a1a',
|
||||||
|
border: '1px solid #333',
|
||||||
|
borderRadius: 8,
|
||||||
|
color: '#fff',
|
||||||
|
fontSize: 14,
|
||||||
|
boxSizing: 'border-box'
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label style={{ display: 'block', color: '#888', marginBottom: 8, fontSize: 14 }}>URL</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={url}
|
||||||
|
onChange={(e) => setUrl(e.target.value)}
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
padding: 12,
|
||||||
|
background: '#1a1a1a',
|
||||||
|
border: '1px solid #333',
|
||||||
|
borderRadius: 8,
|
||||||
|
color: '#fff',
|
||||||
|
fontSize: 14,
|
||||||
|
boxSizing: 'border-box'
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label style={{ display: 'block', color: '#888', marginBottom: 8, fontSize: 14 }}>Endpoint</label>
|
||||||
|
<select
|
||||||
|
value={endpoint}
|
||||||
|
onChange={(e) => setEndpoint(e.target.value)}
|
||||||
|
style={{
|
||||||
|
width: '100%',
|
||||||
|
padding: 12,
|
||||||
|
background: '#1a1a1a',
|
||||||
|
border: '1px solid #333',
|
||||||
|
borderRadius: 8,
|
||||||
|
color: '#fff',
|
||||||
|
fontSize: 14
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{endpoints.map((ep) => (
|
||||||
|
<option key={ep} value={ep}>{ep}</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={testApi}
|
||||||
|
disabled={loading}
|
||||||
|
style={{
|
||||||
|
background: loading ? '#333' : '#fff',
|
||||||
|
color: '#000',
|
||||||
|
padding: '12px 24px',
|
||||||
|
borderRadius: 8,
|
||||||
|
border: 'none',
|
||||||
|
fontWeight: 600,
|
||||||
|
cursor: loading ? 'not-allowed' : 'pointer'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{loading ? 'Sending...' : 'Send Request'}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{result && (
|
||||||
|
<div style={{ background: '#111', borderRadius: 12, padding: 24 }}>
|
||||||
|
<h3 style={{ marginTop: 0 }}>Response</h3>
|
||||||
|
<pre style={{ margin: 0, overflow: 'auto', fontSize: 13 }}>{result}</pre>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</main>
|
||||||
|
)
|
||||||
|
}
|
||||||
106
frontend/app/docs/page.tsx
Normal file
106
frontend/app/docs/page.tsx
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import Link from 'next/link'
|
||||||
|
|
||||||
|
/**
 * Static API documentation page.
 *
 * Renders the base URL, authentication header, request format example,
 * endpoint list, rate limits and error codes. All content is hard-coded.
 */
export default function Docs() {
  // Styles shared by several sections.
  const sectionStyle = { marginBottom: 48 }
  const headingStyle = { fontSize: 24, marginBottom: 16 }
  const codeBoxStyle = { background: '#111', padding: '12px 16px', borderRadius: 8, display: 'block' }
  const preStyle = { background: '#111', padding: 16, borderRadius: 8, overflow: 'auto' }
  const rowStyle = { borderBottom: '1px solid #222' }
  const noteStyle = { color: '#888', marginBottom: 12 }

  // Example request shown under "Request format".
  const curlExample = [
    'curl -X POST https://crawlapi.dev/api/screenshot \\',
    '-H "Content-Type: application/json" \\',
    '-H "x-api-key: YOUR_API_KEY" \\',
    `-d '{"url": "https://example.com"}'`,
  ].join('\n')

  // Example error payload shown under "Error handling".
  const errorExample = '{ "success": false, "error": "Missing or invalid API key" }'

  const endpointDocs = [
    { path: '/api/crawl', desc: 'Full JS-rendered page crawl' },
    { path: '/api/content', desc: 'Raw HTML content' },
    { path: '/api/screenshot', desc: 'PNG screenshot' },
    { path: '/api/pdf', desc: 'PDF export' },
    { path: '/api/markdown', desc: 'Markdown extraction' },
    { path: '/api/snapshot', desc: 'HTML + screenshot' },
    { path: '/api/scrape', desc: 'CSS selector extraction' },
    { path: '/api/json', desc: 'Structured JSON' },
    { path: '/api/links', desc: 'Extract all links' },
  ]

  const rateLimits = [
    { label: 'Requests per minute', value: '60' },
    { label: 'Max concurrent', value: '10' },
    { label: 'Request timeout', value: '30s' },
  ]

  const errorCodes = [
    { code: '400', meaning: 'Missing or invalid URL / bad options' },
    { code: '401', meaning: 'Missing or invalid API key' },
    { code: '403', meaning: 'Insufficient credits' },
    { code: '405', meaning: 'Wrong HTTP method (use POST)' },
    { code: '429', meaning: 'Rate limit exceeded' },
    { code: '500', meaning: 'Server error' },
  ]

  return (
    <main style={{ maxWidth: 1200, margin: '0 auto', padding: '40px 20px' }}>
      <nav style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 60 }}>
        <Link href="/" style={{ fontSize: 24, fontWeight: 700, color: '#fff', textDecoration: 'none' }}>Crawl API</Link>
        <Link href="/" style={{ color: '#888', textDecoration: 'none' }}>← Back</Link>
      </nav>

      <h1 style={{ fontSize: 42, marginBottom: 16 }}>API Documentation</h1>
      <p style={{ color: '#888', marginBottom: 48 }}>
        API reference for Crawl API — 9 endpoints for crawling, scraping, screenshots, PDFs, and more.
      </p>

      <section style={sectionStyle}>
        <h2 style={headingStyle}>Base URL</h2>
        <code style={codeBoxStyle}>
          https://crawlapi.dev
        </code>
      </section>

      <section style={sectionStyle}>
        <h2 style={headingStyle}>Authentication</h2>
        <p style={noteStyle}>All requests require an API key sent via the x-api-key header.</p>
        <code style={codeBoxStyle}>
          x-api-key: YOUR_API_KEY
        </code>
      </section>

      <section style={sectionStyle}>
        <h2 style={headingStyle}>Request format</h2>
        <p style={noteStyle}>Every endpoint accepts a POST request with a JSON body. The url field is always required.</p>
        <pre style={preStyle}>
          {curlExample}
        </pre>
      </section>

      <section style={sectionStyle}>
        <h2 style={headingStyle}>Endpoints</h2>
        {endpointDocs.map(({ path, desc }) => (
          <div key={path} style={{ background: '#111', borderRadius: 8, padding: 16, marginBottom: 12 }}>
            <span style={{ color: '#4ade80', fontWeight: 600 }}>POST</span>{' '}
            <code>{path}</code>
            <div style={{ color: '#888', marginTop: 4 }}>{desc}</div>
          </div>
        ))}
      </section>

      <section style={sectionStyle}>
        <h2 style={headingStyle}>Rate limits</h2>
        <table style={{ width: '100%', borderCollapse: 'collapse' }}>
          <tbody>
            {rateLimits.map(({ label, value }) => (
              <tr key={label} style={rowStyle}>
                <td style={{ padding: '12px 0', color: '#888' }}>{label}</td>
                <td style={{ padding: '12px 0', textAlign: 'right' }}>{value}</td>
              </tr>
            ))}
          </tbody>
        </table>
      </section>

      <section>
        <h2 style={headingStyle}>Error handling</h2>
        <pre style={preStyle}>
          {errorExample}
        </pre>
        <table style={{ width: '100%', borderCollapse: 'collapse', marginTop: 16 }}>
          <tbody>
            {errorCodes.map(({ code, meaning }) => (
              <tr key={code} style={rowStyle}>
                <td style={{ padding: '12px 0' }}>{code}</td>
                <td style={{ padding: '12px 0', color: '#888' }}>{meaning}</td>
              </tr>
            ))}
          </tbody>
        </table>
      </section>
    </main>
  )
}
|
||||||
18
frontend/app/layout.tsx
Normal file
18
frontend/app/layout.tsx
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
// Page title and description exported for the Next.js App Router.
const SITE_TITLE = 'Crawl API — Headless Browser REST API'
const SITE_DESCRIPTION =
  'One API to crawl, screenshot, scrape, and extract data from any webpage. Built for developers.'

export const metadata = {
  title: SITE_TITLE,
  description: SITE_DESCRIPTION,
}
|
||||||
|
|
||||||
|
/**
 * Root layout: wraps every page in the <html>/<body> shell and applies the
 * global dark theme (zero margin, system font, dark background, white text).
 *
 * @param children Page content rendered inside <body>.
 */
export default function RootLayout({
  children,
}: {
  children: React.ReactNode
}) {
  const bodyStyle = {
    margin: 0,
    fontFamily: 'system-ui, -apple-system, sans-serif',
    background: '#0a0a0a',
    color: '#fff',
  }

  return (
    <html lang="en">
      <body style={bodyStyle}>{children}</body>
    </html>
  )
}
|
||||||
124
frontend/app/page.tsx
Normal file
124
frontend/app/page.tsx
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import Link from 'next/link'
|
||||||
|
|
||||||
|
/**
 * Marketing landing page: top navigation, hero with call-to-action links,
 * a sample request/response, the endpoint grid, the pricing grid and a footer.
 * All content is static.
 */
export default function Home() {
  // Top-right navigation entries.
  const navLinks = [
    { href: '/docs', label: 'Docs' },
    { href: '/playground', label: 'Playground' },
    { href: '/billing', label: 'Pricing' },
    { href: '/dashboard', label: 'Dashboard' },
  ]

  // Cards shown in the "9 endpoints" grid.
  const endpointCards = [
    { method: 'POST', path: '/api/crawl', desc: 'Full JS-rendered page crawl with all resources' },
    { method: 'POST', path: '/api/content', desc: 'Raw HTML content of any page' },
    { method: 'POST', path: '/api/screenshot', desc: 'Full-page PNG screenshot, hosted on CDN' },
    { method: 'POST', path: '/api/pdf', desc: 'PDF export of any page, hosted on CDN' },
    { method: 'POST', path: '/api/markdown', desc: 'Clean Markdown extraction from any page' },
    { method: 'POST', path: '/api/snapshot', desc: 'HTML + screenshot combined in one call' },
    { method: 'POST', path: '/api/scrape', desc: 'Structured extraction with CSS selectors' },
    { method: 'POST', path: '/api/json', desc: 'Page content as structured JSON' },
    { method: 'POST', path: '/api/links', desc: 'Extract all links from any page' },
  ]

  // Cards shown in the pricing grid.
  const plans = [
    { name: 'Hobby', price: '$9', credits: '1,000 API calls/mo', concurrent: '3 concurrent requests' },
    { name: 'Starter', price: '$19', credits: '3,000 API calls/mo', concurrent: '5 concurrent requests' },
    { name: 'Pro', price: '$49', credits: '10,000 API calls/mo', concurrent: '10 concurrent requests' },
    { name: 'Startup', price: '$99', credits: '25,000 API calls/mo', concurrent: '20 concurrent requests' },
  ]

  // Styles reused across sections.
  const mutedLink = { color: '#888', textDecoration: 'none' }
  const card = { background: '#111', borderRadius: 12, padding: 24 }
  const sectionGap = { marginBottom: 120 }
  const centeredHeading = { fontSize: 36, marginBottom: 16, textAlign: 'center' } as const
  const centeredLead = { color: '#888', textAlign: 'center', marginBottom: 48 } as const

  return (
    <main style={{ maxWidth: 1200, margin: '0 auto', padding: '40px 20px' }}>
      <nav style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 80 }}>
        <div style={{ fontSize: 24, fontWeight: 700 }}>Crawl API</div>
        <div style={{ display: 'flex', gap: 24 }}>
          {navLinks.map(({ href, label }) => (
            <Link key={href} href={href} style={mutedLink}>{label}</Link>
          ))}
        </div>
      </nav>

      <section style={{ textAlign: 'center', marginBottom: 120 }}>
        <h1 style={{ fontSize: 56, marginBottom: 20, lineHeight: 1.1 }}>
          Extract, capture, and convert<br />any webpage
        </h1>
        <p style={{ fontSize: 20, color: '#888', maxWidth: 600, margin: '0 auto 40px' }}>
          Screenshots, PDFs, markdown, structured data and more — all from a single API call.
          Just send a URL and get back exactly what you need.
        </p>
        <div style={{ display: 'flex', gap: 16, justifyContent: 'center' }}>
          <Link
            href="/dashboard"
            style={{
              background: '#fff',
              color: '#000',
              padding: '14px 28px',
              borderRadius: 8,
              textDecoration: 'none',
              fontWeight: 600,
            }}
          >
            Get started free
          </Link>
          <Link
            href="/playground"
            style={{
              border: '1px solid #333',
              color: '#fff',
              padding: '14px 28px',
              borderRadius: 8,
              textDecoration: 'none',
            }}
          >
            API Playground →
          </Link>
        </div>
      </section>

      <section style={sectionGap}>
        <div style={{ ...card, fontFamily: 'monospace', fontSize: 14, overflow: 'auto' }}>
          <div style={{ color: '#888', marginBottom: 12 }}>$ curl -X POST /api/screenshot -d {'{'}"url": "https://example.com"{'}'}</div>
          <div>{'{'} "success": <span style={{ color: '#4ade80' }}>true</span>,</div>
          <div> "url": <span style={{ color: '#fbbf24' }}>"https://cdn.crawlapi.dev/s/abc123.png"</span>,</div>
          <div> "width": <span style={{ color: '#60a5fa' }}>1440</span> {'}'}</div>
        </div>
      </section>

      <section style={sectionGap}>
        <h2 style={centeredHeading}>9 endpoints, one shape</h2>
        <p style={centeredLead}>
          Every endpoint accepts the same request body. Send a URL and optional config — get back exactly what you need.
        </p>
        <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(300px, 1fr))', gap: 16 }}>
          {endpointCards.map(({ method, path, desc }) => (
            <div key={path} style={card}>
              <span style={{ color: '#4ade80', fontSize: 12, fontWeight: 600 }}>{method}</span>
              <div style={{ fontFamily: 'monospace', fontSize: 14, margin: '8px 0' }}>{path}</div>
              <div style={{ color: '#888', fontSize: 14 }}>{desc}</div>
            </div>
          ))}
        </div>
      </section>

      <section style={sectionGap}>
        <h2 style={centeredHeading}>Simple, per-call pricing</h2>
        <p style={centeredLead}>
          Start free. Scale as you grow. Every endpoint costs 1 API call — no surprises.
        </p>
        <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(220px, 1fr))', gap: 16 }}>
          {plans.map(({ name, price, credits, concurrent }) => (
            <div key={name} style={card}>
              <div style={{ fontSize: 14, color: '#888', marginBottom: 8 }}>{name}</div>
              <div style={{ fontSize: 36, fontWeight: 700, marginBottom: 16 }}>{price}<span style={{ fontSize: 14, color: '#888' }}>/mo</span></div>
              <div style={{ fontSize: 14, marginBottom: 8 }}>{credits}</div>
              <div style={{ fontSize: 14, color: '#888' }}>{concurrent}</div>
            </div>
          ))}
        </div>
      </section>

      <footer style={{ textAlign: 'center', color: '#888', padding: '40px 0', borderTop: '1px solid #222' }}>
        © 2026 Crawl API. Built for developers.
      </footer>
    </main>
  )
}
|
||||||
159
frontend/app/playground/page.tsx
Normal file
159
frontend/app/playground/page.tsx
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
'use client'
|
||||||
|
|
||||||
|
import { useState } from 'react'
|
||||||
|
import Link from 'next/link'
|
||||||
|
|
||||||
|
/**
 * Origin of the Crawl API used for live requests and generated snippets.
 * Defaults to the local dev server; set NEXT_PUBLIC_API_URL at build time to
 * point the playground at another deployment. Trailing slashes are removed.
 */
const API_BASE_URL = (process.env.NEXT_PUBLIC_API_URL || 'http://localhost:3000').replace(/\/+$/, '')

/** JSON body sent to every endpoint. */
type RequestBody = { url: string; options?: unknown }

/**
 * Builds the request body from the form fields.
 * Blank or `{}` options are omitted so the snippet and the real request match.
 *
 * @throws SyntaxError when the options text is not valid JSON.
 */
function buildRequestBody(url: string, optionsText: string): RequestBody {
  const trimmed = optionsText.trim()
  if (trimmed === '' || trimmed === '{}') return { url }
  return { url, options: JSON.parse(trimmed) }
}

/**
 * Renders a parsed JSON value as a Python literal.
 * Only real booleans and nulls become True/False/None; string contents
 * (for example a URL containing "null") are left untouched.
 */
function toPythonLiteral(value: unknown, indent = ''): string {
  if (value === null || value === undefined) return 'None'
  if (typeof value === 'boolean') return value ? 'True' : 'False'
  // JSON number and string syntax is also valid Python literal syntax.
  if (typeof value !== 'object') return JSON.stringify(value)

  const inner = indent + '    '
  if (Array.isArray(value)) {
    if (value.length === 0) return '[]'
    const items = value.map((item) => inner + toPythonLiteral(item, inner))
    return `[\n${items.join(',\n')}\n${indent}]`
  }
  const entries = Object.entries(value as Record<string, unknown>)
  if (entries.length === 0) return '{}'
  const fields = entries.map(([k, v]) => `${inner}${JSON.stringify(k)}: ${toPythonLiteral(v, inner)}`)
  return `{\n${fields.join(',\n')}\n${indent}}`
}

/**
 * Formats a fetch response for display: pretty-printed JSON when the body
 * parses, otherwise the HTTP status line followed by the raw body.
 */
async function formatResponse(res: Response): Promise<string> {
  const raw = await res.text()
  try {
    return JSON.stringify(JSON.parse(raw), null, 2)
  } catch {
    return `HTTP ${res.status} ${res.statusText}\n${raw}`
  }
}

/**
 * Interactive API playground.
 *
 * Lets the user pick an endpoint, enter an API key, URL and JSON options,
 * send the request from the browser, and copy an equivalent curl / Python /
 * JavaScript snippet.
 */
export default function Playground() {
  const [apiKey, setApiKey] = useState('')
  const [url, setUrl] = useState('https://example.com')
  const [endpoint, setEndpoint] = useState('screenshot')
  const [options, setOptions] = useState('{}')
  const [result, setResult] = useState('')
  const [loading, setLoading] = useState(false)
  const [codeLang, setCodeLang] = useState('curl')

  const endpoints = [
    { value: 'screenshot', label: 'Screenshot' },
    { value: 'pdf', label: 'PDF' },
    { value: 'crawl', label: 'Crawl' },
    { value: 'content', label: 'Content' },
    { value: 'markdown', label: 'Markdown' },
    { value: 'json', label: 'JSON' },
    { value: 'links', label: 'Links' },
    { value: 'scrape', label: 'Scrape' },
    { value: 'snapshot', label: 'Snapshot' },
    { value: 'extract', label: 'AI Extract' },
  ]

  // Shared input styling; the <select> omits boxSizing as in the original design.
  const selectStyle = {
    width: '100%',
    padding: 10,
    background: '#1a1a1a',
    border: '1px solid #333',
    borderRadius: 6,
    color: '#fff',
    fontSize: 13,
  } as const
  const fieldStyle = { ...selectStyle, boxSizing: 'border-box' } as const
  const labelStyle = { display: 'block', color: '#888', marginBottom: 4, fontSize: 13 } as const
  const panelStyle = { background: '#111', borderRadius: 12, padding: 24 } as const

  /** Sends the configured request and shows the response or the error text. */
  async function sendRequest() {
    setLoading(true)
    try {
      // buildRequestBody may throw on invalid options JSON; the catch below reports it.
      const res = await fetch(`${API_BASE_URL}/api/${endpoint}`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': apiKey,
        },
        body: JSON.stringify(buildRequestBody(url, options)),
      })
      setResult(await formatResponse(res))
    } catch (e) {
      setResult(String(e))
    } finally {
      setLoading(false)
    }
  }

  /**
   * Returns the snippet for the selected language.
   * Runs on every render, so it must never throw: while the options field
   * holds incomplete JSON a placeholder message is returned instead.
   */
  function getCodeSnippet() {
    let body: RequestBody
    try {
      body = buildRequestBody(url, options)
    } catch {
      return 'Options must be valid JSON to generate a snippet.'
    }

    const json = JSON.stringify(body, null, 2)
    const key = apiKey || 'YOUR_API_KEY'
    const target = `${API_BASE_URL}/api/${endpoint}`

    switch (codeLang) {
      case 'curl':
        // A single quote inside the body would end the shell string; escape it as '\''.
        return `curl -X POST ${target} \\
  -H "Content-Type: application/json" \\
  -H "x-api-key: ${key}" \\
  -d '${json.replace(/'/g, `'\\''`)}'`
      case 'python':
        return `import requests

response = requests.post(
    "${target}",
    headers={
        "Content-Type": "application/json",
        "x-api-key": "${key}"
    },
    json=${toPythonLiteral(body, '    ')}
)
print(response.json())`
      case 'javascript':
        return `const response = await fetch('${target}', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'x-api-key': '${key}'
  },
  body: JSON.stringify(${json})
});
const data = await response.json();
console.log(data);`
      default:
        return ''
    }
  }

  return (
    <main style={{ maxWidth: 1200, margin: '0 auto', padding: '40px 20px' }}>
      <nav style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: 40 }}>
        <Link href="/" style={{ fontSize: 24, fontWeight: 700, color: '#fff', textDecoration: 'none' }}>Crawl API</Link>
        <Link href="/" style={{ color: '#888', textDecoration: 'none' }}>← Back</Link>
      </nav>

      <h1 style={{ fontSize: 36, marginBottom: 8 }}>API Playground</h1>
      <p style={{ color: '#888', marginBottom: 32 }}>Test any endpoint directly from the browser.</p>

      <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 24 }}>
        <div>
          <div style={{ ...panelStyle, marginBottom: 24 }}>
            <h3 style={{ marginTop: 0 }}>Request</h3>
            <div style={{ display: 'grid', gap: 12 }}>
              <div>
                <label style={labelStyle}>API Key</label>
                <input type="text" value={apiKey} onChange={e => setApiKey(e.target.value)} placeholder="your-api-key"
                  style={fieldStyle} />
              </div>
              <div>
                <label style={labelStyle}>Endpoint</label>
                <select value={endpoint} onChange={e => setEndpoint(e.target.value)}
                  style={selectStyle}>
                  {endpoints.map(ep => <option key={ep.value} value={ep.value}>{ep.label}</option>)}
                </select>
              </div>
              <div>
                <label style={labelStyle}>URL</label>
                <input type="text" value={url} onChange={e => setUrl(e.target.value)}
                  style={fieldStyle} />
              </div>
              <div>
                <label style={labelStyle}>Options (JSON)</label>
                <textarea value={options} onChange={e => setOptions(e.target.value)} rows={4}
                  style={{ ...fieldStyle, fontFamily: 'monospace' }} />
              </div>
              <button onClick={sendRequest} disabled={loading}
                style={{ background: loading ? '#333' : '#fff', color: '#000', padding: '12px', borderRadius: 6, border: 'none', fontWeight: 600, cursor: loading ? 'not-allowed' : 'pointer' }}>
                {loading ? 'Sending...' : 'Send Request'}
              </button>
            </div>
          </div>

          <div style={panelStyle}>
            <h3 style={{ marginTop: 0 }}>Code Snippet</h3>
            <div style={{ display: 'flex', gap: 8, marginBottom: 12 }}>
              {['curl', 'python', 'javascript'].map(l => (
                <button key={l} onClick={() => setCodeLang(l)}
                  style={{ background: codeLang === l ? '#fff' : '#1a1a1a', color: codeLang === l ? '#000' : '#888', padding: '6px 12px', borderRadius: 4, border: 'none', fontSize: 12, cursor: 'pointer', textTransform: 'uppercase' }}>
                  {l}
                </button>
              ))}
            </div>
            <pre style={{ margin: 0, fontSize: 12, overflow: 'auto', background: '#0a0a0a', padding: 12, borderRadius: 6 }}>{getCodeSnippet()}</pre>
          </div>
        </div>

        <div>
          <div style={{ ...panelStyle, height: '100%' }}>
            <h3 style={{ marginTop: 0 }}>Response</h3>
            {result ? (
              <pre style={{ margin: 0, fontSize: 13, overflow: 'auto', background: '#0a0a0a', padding: 12, borderRadius: 6, height: 'calc(100% - 40px)' }}>{result}</pre>
            ) : (
              <div style={{ color: '#555', textAlign: 'center', padding: '40px 0' }}>Send a request to see the response</div>
            )}
          </div>
        </div>
      </div>
    </main>
  )
}
|
||||||
7
frontend/next.config.js
Normal file
7
frontend/next.config.js
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
/**
 * Next.js build configuration: static export, with build output written to
 * the `dist` directory.
 *
 * @type {import('next').NextConfig}
 */
const nextConfig = { output: 'export', distDir: 'dist' }

module.exports = nextConfig
|
||||||
22
frontend/package.json
Normal file
22
frontend/package.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"name": "crawlapi-frontend",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"dev": "next dev",
|
||||||
|
"build": "next build",
|
||||||
|
"start": "next start",
|
||||||
|
"lint": "next lint"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"next": "14.2.0",
|
||||||
|
"react": "^18.3.0",
|
||||||
|
"react-dom": "^18.3.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^20.0.0",
|
||||||
|
"@types/react": "^18.3.0",
|
||||||
|
"@types/react-dom": "^18.3.0",
|
||||||
|
"typescript": "^5.4.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
26
frontend/tsconfig.json
Normal file
26
frontend/tsconfig.json
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"lib": ["dom", "dom.iterable", "esnext"],
|
||||||
|
"allowJs": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"strict": true,
|
||||||
|
"noEmit": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"module": "esnext",
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"jsx": "preserve",
|
||||||
|
"incremental": true,
|
||||||
|
"plugins": [
|
||||||
|
{
|
||||||
|
"name": "next"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"paths": {
|
||||||
|
"@/*": ["./*"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
|
||||||
|
"exclude": ["node_modules"]
|
||||||
|
}
|
||||||
97
k8s/api.yaml
Normal file
97
k8s/api.yaml
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
---
# API server Deployment: three replicas of the crawlapi/api image on port 3000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api
  namespace: crawlapi
spec:
  replicas: 3
  selector:
    matchLabels:
      app: api
  template:
    metadata:
      labels:
        app: api
    spec:
      containers:
        - name: api
          # NOTE(review): a mutable `latest` tag makes rollouts and rollbacks
          # non-reproducible; consider pinning a version tag or digest.
          image: crawlapi/api:latest
          ports:
            - containerPort: 3000
          env:
            # $(VAR) references are expanded by Kubernetes from the container's
            # environment. POSTGRES_USER / POSTGRES_PASSWORD are presumably keys
            # of crawlapi-secrets (loaded via envFrom below); confirm.
            - name: DATABASE_URL
              value: "postgres://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@postgres:5432/crawlapi"
            - name: REDIS_URL
              value: "redis://redis:6379"
            - name: S3_ENDPOINT
              value: "http://minio:9000"
            - name: S3_BUCKET
              value: "crawlapi"
            - name: PLAYWRIGHT_SCRIPT_PATH
              value: "/app/playwright/pool.js"
          envFrom:
            - secretRef:
                name: crawlapi-secrets
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          # All three probes poll GET /metrics on the container port.
          livenessProbe:
            httpGet:
              path: /metrics
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /metrics
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
          # Allows up to 30 x 10s for startup before the other probes take over.
          startupProbe:
            httpGet:
              path: /metrics
              port: 3000
            failureThreshold: 30
            periodSeconds: 10
|
||||||
|
---
|
||||||
|
# Service exposing the API pods (app: api) on port 3000.
apiVersion: v1
kind: Service
metadata:
  name: api
  namespace: crawlapi
spec:
  selector:
    app: api
  ports:
    - port: 3000
      targetPort: 3000
|
||||||
|
---
|
||||||
|
# Routes api.crawlapi.dev to the api Service over TLS; the certificate is
# requested through the `letsencrypt` cluster issuer annotation.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: api
  namespace: crawlapi
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    cert-manager.io/cluster-issuer: letsencrypt
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - api.crawlapi.dev
      secretName: api-tls
  rules:
    - host: api.crawlapi.dev
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: api
                port:
                  number: 3000
|
||||||
30
k8s/cert-manager.yaml
Normal file
30
k8s/cert-manager.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
---
# Cluster-wide ACME issuer using the Let's Encrypt production directory,
# solving HTTP-01 challenges through the nginx ingress class.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: admin@crawlapi.dev
    privateKeySecretRef:
      name: letsencrypt-private-key
    solvers:
      - http01:
          ingress:
            class: nginx
|
||||||
|
---
|
||||||
|
# Certificate covering the apex, www, api and status hosts, stored in
# crawlapi-tls-secret.
# NOTE(review): the api and frontend Ingresses reference their own secrets
# (api-tls, frontend-tls); confirm which resource consumes this one.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: crawlapi-tls
  namespace: crawlapi
spec:
  secretName: crawlapi-tls-secret
  issuerRef:
    name: letsencrypt
    kind: ClusterIssuer
  dnsNames:
    - crawlapi.dev
    - www.crawlapi.dev
    - api.crawlapi.dev
    - status.crawlapi.dev
|
||||||
63
k8s/frontend.yaml
Normal file
63
k8s/frontend.yaml
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# Deployment "frontend": two replicas of the frontend image listening on 3000.
# Only resource requests are declared; no limits or probes are configured.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: frontend
  namespace: crawlapi
spec:
  replicas: 2
  selector:
    matchLabels:
      app: frontend
  template:
    metadata:
      labels:
        app: frontend
    spec:
      containers:
        - name: frontend
          image: crawlapi/frontend:latest
          ports:
            - containerPort: 3000
          resources:
            requests:
              memory: '128Mi'
              cpu: '100m'
|
||||||
|
---
|
||||||
|
# Service "frontend": forwards port 3000 to port 3000 on pods labelled
# app=frontend.
apiVersion: v1
kind: Service
metadata:
  name: frontend
  namespace: crawlapi
spec:
  selector:
    app: frontend
  ports:
    - port: 3000
      targetPort: 3000
|
||||||
|
---
|
||||||
|
# Ingress "frontend": serves crawlapi.dev and www.crawlapi.dev from Service
# `frontend` (port 3000) via the nginx ingress class, with TLS stored in the
# `frontend-tls` secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: frontend
  namespace: crawlapi
  annotations:
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    cert-manager.io/cluster-issuer: letsencrypt
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - crawlapi.dev
        - www.crawlapi.dev
      secretName: frontend-tls
  rules:
    - host: crawlapi.dev
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: frontend
                port:
                  number: 3000
    # The TLS block requests a certificate for www.crawlapi.dev, so that host
    # needs a routing rule too; without one, requests for it match no rule of
    # this Ingress.
    - host: www.crawlapi.dev
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: frontend
                port:
                  number: 3000
|
||||||
67
k8s/minio.yaml
Normal file
67
k8s/minio.yaml
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# Deployment "minio": single MinIO server storing objects under /data on the
# `minio-pvc` claim. S3 API on 9000, web console on 9001.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: minio
  namespace: crawlapi
spec:
  replicas: 1
  # The pod mounts a ReadWriteOnce claim. A rolling update would start the new
  # pod while the old one still holds the volume, so replace the pod instead.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: minio
  template:
    metadata:
      labels:
        app: minio
    spec:
      containers:
        - name: minio
          image: minio/minio:latest
          # `args`, not `command`: in Kubernetes `command` replaces the image
          # ENTRYPOINT, which would make the container try to execute a binary
          # called "server". `args` keeps the image entrypoint and only
          # supplies its arguments.
          args: ["server", "/data", "--console-address", ":9001"]
          env:
            - name: MINIO_ROOT_USER
              valueFrom:
                secretKeyRef:
                  name: crawlapi-secrets
                  key: minio-access-key
            - name: MINIO_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: crawlapi-secrets
                  key: minio-secret-key
          ports:
            - containerPort: 9000
            - containerPort: 9001
          volumeMounts:
            - name: minio-storage
              mountPath: /data
      volumes:
        - name: minio-storage
          persistentVolumeClaim:
            claimName: minio-pvc
|
||||||
|
---
|
||||||
|
# Service "minio": exposes two named ports on pods labelled app=minio -
# `api` (9000) and `console` (9001).
apiVersion: v1
kind: Service
metadata:
  name: minio
  namespace: crawlapi
spec:
  selector:
    app: minio
  ports:
    - name: api
      port: 9000
      targetPort: 9000
    - name: console
      port: 9001
      targetPort: 9001
|
||||||
|
---
|
||||||
|
# 50Gi ReadWriteOnce claim; referenced by name (`minio-pvc`) from the minio
# Deployment's volume definition.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: minio-pvc
  namespace: crawlapi
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
|
||||||
4
k8s/namespace.yaml
Normal file
4
k8s/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# Namespace that every other manifest in k8s/ places its objects in.
apiVersion: v1
kind: Namespace
metadata:
  name: crawlapi
|
||||||
57
k8s/postgres.yaml
Normal file
57
k8s/postgres.yaml
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
# StatefulSet "postgres": one PostgreSQL 16 instance with a 10Gi volume claimed
# per replica through `volumeClaimTemplates`. Credentials come from the
# `crawlapi-secrets` Secret.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: postgres
  namespace: crawlapi
spec:
  serviceName: postgres
  replicas: 1
  selector:
    matchLabels:
      app: postgres
  template:
    metadata:
      labels:
        app: postgres
    spec:
      containers:
        - name: postgres
          image: postgres:16-alpine
          env:
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: crawlapi-secrets
                  key: postgres-user
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: crawlapi-secrets
                  key: postgres-password
            - name: POSTGRES_DB
              value: crawlapi
            # Keep the data directory in a subdirectory of the mount. When the
            # volume root is itself a filesystem mount point (it may contain
            # e.g. `lost+found`), initdb refuses to initialise it. See the
            # official postgres image documentation for PGDATA.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          ports:
            - containerPort: 5432
          volumeMounts:
            - name: postgres-storage
              mountPath: /var/lib/postgresql/data
  volumeClaimTemplates:
    - metadata:
        name: postgres-storage
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 10Gi
|
||||||
|
---
|
||||||
|
# Service "postgres": forwards port 5432 to pods labelled app=postgres. Its
# name matches the StatefulSet's `serviceName`.
apiVersion: v1
kind: Service
metadata:
  name: postgres
  namespace: crawlapi
spec:
  selector:
    app: postgres
  ports:
    - port: 5432
      targetPort: 5432
|
||||||
32
k8s/redis.yaml
Normal file
32
k8s/redis.yaml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Deployment "redis": a single Redis 7 pod on port 6379. No volume is mounted,
# so this manifest configures no persistent storage for it.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  namespace: crawlapi
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          ports:
            - containerPort: 6379
|
||||||
|
---
|
||||||
|
# Service "redis": forwards port 6379 to pods labelled app=redis.
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: crawlapi
spec:
  selector:
    app: redis
  ports:
    - port: 6379
      targetPort: 6379
|
||||||
18
k8s/secrets.yaml
Normal file
18
k8s/secrets.yaml
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Secret "crawlapi-secrets": credentials referenced by the other manifests via
# `secretKeyRef` / `envFrom`.
#
# WARNING: every value below is a placeholder committed in plain text
# (`stringData` is not encoded). Replace all of them before deploying and keep
# the real values out of version control.
apiVersion: v1
kind: Secret
metadata:
  name: crawlapi-secrets
  namespace: crawlapi
type: Opaque
stringData:
  # PostgreSQL
  postgres-user: 'crawlapi'
  postgres-password: 'changeme'
  # JWT signing key
  jwt-secret: 'super-secret-jwt-key'
  # MinIO / S3
  minio-access-key: 'minioadmin'
  minio-secret-key: 'minioadmin'
  s3-access-key: 'minioadmin'
  s3-secret-key: 'minioadmin'
  # Stripe
  stripe-secret-key: 'sk_test_...'
  stripe-webhook-secret: 'whsec_...'
  # Google OAuth client (left empty here)
  google-client-id: ''
  google-client-secret: ''
|
||||||
69
k8s/worker.yaml
Normal file
69
k8s/worker.yaml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# Deployment "worker": crawl workers that talk to Postgres, Redis and MinIO and
# drive a Playwright browser pool.
# NOTE(review): `worker-hpa` also manages this Deployment's replica count;
# re-applying this manifest resets it to the `replicas` value below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: worker
  namespace: crawlapi
spec:
  replicas: 5
  selector:
    matchLabels:
      app: worker
  template:
    metadata:
      labels:
        app: worker
    spec:
      containers:
        - name: worker
          image: crawlapi/worker:latest
          env:
            # DATABASE_URL below interpolates $(POSTGRES_USER) and
            # $(POSTGRES_PASSWORD). Those variables must exist and be declared
            # before their first use; the Secret's keys are `postgres-user` /
            # `postgres-password`, so `envFrom` alone never defines them and
            # the URL would keep the literal "$(POSTGRES_USER)" text.
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: crawlapi-secrets
                  key: postgres-user
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: crawlapi-secrets
                  key: postgres-password
            - name: DATABASE_URL
              value: "postgres://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@postgres:5432/crawlapi"
            - name: REDIS_URL
              value: "redis://redis:6379"
            - name: S3_ENDPOINT
              value: "http://minio:9000"
            - name: S3_BUCKET
              value: "crawlapi"
            - name: PLAYWRIGHT_SCRIPT_PATH
              value: "/app/playwright/pool.js"
            # Env values must be strings, hence the quoted numbers.
            - name: BROWSER_POOL_SIZE
              value: "5"
            - name: MAX_PAGES_PER_BROWSER
              value: "10"
          envFrom:
            - secretRef:
                name: crawlapi-secrets
          resources:
            requests:
              memory: "512Mi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "2000m"
|
||||||
|
---
|
||||||
|
# Autoscaler for the `worker` Deployment: keeps between 3 and 20 replicas,
# targeting 70% average CPU utilization and 80% average memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: worker-hpa
  namespace: crawlapi
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: worker
  minReplicas: 3
  maxReplicas: 20
  metrics:
    # CPU target
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Memory target
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
|
||||||
48
legal/dpa.md
Normal file
48
legal/dpa.md
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Data Processing Agreement (DPA)
|
||||||
|
|
||||||
|
This Data Processing Agreement ("DPA") is entered into between Crawl API ("Processor") and the Customer ("Controller") as of the date of account creation.
|
||||||
|
|
||||||
|
## 1. Definitions
|
||||||
|
|
||||||
|
- **Personal Data**: Any information relating to an identified or identifiable natural person.
|
||||||
|
- **Processing**: Any operation performed on Personal Data.
|
||||||
|
- **Data Subject**: The natural person to whom Personal Data relates.
|
||||||
|
|
||||||
|
## 2. Scope of Processing
|
||||||
|
|
||||||
|
Processor will process Personal Data only as necessary to provide the Service and in accordance with Controller's documented instructions.
|
||||||
|
|
||||||
|
## 3. Processor Obligations
|
||||||
|
|
||||||
|
- Process Personal Data only on documented instructions from Controller
|
||||||
|
- Ensure persons authorized to process Personal Data are bound by confidentiality
|
||||||
|
- Implement appropriate technical and organizational measures
|
||||||
|
- Assist Controller in responding to Data Subject requests
|
||||||
|
- Notify Controller of any Personal Data breaches
|
||||||
|
|
||||||
|
## 4. Subprocessors
|
||||||
|
|
||||||
|
Processor uses the following subprocessors:
|
||||||
|
- Amazon Web Services (hosting)
|
||||||
|
- Stripe (payment processing)
|
||||||
|
- Google Cloud (optional AI features)
|
||||||
|
|
||||||
|
## 5. Data Transfers
|
||||||
|
|
||||||
|
Personal Data may be transferred to countries outside the EEA. Processor ensures that adequate safeguards are in place for any such transfer.
|
||||||
|
|
||||||
|
## 6. Security Measures
|
||||||
|
|
||||||
|
Processor implements:
|
||||||
|
- Encryption at rest and in transit
|
||||||
|
- Access controls and authentication
|
||||||
|
- Regular security assessments
|
||||||
|
- Incident response procedures
|
||||||
|
|
||||||
|
## 7. Audit Rights
|
||||||
|
|
||||||
|
Controller may request an audit of Processor's compliance with this DPA once per year.
|
||||||
|
|
||||||
|
## 8. Termination
|
||||||
|
|
||||||
|
Upon termination, Processor will delete or return all Personal Data unless required by law to retain it.
|
||||||
78
legal/privacy-policy.md
Normal file
78
legal/privacy-policy.md
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
# Privacy Policy
|
||||||
|
|
||||||
|
Last updated: April 29, 2026
|
||||||
|
|
||||||
|
## 1. Information We Collect
|
||||||
|
|
||||||
|
### Account Information
|
||||||
|
- Email address
|
||||||
|
- Password (hashed)
|
||||||
|
- Billing information (processed by Stripe)
|
||||||
|
|
||||||
|
### Usage Data
|
||||||
|
- API request logs (endpoint, URL, timestamp)
|
||||||
|
- IP addresses (for rate limiting and security)
|
||||||
|
- Browser type and OS
|
||||||
|
|
||||||
|
### Scraped Data
|
||||||
|
- We temporarily process URLs and webpage content you submit
|
||||||
|
- We do not permanently store scraped content; cached copies are kept only until the cache TTL expires
|
||||||
|
- Cache TTL is 5 minutes by default
|
||||||
|
|
||||||
|
## 2. How We Use Your Information
|
||||||
|
|
||||||
|
- Provide and maintain the Service
|
||||||
|
- Process payments and prevent fraud
|
||||||
|
- Monitor usage and enforce rate limits
|
||||||
|
- Improve the Service and develop new features
|
||||||
|
- Comply with legal obligations
|
||||||
|
|
||||||
|
## 3. Data Sharing
|
||||||
|
|
||||||
|
We do not sell your personal data. We may share data with:
|
||||||
|
- Stripe (payment processing)
|
||||||
|
- Cloud providers (hosting infrastructure)
|
||||||
|
- Legal authorities when required by law
|
||||||
|
|
||||||
|
## 4. Data Security
|
||||||
|
|
||||||
|
We implement industry-standard security measures including:
|
||||||
|
- Encryption in transit (TLS/SSL)
|
||||||
|
- Hashed passwords (bcrypt)
|
||||||
|
- API key authentication
|
||||||
|
- Rate limiting and IP blocking
|
||||||
|
|
||||||
|
## 5. Your Rights
|
||||||
|
|
||||||
|
Depending on your jurisdiction, you may have the right to:
|
||||||
|
- Access your personal data
|
||||||
|
- Correct inaccurate data
|
||||||
|
- Delete your account and data
|
||||||
|
- Export your data
|
||||||
|
- Object to processing
|
||||||
|
|
||||||
|
## 6. Cookies
|
||||||
|
|
||||||
|
We use essential cookies for authentication. We do not use tracking cookies for advertising.
|
||||||
|
|
||||||
|
## 7. Data Retention
|
||||||
|
|
||||||
|
- Account data: retained until account deletion
|
||||||
|
- API logs: retained for 90 days
|
||||||
|
- Cached content: retained for 5 minutes
|
||||||
|
|
||||||
|
## 8. International Transfers
|
||||||
|
|
||||||
|
Data may be processed in the United States or other countries where our servers are located.
|
||||||
|
|
||||||
|
## 9. Children's Privacy
|
||||||
|
|
||||||
|
The Service is not intended for children under 13. We do not knowingly collect data from children.
|
||||||
|
|
||||||
|
## 10. Changes to This Policy
|
||||||
|
|
||||||
|
We may update this policy. Significant changes will be notified via email.
|
||||||
|
|
||||||
|
## Contact
|
||||||
|
|
||||||
|
For privacy questions, contact privacy@crawlapi.dev.
|
||||||
52
legal/terms-of-service.md
Normal file
52
legal/terms-of-service.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# Terms of Service
|
||||||
|
|
||||||
|
Last updated: April 29, 2026
|
||||||
|
|
||||||
|
## 1. Acceptance of Terms
|
||||||
|
|
||||||
|
By accessing or using Crawl API ("the Service"), you agree to be bound by these Terms of Service. If you do not agree, do not use the Service.
|
||||||
|
|
||||||
|
## 2. Description of Service
|
||||||
|
|
||||||
|
Crawl API provides headless browser automation and web scraping tools via a REST API. The Service allows users to extract data, take screenshots, generate PDFs, and perform other automated web interactions.
|
||||||
|
|
||||||
|
## 3. Account Registration
|
||||||
|
|
||||||
|
You must provide accurate information when creating an account. You are responsible for maintaining the security of your account credentials and API keys.
|
||||||
|
|
||||||
|
## 4. Acceptable Use
|
||||||
|
|
||||||
|
You agree not to use the Service to:
|
||||||
|
- Scrape websites that explicitly prohibit scraping in their robots.txt or terms of service
|
||||||
|
- Access or attempt to access non-public areas of websites without authorization
|
||||||
|
- Violate any applicable laws or regulations
|
||||||
|
- Distribute malware or engage in fraudulent activities
|
||||||
|
- Overwhelm or damage third-party websites (rate limiting applies)
|
||||||
|
|
||||||
|
## 5. API Usage and Credits
|
||||||
|
|
||||||
|
The Service operates on a credit-based system. Each API endpoint consumes 1 credit per call. Credits are non-refundable and expire according to your subscription plan.
|
||||||
|
|
||||||
|
## 6. Payment and Billing
|
||||||
|
|
||||||
|
Paid plans are billed in advance on a monthly or annual basis. You can cancel your subscription at any time. Refunds are provided at our sole discretion.
|
||||||
|
|
||||||
|
## 7. Data Privacy
|
||||||
|
|
||||||
|
We process data in accordance with our Privacy Policy. We do not store scraped content permanently unless explicitly requested.
|
||||||
|
|
||||||
|
## 8. Limitation of Liability
|
||||||
|
|
||||||
|
The Service is provided "as is" without warranties of any kind. We are not liable for any damages arising from your use of the Service.
|
||||||
|
|
||||||
|
## 9. Termination
|
||||||
|
|
||||||
|
We reserve the right to suspend or terminate your account for violation of these terms or for any other reason at our discretion.
|
||||||
|
|
||||||
|
## 10. Changes to Terms
|
||||||
|
|
||||||
|
We may update these terms from time to time. Continued use of the Service after changes constitutes acceptance of the new terms.
|
||||||
|
|
||||||
|
## Contact
|
||||||
|
|
||||||
|
For questions about these terms, contact hello@crawlapi.dev.
|
||||||
43
load-tests/load.js
Normal file
43
load-tests/load.js
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import http from 'k6/http';
|
||||||
|
import { check, sleep } from 'k6';
|
||||||
|
|
||||||
|
// k6 run configuration for the load test: a staged VU ramp plus pass/fail
// thresholds.
export const options = {
  stages: [
    // Ramp up
    { duration: '2m', target: 10 },
    // Steady state
    { duration: '5m', target: 10 },
    // Ramp up more
    { duration: '2m', target: 20 },
    // Steady state
    { duration: '5m', target: 20 },
    // Ramp down
    { duration: '2m', target: 0 },
  ],
  thresholds: {
    // 95% of requests under 5s
    http_req_duration: ['p(95)<5000'],
    // Less than 10% errors
    http_req_failed: ['rate<0.1'],
  },
};
|
||||||
|
|
||||||
|
const API_URL = __ENV.API_URL || 'http://localhost:3000';
|
||||||
|
const API_KEY = __ENV.API_KEY || 'test-key';
|
||||||
|
|
||||||
|
const endpoints = ['content', 'json', 'links'];
|
||||||
|
|
||||||
|
// One load-test iteration: POST example.com to a randomly chosen endpoint
// (`content`, `json` or `links`), verify the response, then pause for 1s.
export default function () {
  const endpoint = endpoints[Math.floor(Math.random() * endpoints.length)];

  const res = http.post(
    `${API_URL}/api/${endpoint}`,
    JSON.stringify({ url: 'https://example.com' }),
    {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': API_KEY,
      },
    }
  );

  check(res, {
    'status is 200': (r) => r.status === 200,
    'response has success': (r) => {
      // r.json() throws when the body is not valid JSON (e.g. an HTML error
      // page from a proxy). Count that as a failed check rather than letting
      // the exception abort the iteration and skip the sleep below.
      try {
        return r.json('success') === true;
      } catch (_) {
        return false;
      }
    },
  });

  sleep(1);
}
|
||||||
13
load-tests/package.json
Normal file
13
load-tests/package.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"name": "crawlapi-load-tests",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"scripts": {
|
||||||
|
"smoke": "k6 run smoke.js",
|
||||||
|
"load": "k6 run load.js",
|
||||||
|
"stress": "k6 run stress.js",
|
||||||
|
"screenshot": "k6 run screenshot.js"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/k6": "^0.52.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
37
load-tests/screenshot.js
Normal file
37
load-tests/screenshot.js
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import http from 'k6/http';
|
||||||
|
import { check, sleep } from 'k6';
|
||||||
|
|
||||||
|
// k6 run configuration for the screenshot test: 5 constant VUs for 3 minutes.
export const options = {
  vus: 5,
  duration: '3m',
  thresholds: {
    // Screenshots take longer
    http_req_duration: ['p(95)<30000'],
  },
};
|
||||||
|
|
||||||
|
const API_URL = __ENV.API_URL || 'http://localhost:3000';
|
||||||
|
const API_KEY = __ENV.API_KEY || 'test-key';
|
||||||
|
|
||||||
|
// One iteration: request a full-page screenshot of example.com, verify the
// response, then pause for 2s.
export default function () {
  const res = http.post(
    `${API_URL}/api/screenshot`,
    JSON.stringify({
      url: 'https://example.com',
      options: { fullPage: true }
    }),
    {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': API_KEY,
      },
    }
  );

  // r.json() throws when the body is not valid JSON (e.g. an HTML error page).
  // Treat that as "field absent" so the checks fail cleanly instead of
  // aborting the iteration and skipping the sleep below.
  const field = (r, selector) => {
    try {
      return r.json(selector);
    } catch (_) {
      return undefined;
    }
  };

  check(res, {
    'status is 200': (r) => r.status === 200,
    'response has success': (r) => field(r, 'success') === true,
    'response has url': (r) => field(r, 'data.url') !== undefined,
  });

  sleep(2);
}
|
||||||
28
load-tests/smoke.js
Normal file
28
load-tests/smoke.js
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import http from 'k6/http';
|
||||||
|
import { check } from 'k6';
|
||||||
|
|
||||||
|
// k6 run configuration for the smoke test: a single VU for one minute.
export const options = {
  duration: '1m',
  vus: 1,
};
|
||||||
|
|
||||||
|
const API_URL = __ENV.API_URL || 'http://localhost:3000';
|
||||||
|
const API_KEY = __ENV.API_KEY || 'test-key';
|
||||||
|
|
||||||
|
// One smoke-test iteration: POST example.com to /api/content and verify the
// response status and `success` flag.
export default function () {
  const res = http.post(
    `${API_URL}/api/content`,
    JSON.stringify({ url: 'https://example.com' }),
    {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': API_KEY,
      },
    }
  );

  check(res, {
    'status is 200': (r) => r.status === 200,
    'response has success': (r) => {
      // r.json() throws on a non-JSON body; report a failed check instead of
      // letting the exception abort the iteration.
      try {
        return r.json('success') === true;
      } catch (_) {
        return false;
      }
    },
  });
}
|
||||||
40
load-tests/stress.js
Normal file
40
load-tests/stress.js
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import http from 'k6/http';
|
||||||
|
import { check, sleep } from 'k6';
|
||||||
|
|
||||||
|
// k6 run configuration for the stress test: ramp to 50, 100 and 200 VUs,
// holding each level for 5 minutes, then ramp down to 0.
export const options = {
  stages: [
    // 50 VUs
    { duration: '2m', target: 50 },
    { duration: '5m', target: 50 },
    // 100 VUs
    { duration: '2m', target: 100 },
    { duration: '5m', target: 100 },
    // 200 VUs
    { duration: '2m', target: 200 },
    { duration: '5m', target: 200 },
    // back to zero
    { duration: '2m', target: 0 },
  ],
  thresholds: {
    http_req_duration: ['p(95)<10000'],
    http_req_failed: ['rate<0.2'],
  },
};
|
||||||
|
|
||||||
|
const API_URL = __ENV.API_URL || 'http://localhost:3000';
|
||||||
|
const API_KEY = __ENV.API_KEY || 'test-key';
|
||||||
|
|
||||||
|
// One stress-test iteration: POST example.com to /api/content, check for a
// 200, then pause for a random time below 2 seconds.
export default function () {
  const payload = JSON.stringify({ url: 'https://example.com' });
  const params = {
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': API_KEY,
    },
  };

  const res = http.post(`${API_URL}/api/content`, payload, params);

  const checks = {
    'status is 200': (r) => r.status === 200,
  };
  check(res, checks);

  const pauseSeconds = Math.random() * 2;
  sleep(pauseSeconds);
}
|
||||||
153
playwright/index.js
Normal file
153
playwright/index.js
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
const { chromium } = require('playwright');
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const TurndownService = require('turndown');
|
||||||
|
|
||||||
|
const turndownService = new TurndownService();
|
||||||
|
const OUTPUT_DIR = process.env.OUTPUT_DIR || '/tmp/crawlapi';
|
||||||
|
|
||||||
|
/**
 * Parse the JSON-encoded options argument.
 *
 * @param {string|undefined} raw JSON text (typically a CLI argument).
 * @returns {object} The parsed plain object, or `{}` when `raw` is missing,
 *   is not valid JSON, or decodes to anything other than an object (`null`,
 *   an array, a number, a string). Callers read properties off the result,
 *   so it must never be null.
 */
function parseOptions(raw) {
  try {
    const parsed = JSON.parse(raw);
    const isPlainObject =
      parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}
|
||||||
|
|
||||||
|
/**
 * Decode the URL argument, which may arrive either as a JSON string literal
 * (`"https://..."`) or as a bare URL.
 *
 * @param {string|undefined} raw The raw argument.
 * @returns {string} The URL as a string. Returns `''` when `raw` is not a
 *   string (e.g. the argument was omitted) so the caller's navigation fails
 *   with a normal error instead of a TypeError here.
 */
function parseUrl(raw) {
  if (typeof raw !== 'string') {
    return '';
  }
  try {
    const parsed = JSON.parse(raw);
    // Accept only JSON strings; input such as `123` or `true` is valid JSON
    // but must still be returned as text.
    if (typeof parsed === 'string') {
      return parsed;
    }
  } catch {
    // Not JSON: fall through and treat `raw` as a bare URL.
  }
  return raw.replace(/^"|"$/g, '');
}
|
||||||
|
|
||||||
|
// Create `dir`, including missing parent directories, unless something
// already exists at that path.
function ensureDir(dir) {
  const alreadyPresent = fs.existsSync(dir);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(dir, { recursive: true });
}
|
||||||
|
|
||||||
|
// Write `data` to `filename` inside OUTPUT_DIR (creating the directory first)
// and return the full path of the written file.
function writeFile(filename, data) {
  ensureDir(OUTPUT_DIR);
  const destination = path.join(OUTPUT_DIR, filename);
  fs.writeFileSync(destination, data);
  return destination;
}
|
||||||
|
|
||||||
|
/**
 * CLI entry point. Reads `<endpoint> <url> [optionsJson]` from process.argv,
 * loads the page in headless Chromium and prints the endpoint's result as one
 * JSON line on stdout.
 *
 * Supported endpoints: crawl, content, screenshot, pdf, markdown, snapshot,
 * scrape, json, links. Options read here: width, height, user_agent, timeout
 * (seconds, default 30), wait_for, full_page, selectors.
 *
 * On failure the error message is written to stderr and the exit code is set
 * to 1. The browser is closed on both paths.
 */
async function run() {
  const endpoint = process.argv[2];
  const url = parseUrl(process.argv[3]);
  const options = parseOptions(process.argv[4]);

  const browser = await chromium.launch({ headless: true });

  try {
    // Context and page creation live inside the try block so that a failure
    // here is reported like any other error and still closes the browser.
    const context = await browser.newContext({
      viewport: {
        width: options.width || 1440,
        height: options.height || 900,
      },
      userAgent: options.user_agent || undefined,
    });
    const page = await context.newPage();

    const timeout = (options.timeout || 30) * 1000;
    await page.goto(url, { waitUntil: 'networkidle', timeout });

    if (options.wait_for) {
      await page.waitForSelector(options.wait_for, { timeout });
    }

    let result = {};

    switch (endpoint) {
      case 'crawl': {
        const html = await page.content();
        const title = await page.title();
        result = { html, title, url: page.url() };
        break;
      }
      case 'content': {
        const html = await page.content();
        result = { html, url: page.url() };
        break;
      }
      case 'screenshot': {
        const screenshotOptions = { type: 'png' };
        if (options.full_page) {
          screenshotOptions.fullPage = true;
        }
        const buffer = await page.screenshot(screenshotOptions);
        const filepath = writeFile(`${Date.now()}.png`, buffer);
        result = { file_path: filepath, url: page.url() };
        break;
      }
      case 'pdf': {
        const buffer = await page.pdf({
          format: 'A4',
          printBackground: true,
        });
        const filepath = writeFile(`${Date.now()}.pdf`, buffer);
        result = { file_path: filepath, url: page.url() };
        break;
      }
      case 'markdown': {
        const html = await page.content();
        const markdown = turndownService.turndown(html);
        result = { markdown, url: page.url() };
        break;
      }
      case 'snapshot': {
        const html = await page.content();
        const screenshotBuffer = await page.screenshot({ type: 'png', fullPage: options.full_page || false });
        const filepath = writeFile(`${Date.now()}.png`, screenshotBuffer);
        result = { html, file_path: filepath, url: page.url() };
        break;
      }
      case 'scrape': {
        // For each selector, collect the trimmed text of every matching
        // element, skipping elements with no text.
        const selectors = options.selectors || ['h1', 'h2', 'p', 'a'];
        const data = {};
        for (const selector of selectors) {
          const elements = await page.locator(selector).all();
          const texts = [];
          for (const el of elements) {
            const text = await el.textContent();
            if (text) texts.push(text.trim());
          }
          data[selector] = texts;
        }
        result = { data, url: page.url() };
        break;
      }
      case 'json': {
        const title = await page.title();
        const description = await page.locator('meta[name="description"]').getAttribute('content').catch(() => null);
        const headings = await page.locator('h1, h2, h3').allTextContents();
        const links = await page.locator('a[href]').evaluateAll(els => els.map(el => ({ href: el.href, text: el.textContent?.trim() })));
        // Only the first 50 links are returned.
        result = { title, description, headings, links: links.slice(0, 50), url: page.url() };
        break;
      }
      case 'links': {
        const links = await page.locator('a[href]').evaluateAll(els =>
          els.map(el => ({
            href: el.href,
            text: el.textContent?.trim() || '',
          })).filter(l => l.href)
        );
        // De-duplicate by href; the Map keeps the last entry seen per href.
        result = { links: [...new Map(links.map(l => [l.href, l])).values()], url: page.url() };
        break;
      }
      default:
        throw new Error(`Unknown endpoint: ${endpoint}`);
    }

    console.log(JSON.stringify(result));
  } catch (error) {
    console.error(error.message);
    // Set the exit code rather than calling process.exit(1): exit() terminates
    // the process immediately, so the `finally` block below would never run
    // and the browser would not be closed by this code.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
run();
|
||||||
13
playwright/package.json
Normal file
13
playwright/package.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"name": "crawlapi-playwright",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Playwright worker for Crawl API",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"start": "node index.js"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"playwright": "^1.49.0",
|
||||||
|
"turndown": "^7.2.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
412
playwright/pool.js
Normal file
412
playwright/pool.js
Normal file
@@ -0,0 +1,412 @@
|
|||||||
|
const { chromium } = require('playwright');
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const TurndownService = require('turndown');
|
||||||
|
const { execSync } = require('child_process');
|
||||||
|
|
||||||
|
const turndownService = new TurndownService();
|
||||||
|
const OUTPUT_DIR = process.env.OUTPUT_DIR || '/tmp/crawlapi';
|
||||||
|
const COOKIE_DIR = process.env.COOKIE_DIR || '/tmp/crawlapi/cookies';
|
||||||
|
const PROXY_URL = process.env.PROXY_URL || '';
|
||||||
|
const CAPTCHA_API_KEY = process.env.CAPTCHA_API_KEY || '';
|
||||||
|
|
||||||
|
/**
 * Round-robin selector over the proxies listed in PROXY_URL
 * (comma-separated). With no proxies configured, getNext() returns null.
 */
class ProxyRotator {
  constructor() {
    this.proxies = [];
    this.index = 0;
    this.loadProxies();
  }

  // Split PROXY_URL on commas, trimming whitespace and dropping empty entries.
  loadProxies() {
    if (!PROXY_URL) {
      return;
    }
    const entries = [];
    for (const piece of PROXY_URL.split(',')) {
      const candidate = piece.trim();
      if (candidate) {
        entries.push(candidate);
      }
    }
    this.proxies = entries;
  }

  // Return the current proxy and advance the cursor, wrapping at the end.
  getNext() {
    const total = this.proxies.length;
    if (total === 0) {
      return null;
    }
    const current = this.proxies[this.index];
    this.index = (this.index + 1) % total;
    return current;
  }
}
|
||||||
|
|
||||||
|
const proxyRotator = new ProxyRotator();
|
||||||
|
|
||||||
|
/**
 * Fixed-size pool of headless Chromium browsers. Each acquired page lives in
 * its own browser context; a browser hosts at most `maxPagesPerBrowser` pages
 * at once.
 */
class BrowserPool {
  /**
   * @param {number} maxBrowsers Number of browsers launched by init().
   * @param {number} maxPagesPerBrowser Concurrent page limit per browser.
   */
  constructor(maxBrowsers = 5, maxPagesPerBrowser = 10) {
    this.maxBrowsers = maxBrowsers;
    this.maxPagesPerBrowser = maxPagesPerBrowser;
    this.browsers = [];
    this.initialized = false;
    // In-flight initialization, shared by concurrent init() callers.
    this.initPromise = null;
  }

  /**
   * Launch the browsers once. Concurrent callers await the same launch
   * instead of each starting their own set: `initialized` only becomes true
   * after all launches finish, so checking that flag alone would let every
   * caller arriving in the meantime launch `maxBrowsers` more browsers.
   */
  async init() {
    if (this.initialized) return;
    if (!this.initPromise) {
      this.initPromise = this._launchBrowsers().finally(() => {
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  // Launch every browser; if one launch fails, close those already started
  // and rethrow so a later init() begins from a clean state.
  async _launchBrowsers() {
    const launched = [];
    try {
      for (let i = 0; i < this.maxBrowsers; i++) {
        const browser = await chromium.launch({
          headless: true,
          args: [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--disable-gpu',
            '--window-size=1920,1080',
          ]
        });
        launched.push({ browser, pages: [], lock: false });
      }
    } catch (err) {
      await Promise.allSettled(launched.map(entry => entry.browser.close()));
      throw err;
    }
    this.browsers = launched;
    this.initialized = true;
    console.error(`Browser pool initialized with ${this.maxBrowsers} browsers`);
  }

  /**
   * Wait for a browser with spare capacity and open a new page on it.
   *
   * @param {string|null} proxy Optional proxy server for the new context.
   * @returns {Promise<{page: object, entry: object}>} The page and its pool
   *   entry; pass both to releasePage() when done.
   */
  async acquirePage(proxy = null) {
    await this.init();
    for (;;) {
      const entry = this.browsers.find(
        candidate => !candidate.lock && candidate.pages.length < this.maxPagesPerBrowser
      );
      if (entry) {
        // `lock` serializes page creation per browser so the capacity check
        // above cannot be passed by two callers at once.
        entry.lock = true;
        let context = null;
        try {
          const contextOptions = {
            viewport: { width: 1440, height: 900 },
          };
          if (proxy) {
            contextOptions.proxy = { server: proxy };
          }
          context = await entry.browser.newContext(contextOptions);
          const page = await context.newPage();
          entry.pages.push(page);
          return { page, entry };
        } catch (err) {
          // Do not leak a context whose page could not be created.
          if (context) {
            await context.close().catch(() => {});
          }
          throw err;
        } finally {
          entry.lock = false;
        }
      }
      // No capacity right now: poll again in 50ms.
      await new Promise(resolve => setTimeout(resolve, 50));
    }
  }

  /**
   * Return a page to the pool: drop it from the entry's bookkeeping and close
   * its context. Errors from closing are ignored.
   */
  async releasePage(page, entry) {
    const idx = entry.pages.indexOf(page);
    if (idx > -1) {
      entry.pages.splice(idx, 1);
    }
    try {
      await page.context().close();
    } catch (e) {
      // ignore
    }
  }

  /**
   * Close every browser and reset the pool so init() can run again. A browser
   * that fails to close is logged and does not prevent the others from being
   * closed.
   */
  async close() {
    const outcomes = await Promise.allSettled(
      this.browsers.map(entry => entry.browser.close())
    );
    for (const outcome of outcomes) {
      if (outcome.status === 'rejected') {
        const reason = outcome.reason;
        console.error('Failed to close browser:', reason && reason.message ? reason.message : reason);
      }
    }
    this.browsers = [];
    this.initialized = false;
  }
}
|
||||||
|
|
||||||
|
// Shared browser pool for this process. Both limits are read from the
// environment; an unset, non-numeric or zero value falls back to the default
// (5 browsers, 10 pages per browser).
const pool = new BrowserPool(
  Number.parseInt(process.env.BROWSER_POOL_SIZE) || 5,
  Number.parseInt(process.env.MAX_PAGES_PER_BROWSER) || 10,
);
|
||||||
|
|
||||||
|
/**
 * Parse the JSON options argument into a plain object.
 *
 * @param {string|undefined} raw  JSON text.
 * @returns {object} the parsed object, or `{}` when `raw` is missing, is not
 *          valid JSON, or is valid JSON that is not an object (`null`, a
 *          number, a string, an array). Callers read properties straight off
 *          the result, so it must never be `null` or a primitive.
 */
function parseOptions(raw) {
  try {
    const parsed = JSON.parse(raw);
    const isPlainObject =
      parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}
|
||||||
|
|
||||||
|
/**
 * Normalise the URL argument to a plain string.
 *
 * Accepts either a JSON-encoded string (`"https://example.com"`, quotes
 * included) or a bare URL. Anything that is not a JSON string literal is
 * returned as text with one leading and one trailing double quote removed,
 * so the result is always a string.
 *
 * @param {string} raw  URL argument as received.
 * @returns {string} the URL text.
 * @throws {TypeError} when `raw` is `undefined` or `null`.
 */
function parseUrl(raw) {
  if (raw === undefined || raw === null) {
    throw new TypeError('URL argument is required');
  }

  const text = String(raw);
  try {
    const decoded = JSON.parse(text);
    // Only a JSON string literal is a usable URL; input such as `123` or
    // `true` is valid JSON but must stay text.
    if (typeof decoded === 'string') return decoded;
  } catch {
    // Not JSON: treat the input as a bare URL below.
  }
  return text.replace(/^"|"$/g, '');
}
|
||||||
|
|
||||||
|
/**
 * Create `dir`, including missing parent directories, unless something
 * already exists at that path.
 *
 * @param {string} dir  Directory path.
 */
function ensureDir(dir) {
  const alreadyPresent = fs.existsSync(dir);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(dir, { recursive: true });
}
|
||||||
|
|
||||||
|
/**
 * Write `data` to `filename` inside `dir`, creating `dir` first if needed.
 *
 * @param {string} dir            Target directory.
 * @param {string} filename       File name within `dir`.
 * @param {string|Buffer} data    Content handed to `fs.writeFileSync`.
 * @returns {string} the full path of the written file.
 */
function writeFile(dir, filename, data) {
  ensureDir(dir);

  const destination = path.join(dir, filename);
  fs.writeFileSync(destination, data);

  return destination;
}
|
||||||
|
|
||||||
|
/**
 * Resolve the cookie-jar file for a session.
 *
 * The session id comes from caller-supplied options and ends up in a file
 * name, so it must be a single path component: ids containing a path
 * separator or a NUL byte are rejected, otherwise a value such as
 * `../../x` would read or write outside `COOKIE_DIR`.
 *
 * @param {string} sessionId  Session identifier.
 * @returns {string|null} `<COOKIE_DIR>/<sessionId>.json`, or `null` when no
 *          session id was given or the id is unsafe to use as a file name.
 */
function getCookiePath(sessionId) {
  if (!sessionId) return null;

  const id = String(sessionId);
  const isSingleComponent = path.basename(id) === id && !id.includes('\0');
  if (!isSingleComponent) {
    console.error('Ignoring unsafe session id');
    return null;
  }

  ensureDir(COOKIE_DIR);
  return path.join(COOKIE_DIR, `${id}.json`);
}
|
||||||
|
|
||||||
|
/**
 * Restore a session's saved cookies into a browser context.
 *
 * Does nothing when there is no session id or no cookie file yet. A file
 * that cannot be read, parsed or applied is logged and otherwise ignored.
 *
 * @param {object} context    Browser context exposing `addCookies`.
 * @param {string} sessionId  Session identifier.
 * @returns {Promise<void>}
 */
async function loadCookies(context, sessionId) {
  const cookiePath = getCookiePath(sessionId);
  if (!cookiePath || !fs.existsSync(cookiePath)) {
    return;
  }

  try {
    const serialized = fs.readFileSync(cookiePath, 'utf8');
    await context.addCookies(JSON.parse(serialized));
  } catch (e) {
    console.error('Failed to load cookies:', e.message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Persist a browser context's cookies to the session's cookie file as JSON.
 *
 * Does nothing without a session id. A failure to read or write the cookies
 * is logged and otherwise ignored.
 *
 * @param {object} context    Browser context exposing `cookies()`.
 * @param {string} sessionId  Session identifier.
 * @returns {Promise<void>}
 */
async function saveCookies(context, sessionId) {
  const cookiePath = getCookiePath(sessionId);
  if (!cookiePath) {
    return;
  }

  try {
    const serialized = JSON.stringify(await context.cookies());
    fs.writeFileSync(cookiePath, serialized);
  } catch (e) {
    console.error('Failed to save cookies:', e.message);
  }
}
|
||||||
|
|
||||||
|
/**
 * Register an init script on `page` that masks common automation
 * fingerprints before any page script runs.
 *
 * The script overrides `navigator.webdriver`, `navigator.plugins` and
 * `navigator.languages`, defines `window.chrome`, and makes
 * `navigator.permissions.query` answer `notifications` queries from
 * `Notification.permission`.
 *
 * @param {object} page  Page exposing `addInitScript`.
 * @returns {Promise<void>}
 */
async function applyStealth(page) {
  // The callback is executed inside the page, not in Node, so it must not
  // reference anything from the surrounding scope.
  await page.addInitScript(() => {
    Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
    Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
    Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
    window.chrome = { runtime: {} };

    const permissions = window.navigator.permissions;
    if (permissions && typeof permissions.query === 'function') {
      // Bind the native method to its owner: calling it detached from
      // `permissions` fails, which would break every non-notification query.
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters) => (
        parameters.name === 'notifications'
          ? Promise.resolve({ state: Notification.permission })
          : originalQuery(parameters)
      );
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Try to solve a reCAPTCHA on the current page through the CapSolver API.
 *
 * Looks for an element carrying `data-sitekey`, submits a solving task,
 * polls for the result every 5 seconds (at most 60 times) and, on success,
 * writes the token into `#g-recaptcha-response`.
 *
 * @param {object} page  Page to inspect and update.
 * @param {string} url   URL of the page, sent to the solver as `websiteURL`.
 * @returns {Promise<boolean>} `true` when a token was injected; `false` when
 *          no API key is configured, no site key was found, the solver
 *          reported an error, polling ran out, or anything threw.
 */
async function solveCaptcha(page, url) {
  if (!CAPTCHA_API_KEY) return false;

  try {
    // Check for reCAPTCHA. count() answers immediately, whereas
    // getAttribute() on a locator that matches nothing waits out the whole
    // default timeout - a long stall on every page without a captcha.
    const widgets = page.locator('[data-sitekey]');
    if ((await widgets.count()) === 0) return false;

    const recaptchaSiteKey = await widgets.first().getAttribute('data-sitekey').catch(() => null);
    if (!recaptchaSiteKey) return false;

    console.error('Found reCAPTCHA, attempting to solve...');
    const taskData = {
      clientKey: CAPTCHA_API_KEY,
      task: {
        type: 'NoCaptchaTaskProxyless',
        websiteURL: url,
        websiteKey: recaptchaSiteKey,
      }
    };

    // Submit the task to CapSolver
    const res = await fetch('https://api.capsolver.com/createTask', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(taskData)
    });
    const data = await res.json();
    if (data.errorId !== 0) return false;

    const taskId = data.taskId;
    // Poll for result
    for (let i = 0; i < 60; i++) {
      await new Promise(r => setTimeout(r, 5000));
      const resultRes = await fetch('https://api.capsolver.com/getTaskResult', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ clientKey: CAPTCHA_API_KEY, taskId })
      });
      const resultData = await resultRes.json();

      if (resultData.status === 'ready') {
        await page.locator('#g-recaptcha-response').evaluate((el, token) => {
          el.value = token;
        }, resultData.solution.gRecaptchaResponse);
        return true;
      }

      // The solver gave up: stop now instead of polling for the remaining
      // minutes for a result that will never arrive.
      if (resultData.status === 'failed' || resultData.errorId) {
        console.error('Captcha solving failed:', resultData.errorDescription || resultData.status);
        return false;
      }
    }
  } catch (e) {
    console.error('Captcha solving failed:', e.message);
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Command-line entry point: load one URL in a pooled browser page and print
 * the result of the requested endpoint as a single JSON line on stdout.
 *
 * Arguments: `process.argv[2]` is the endpoint (`crawl`, `content`,
 * `screenshot`, `pdf`, `markdown`, `snapshot`, `scrape`, `json`, `links`),
 * `process.argv[3]` the URL, `process.argv[4]` a JSON options object.
 * Screenshots and PDFs are written under `OUTPUT_DIR` (default
 * `/tmp/crawlapi`). Diagnostics go to stderr so stdout stays parseable.
 *
 * Errors raised while processing the page are logged and set
 * `process.exitCode = 1`; the page is released in every case.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const endpoint = process.argv[2];
  const url = parseUrl(process.argv[3]);
  const options = parseOptions(process.argv[4]);
  const outputDir = process.env.OUTPUT_DIR || '/tmp/crawlapi';

  // Get proxy if enabled
  const proxy = options.use_proxy ? proxyRotator.getNext() : null;
  if (proxy) {
    console.error(`Using proxy: ${proxy}`);
  }

  const { page, entry } = await pool.acquirePage(proxy);

  try {
    // Load cookies if session provided
    if (options.session_id) {
      await loadCookies(page.context(), options.session_id);
    }

    // Mobile emulation profile (undefined if the device name is unknown)
    const device = options.mobile ? require('playwright').devices['iPhone 14'] : null;

    // Build all extra headers first and send them in ONE call: a second
    // setExtraHTTPHeaders() call replaces the earlier set, which used to drop
    // the custom headers whenever a user agent was also requested. The page
    // object has no setUserAgent(), so the device user agent is applied the
    // same way as a custom one: as a request header.
    const extraHeaders = { ...options.headers };
    if (options.mobile) {
      if (device) extraHeaders['User-Agent'] = device.userAgent;
    } else if (options.user_agent) {
      extraHeaders['User-Agent'] = options.user_agent;
    }
    if (Object.keys(extraHeaders).length > 0) {
      await page.setExtraHTTPHeaders(extraHeaders);
    }

    // Apply stealth mode
    if (options.stealth !== false) {
      await applyStealth(page);
    }

    const timeout = (options.timeout || 30) * 1000;

    // Explicit width/height win; otherwise use the device viewport in mobile
    // mode (previously overwritten by the desktop default) or 1440x900.
    const baseViewport = device ? device.viewport : { width: 1440, height: 900 };
    const viewport = {
      width: options.width || baseViewport.width,
      height: options.height || baseViewport.height,
    };
    await page.setViewportSize(viewport);

    await page.goto(url, { waitUntil: 'networkidle', timeout });

    // Try to solve CAPTCHA if present
    if (options.solve_captcha !== false) {
      const solved = await solveCaptcha(page, url);
      if (solved) {
        // NOTE(review): reloading replaces the document the token was just
        // written into, so the token is probably lost - confirm whether the
        // form should be submitted here instead.
        await page.goto(url, { waitUntil: 'networkidle', timeout });
      }
    }

    if (options.wait_for) {
      await page.waitForSelector(options.wait_for, { timeout });
    }

    // Scroll for infinite scroll: stop when the height stops growing or
    // after 10 rounds.
    if (options.scroll_to_bottom) {
      let lastHeight = 0;
      let retries = 0;
      while (retries < 10) {
        const currentHeight = await page.evaluate(() => document.body.scrollHeight);
        if (currentHeight === lastHeight) break;
        lastHeight = currentHeight;
        await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
        await page.waitForTimeout(500);
        retries++;
      }
    }

    let result = {};

    switch (endpoint) {
      case 'crawl': {
        const html = await page.content();
        const title = await page.title();
        result = { html, title, url: page.url() };
        break;
      }
      case 'content': {
        const html = await page.content();
        result = { html, url: page.url() };
        break;
      }
      case 'screenshot': {
        const opts = { type: 'png' };
        if (options.full_page) opts.fullPage = true;
        const buffer = await page.screenshot(opts);
        const filepath = writeFile(outputDir, `${Date.now()}.png`, buffer);
        result = { file_path: filepath, url: page.url() };
        break;
      }
      case 'pdf': {
        const buffer = await page.pdf({ format: 'A4', printBackground: true });
        const filepath = writeFile(outputDir, `${Date.now()}.pdf`, buffer);
        result = { file_path: filepath, url: page.url() };
        break;
      }
      case 'markdown': {
        const html = await page.content();
        const markdown = turndownService.turndown(html);
        result = { markdown, url: page.url() };
        break;
      }
      case 'snapshot': {
        const html = await page.content();
        const buffer = await page.screenshot({ type: 'png', fullPage: options.full_page || false });
        const filepath = writeFile(outputDir, `${Date.now()}.png`, buffer);
        result = { html, file_path: filepath, url: page.url() };
        break;
      }
      case 'scrape': {
        const selectors = options.selectors || ['h1', 'h2', 'p', 'a'];
        const data = {};
        for (const selector of selectors) {
          const elements = await page.locator(selector).all();
          const texts = [];
          for (const el of elements) {
            const text = await el.textContent();
            if (text) texts.push(text.trim());
          }
          data[selector] = texts;
        }
        result = { data, url: page.url() };
        break;
      }
      case 'json': {
        const title = await page.title();
        // Check for the tag before reading it: getAttribute() on a locator
        // that matches nothing waits for the default timeout before failing.
        const descriptionMeta = page.locator('meta[name="description"]');
        const description = (await descriptionMeta.count()) > 0
          ? await descriptionMeta.first().getAttribute('content').catch(() => null)
          : null;
        const headings = await page.locator('h1, h2, h3').allTextContents();
        const links = await page.locator('a[href]').evaluateAll(els => els.map(el => ({ href: el.href, text: el.textContent?.trim() })));
        result = { title, description, headings, links: links.slice(0, 50), url: page.url() };
        break;
      }
      case 'links': {
        const links = await page.locator('a[href]').evaluateAll(els =>
          els.map(el => ({ href: el.href, text: el.textContent?.trim() || '' })).filter(l => l.href)
        );
        // De-duplicate by href; the last link seen for an href wins.
        result = { links: [...new Map(links.map(l => [l.href, l])).values()], url: page.url() };
        break;
      }
      default:
        throw new Error(`Unknown endpoint: ${endpoint}`);
    }

    // Save cookies if session provided
    if (options.session_id) {
      await saveCookies(page.context(), options.session_id);
    }

    console.log(JSON.stringify(result));
  } catch (error) {
    console.error(error.message);
    process.exitCode = 1;
  } finally {
    await pool.releasePage(page, entry);
  }
}
|
||||||
|
|
||||||
|
// Start the job. On completion, exit 100 ms later with the exit code run()
// recorded (0 when none was set); if run() itself rejects, log the error and
// exit with status 1 immediately.
run()
  .then(() => {
    const exitWithRecordedCode = () => process.exit(process.exitCode || 0);
    setTimeout(exitWithRecordedCode, 100);
  })
  .catch((failure) => {
    console.error(failure);
    process.exit(1);
  });
|
||||||
7
prometheus.yml
Normal file
7
prometheus.yml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# Prometheus configuration: one scrape job, `crawlapi`, polling the target
# api:3000 with the global 15-second scrape interval.
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'crawlapi'
    static_configs:
      - targets:
          - 'api:3000'
|
||||||
Reference in New Issue
Block a user