Initial commit: Full Crawl API implementation
Some checks failed
CI / Test (push) Has been cancelled
Deploy / Deploy to Staging (push) Has been cancelled
CI / Build & Push (push) Has been cancelled
Deploy / Deploy to Production (push) Has been cancelled

This commit is contained in:
2026-04-29 07:03:48 +00:00
commit 62994d4f3d
92 changed files with 6176 additions and 0 deletions

15
crates/shared/Cargo.toml Normal file
View File

@@ -0,0 +1,15 @@
# Manifest for the `shared` crate.
[package]
name = "shared"
version = "0.1.0"
edition = "2021"
# Every dependency below sets `workspace = true`, so its version and features are
# inherited from the workspace root's `[workspace.dependencies]` table rather than
# being pinned in this file.
[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true }
regex = { workspace = true }
config = { workspace = true }
sqlx = { workspace = true }

26
crates/shared/src/api.rs Normal file
View File

@@ -0,0 +1,26 @@
use serde::{Deserialize, Serialize};
/// Generic response envelope carrying either a payload of type `T` or an error message.
///
/// The `ok` and `err` constructors on this type each populate exactly one of
/// `data` / `error`; the struct itself does not enforce that invariant, since all
/// fields are public.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiResponse<T> {
/// `true` for a successful response, `false` for a failure.
pub success: bool,
/// Payload, if any. No `skip_serializing_if` is set, so `None` is serialized
/// rather than omitted.
pub data: Option<T>,
/// Error message, if any. Serialized even when `None`, like `data`.
pub error: Option<String>,
}
impl<T> ApiResponse<T> {
    /// Builds a successful response wrapping `data`.
    ///
    /// The result has `success == true`, `data == Some(data)` and no error message.
    pub fn ok(data: T) -> Self {
        let payload = Some(data);
        Self {
            error: None,
            data: payload,
            success: true,
        }
    }

    /// Builds a failed response carrying `msg` as the error message.
    ///
    /// The result has `success == false`, no payload, and `error == Some(msg)`.
    /// Accepts anything convertible into a `String` (e.g. `&str` or `String`).
    pub fn err(msg: impl Into<String>) -> Self {
        let message: String = msg.into();
        Self {
            error: Some(message),
            data: None,
            success: false,
        }
    }
}

View File

@@ -0,0 +1,25 @@
use serde::Deserialize;
/// Application configuration, deserialized via serde.
///
/// `Debug` is implemented by hand (instead of derived) so that secrets and
/// connection URLs are never written out verbatim when the config is formatted
/// with `{:?}`, e.g. in a startup log line.
#[derive(Clone, Deserialize)]
pub struct AppConfig {
    /// Database connection URL. May embed credentials, so it is redacted in `Debug`.
    pub database_url: String,
    /// Redis connection URL. May embed credentials, so it is redacted in `Debug`.
    pub redis_url: String,
    /// Secret used for JWTs. Redacted in `Debug`.
    pub jwt_secret: String,
    /// S3 endpoint.
    pub s3_endpoint: String,
    /// S3 bucket name.
    pub s3_bucket: String,
    /// S3 region.
    pub s3_region: String,
    /// S3 access key. Redacted in `Debug`.
    pub s3_access_key: String,
    /// S3 secret key. Redacted in `Debug`.
    pub s3_secret_key: String,
    /// Port for the application.
    pub app_port: u16,
    /// Host for the application.
    pub app_host: String,
    /// Path to the Playwright script.
    pub playwright_script_path: String,
}

impl std::fmt::Debug for AppConfig {
    /// Formats the config like a derived `Debug` would, but replaces every
    /// credential-bearing field with a fixed placeholder.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const REDACTED: &str = "<redacted>";
        f.debug_struct("AppConfig")
            .field("database_url", &REDACTED)
            .field("redis_url", &REDACTED)
            .field("jwt_secret", &REDACTED)
            .field("s3_endpoint", &self.s3_endpoint)
            .field("s3_bucket", &self.s3_bucket)
            .field("s3_region", &self.s3_region)
            .field("s3_access_key", &REDACTED)
            .field("s3_secret_key", &REDACTED)
            .field("app_port", &self.app_port)
            .field("app_host", &self.app_host)
            .field("playwright_script_path", &self.playwright_script_path)
            .finish()
    }
}
impl AppConfig {
    /// Loads the configuration from the process environment.
    ///
    /// Uses the `config` crate's `Environment` source with its default settings
    /// as the only source, then deserializes the result into `AppConfig`.
    ///
    /// # Errors
    ///
    /// Returns a `config::ConfigError` if building the configuration fails or if
    /// the collected values cannot be deserialized into `AppConfig`.
    pub fn from_env() -> Result<Self, config::ConfigError> {
        let env_source = config::Environment::default();
        let settings = config::Config::builder()
            .add_source(env_source)
            .build()?;
        settings.try_deserialize()
    }
}

View File

@@ -0,0 +1,41 @@
use thiserror::Error;
/// Application-wide error type.
///
/// `Display` messages come from the `#[error(...)]` attributes below.
/// `status_code` (in this file) maps each variant to an HTTP status number.
#[derive(Debug, Error)]
pub enum AppError {
/// A `sqlx` failure; `#[from]` lets `?` convert `sqlx::Error` automatically.
#[error("Database error: {0}")]
Database(#[from] sqlx::Error),
/// A Redis failure, carried as a message string.
#[error("Redis error: {0}")]
Redis(String),
/// An S3 failure, carried as a message string.
#[error("S3 error: {0}")]
S3(String),
/// A URL that was rejected as invalid; the payload is included in the message.
#[error("Invalid URL: {0}")]
InvalidUrl(String),
/// Browser automation failed; the payload is included in the message.
#[error("Browser automation failed: {0}")]
BrowserError(String),
/// The caller exceeded a rate limit.
#[error("Rate limit exceeded")]
RateLimit,
/// The caller does not have enough credits.
#[error("Insufficient credits")]
InsufficientCredits,
/// The caller is not authorized.
#[error("Unauthorized")]
Unauthorized,
/// The requested resource was not found.
#[error("Not found")]
NotFound,
/// The request was malformed; the payload is included in the message.
#[error("Bad request: {0}")]
BadRequest(String),
/// Any other internal failure, carried as a message string.
#[error("Internal error: {0}")]
Internal(String),
}
impl AppError {
    /// Returns the HTTP status code (as a plain number) that corresponds to this error.
    ///
    /// Mapping:
    /// - `InvalidUrl`, `BadRequest` → 400
    /// - `Unauthorized` → 401
    /// - `InsufficientCredits` → 403
    /// - `NotFound` → 404
    /// - `RateLimit` → 429
    /// - `Database`, `Redis`, `S3`, `BrowserError`, `Internal` → 500
    pub fn status_code(&self) -> u16 {
        // The match is deliberately exhaustive (no `_` arm): adding a new variant
        // must fail to compile here until a status code is chosen for it, rather
        // than silently defaulting to 500.
        match self {
            AppError::InvalidUrl(_) | AppError::BadRequest(_) => 400,
            AppError::Unauthorized => 401,
            AppError::InsufficientCredits => 403,
            AppError::NotFound => 404,
            AppError::RateLimit => 429,
            AppError::Database(_)
            | AppError::Redis(_)
            | AppError::S3(_)
            | AppError::BrowserError(_)
            | AppError::Internal(_) => 500,
        }
    }
}

24
crates/shared/src/jobs.rs Normal file
View File

@@ -0,0 +1,24 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::models::CrawlOptions;
/// A serializable description of one crawl job.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlJob {
/// Identifier of this job.
pub job_id: Uuid,
/// Identifier of the user the job belongs to.
pub user_id: Uuid,
/// Identifier of the API key associated with the job.
pub api_key_id: Uuid,
/// Endpoint name as a free-form string; the set of valid values is not defined here.
pub endpoint: String,
/// Target URL, stored as an unvalidated string.
pub url: String,
/// Per-request crawl options.
pub options: CrawlOptions,
}
/// The serializable outcome of a crawl job.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlResult {
/// Identifier of the job this result belongs to.
pub job_id: Uuid,
/// Whether the job succeeded.
pub success: bool,
/// Arbitrary JSON payload, if any.
pub data: Option<serde_json::Value>,
/// Error message, if any.
pub error: Option<String>,
/// Duration in milliseconds (per the field name).
pub duration_ms: i64,
/// URL of a produced file, if any — presumably an uploaded artifact; TODO confirm.
pub file_url: Option<String>,
}

6
crates/shared/src/lib.rs Normal file
View File

@@ -0,0 +1,6 @@
// Public modules of the `shared` crate.
/// Generic API response envelope.
pub mod api;
/// Application configuration — presumably where `AppConfig` lives; verify against the module file.
pub mod config;
/// Error types — presumably where `AppError` lives; verify against the module file.
pub mod error;
/// Crawl job and crawl result payloads.
pub mod jobs;
/// Database row models and crawl request/response types.
pub mod models;
/// Queue payloads and key constants — presumably `Job`, `JobResult`, `QUEUE_NAME`; verify against the module file.
pub mod queue;

136
crates/shared/src/models.rs Normal file
View File

@@ -0,0 +1,136 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sqlx::FromRow;
use uuid::Uuid;
/// A user account record; derives `FromRow` so it can be loaded from a `sqlx` query row.
///
/// NOTE(review): this type derives `Serialize` and `Debug`, so `password_hash`
/// is included in serialized and debug output — confirm it is never sent to
/// clients or logged as-is.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct User {
/// User identifier.
pub id: Uuid,
/// Email address.
pub email: String,
/// Password hash, if one is set — presumably `None` for accounts without a password; TODO confirm.
pub password_hash: Option<String>,
/// Google account identifier, if linked.
pub google_id: Option<String>,
/// Credit balance (signed 64-bit).
pub credits: i64,
/// Tier name as a free-form string; valid values are not defined here.
pub tier: String,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last-update timestamp (UTC).
pub updated_at: DateTime<Utc>,
}
/// An API key record; derives `FromRow` so it can be loaded from a `sqlx` query row.
///
/// NOTE(review): `key_hash` is included in `Serialize`/`Debug` output — confirm
/// this type is not exposed to clients or logs as-is.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct ApiKey {
/// API key identifier.
pub id: Uuid,
/// Identifier of the owning user.
pub user_id: Uuid,
/// Hash of the key (per the field name); the hashing scheme is not visible here.
pub key_hash: String,
/// Name of the key.
pub name: String,
/// When the key was last used, if ever recorded.
pub last_used_at: Option<DateTime<Utc>>,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// A link between a user and an external OAuth provider account; derives `FromRow`.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct OAuthAccount {
/// Record identifier.
pub id: Uuid,
/// Identifier of the linked user.
pub user_id: Uuid,
/// Provider name as a free-form string; valid values are not defined here.
pub provider: String,
/// The account's identifier at the provider.
pub provider_account_id: String,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// A subscription record with optional Stripe identifiers; derives `FromRow`.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct Subscription {
/// Subscription identifier.
pub id: Uuid,
/// Identifier of the subscribed user.
pub user_id: Uuid,
/// Stripe customer identifier, if any.
pub stripe_customer_id: Option<String>,
/// Stripe subscription identifier, if any.
pub stripe_subscription_id: Option<String>,
/// Stripe price identifier, if any.
pub stripe_price_id: Option<String>,
/// Status as a free-form string; valid values are not defined here.
pub status: String,
/// Tier name as a free-form string; valid values are not defined here.
pub tier: String,
/// Start of the current period, if known.
pub current_period_start: Option<DateTime<Utc>>,
/// End of the current period, if known.
pub current_period_end: Option<DateTime<Utc>>,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last-update timestamp (UTC).
pub updated_at: DateTime<Utc>,
}
/// A usage log entry recording one request; derives `FromRow`.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct UsageLog {
/// Log entry identifier.
pub id: Uuid,
/// Identifier of the user.
pub user_id: Uuid,
/// Identifier of the API key.
pub api_key_id: Uuid,
/// Endpoint name as a free-form string.
pub endpoint: String,
/// Requested URL as a string.
pub url: String,
/// Status as a free-form string; valid values are not defined here.
pub status: String,
/// Number of credits used.
pub credits_used: i64,
/// Duration in milliseconds (per the field name).
pub duration_ms: i64,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// A team record; derives `FromRow`.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct Team {
/// Team identifier.
pub id: Uuid,
/// Team name.
pub name: String,
/// Team slug — presumably a URL-safe unique name; TODO confirm.
pub slug: String,
/// Identifier of the owner — presumably a user id; TODO confirm.
pub owner_id: Uuid,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last-update timestamp (UTC).
pub updated_at: DateTime<Utc>,
}
/// A team membership record; derives `FromRow`.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct TeamMember {
/// Membership identifier.
pub id: Uuid,
/// Identifier of the team.
pub team_id: Uuid,
/// Identifier of the member user.
pub user_id: Uuid,
/// Role as a free-form string; valid values are not defined here.
pub role: String,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// A crawl request: a target URL plus optional crawl options.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlRequest {
/// Target URL, stored as an unvalidated string.
pub url: String,
/// Crawl options; `#[serde(default)]` means a missing `options` key
/// deserializes to `CrawlOptions::default()` (every field `None`).
#[serde(default)]
pub options: CrawlOptions,
}
/// Optional per-request crawl settings.
///
/// Every field is an `Option`; `Default` yields all `None`, and each `None` field
/// is omitted from serialized output via `skip_serializing_if`.
///
/// NOTE(review): how each option is interpreted is decided by the consumer of
/// these options, which is not visible in this file. The per-field notes below
/// are inferred from the field names only — confirm against the consumer.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CrawlOptions {
/// Presumably: capture the full page rather than only the viewport.
#[serde(skip_serializing_if = "Option::is_none")]
pub full_page: Option<bool>,
/// Presumably: viewport width (units not specified here).
#[serde(skip_serializing_if = "Option::is_none")]
pub width: Option<u32>,
/// Presumably: viewport height (units not specified here).
#[serde(skip_serializing_if = "Option::is_none")]
pub height: Option<u32>,
/// Presumably: something to wait for before proceeding (format not specified here).
#[serde(skip_serializing_if = "Option::is_none")]
pub wait_for: Option<String>,
/// Timeout value; units are not specified here — TODO confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub timeout: Option<u64>,
/// Presumably: User-Agent override.
#[serde(skip_serializing_if = "Option::is_none")]
pub user_agent: Option<String>,
/// List of selector strings (selector syntax not specified here).
#[serde(skip_serializing_if = "Option::is_none")]
pub selectors: Option<Vec<String>>,
/// Presumably: include HTML in the result.
#[serde(skip_serializing_if = "Option::is_none")]
pub include_html: Option<bool>,
/// Webhook URL as an unvalidated string.
#[serde(skip_serializing_if = "Option::is_none")]
pub webhook_url: Option<String>,
/// Session identifier as a free-form string.
#[serde(skip_serializing_if = "Option::is_none")]
pub session_id: Option<String>,
/// Map of header name to header value.
#[serde(skip_serializing_if = "Option::is_none")]
pub headers: Option<std::collections::HashMap<String, String>>,
/// Presumably: emulate a mobile device.
#[serde(skip_serializing_if = "Option::is_none")]
pub mobile: Option<bool>,
/// Presumably: scroll to the bottom of the page before capturing.
#[serde(skip_serializing_if = "Option::is_none")]
pub scroll_to_bottom: Option<bool>,
/// Presumably: enable stealth behaviour in the browser.
#[serde(skip_serializing_if = "Option::is_none")]
pub stealth: Option<bool>,
/// Presumably: route the request through a proxy.
#[serde(skip_serializing_if = "Option::is_none")]
pub use_proxy: Option<bool>,
/// Presumably: attempt to solve captchas.
#[serde(skip_serializing_if = "Option::is_none")]
pub solve_captcha: Option<bool>,
}
/// Response body for a crawl request.
///
/// All optional fields are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlResponse {
/// Whether the crawl succeeded.
pub success: bool,
/// Arbitrary JSON payload, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub data: Option<serde_json::Value>,
/// Remaining call count, if reported — presumably the caller's remaining quota; TODO confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub calls_remaining: Option<i64>,
/// Error message, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
}

View File

@@ -0,0 +1,27 @@
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::models::CrawlOptions;
/// A serializable job payload.
///
/// NOTE(review): `CrawlOptions` also has a `webhook_url` field; which of the two
/// takes precedence is not visible here — confirm against the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Job {
/// Job identifier.
pub id: Uuid,
/// Identifier of the user the job belongs to.
pub user_id: Uuid,
/// Identifier of the API key associated with the job.
pub api_key_id: Uuid,
/// Endpoint name as a free-form string.
pub endpoint: String,
/// Target URL, stored as an unvalidated string.
pub url: String,
/// Per-request crawl options.
pub options: CrawlOptions,
/// Webhook URL, if any. Not skipped when `None`, so it is always serialized.
pub webhook_url: Option<String>,
}
/// The serializable outcome of a job.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobResult {
/// Identifier — presumably the id of the job this result belongs to; TODO confirm.
pub id: Uuid,
/// Whether the job succeeded.
pub success: bool,
/// Arbitrary JSON payload, if any.
pub data: Option<serde_json::Value>,
/// Error message, if any.
pub error: Option<String>,
/// Duration in milliseconds (per the field name).
pub duration_ms: i64,
}
/// Name of the job queue — presumably used as a Redis key; TODO confirm against the queue code.
pub const QUEUE_NAME: &str = "crawlapi:jobs";
/// Key prefix for job results — presumably a job id is appended to form the full key; TODO confirm.
pub const RESULT_PREFIX: &str = "crawlapi:results:";