Initial commit: Full Crawl API implementation
This commit is contained in:
15
crates/shared/Cargo.toml
Normal file
15
crates/shared/Cargo.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
[package]
name = "shared"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true }
regex = { workspace = true }
config = { workspace = true }
sqlx = { workspace = true }
|
||||
26
crates/shared/src/api.rs
Normal file
26
crates/shared/src/api.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ApiResponse<T> {
|
||||
pub success: bool,
|
||||
pub data: Option<T>,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
impl<T> ApiResponse<T> {
|
||||
pub fn ok(data: T) -> Self {
|
||||
Self {
|
||||
success: true,
|
||||
data: Some(data),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn err(msg: impl Into<String>) -> Self {
|
||||
Self {
|
||||
success: false,
|
||||
data: None,
|
||||
error: Some(msg.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
25
crates/shared/src/config.rs
Normal file
25
crates/shared/src/config.rs
Normal file
@@ -0,0 +1,25 @@
|
||||
use serde::Deserialize;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct AppConfig {
|
||||
pub database_url: String,
|
||||
pub redis_url: String,
|
||||
pub jwt_secret: String,
|
||||
pub s3_endpoint: String,
|
||||
pub s3_bucket: String,
|
||||
pub s3_region: String,
|
||||
pub s3_access_key: String,
|
||||
pub s3_secret_key: String,
|
||||
pub app_port: u16,
|
||||
pub app_host: String,
|
||||
pub playwright_script_path: String,
|
||||
}
|
||||
|
||||
impl AppConfig {
|
||||
pub fn from_env() -> Result<Self, config::ConfigError> {
|
||||
config::Config::builder()
|
||||
.add_source(config::Environment::default())
|
||||
.build()?
|
||||
.try_deserialize()
|
||||
}
|
||||
}
|
||||
41
crates/shared/src/error.rs
Normal file
41
crates/shared/src/error.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum AppError {
|
||||
#[error("Database error: {0}")]
|
||||
Database(#[from] sqlx::Error),
|
||||
#[error("Redis error: {0}")]
|
||||
Redis(String),
|
||||
#[error("S3 error: {0}")]
|
||||
S3(String),
|
||||
#[error("Invalid URL: {0}")]
|
||||
InvalidUrl(String),
|
||||
#[error("Browser automation failed: {0}")]
|
||||
BrowserError(String),
|
||||
#[error("Rate limit exceeded")]
|
||||
RateLimit,
|
||||
#[error("Insufficient credits")]
|
||||
InsufficientCredits,
|
||||
#[error("Unauthorized")]
|
||||
Unauthorized,
|
||||
#[error("Not found")]
|
||||
NotFound,
|
||||
#[error("Bad request: {0}")]
|
||||
BadRequest(String),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
impl AppError {
|
||||
pub fn status_code(&self) -> u16 {
|
||||
match self {
|
||||
AppError::InvalidUrl(_) | AppError::BadRequest(_) => 400,
|
||||
AppError::Unauthorized => 401,
|
||||
AppError::InsufficientCredits => 403,
|
||||
AppError::NotFound => 404,
|
||||
AppError::RateLimit => 429,
|
||||
AppError::BrowserError(_) => 500,
|
||||
_ => 500,
|
||||
}
|
||||
}
|
||||
}
|
||||
24
crates/shared/src/jobs.rs
Normal file
24
crates/shared/src/jobs.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::models::CrawlOptions;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CrawlJob {
|
||||
pub job_id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub api_key_id: Uuid,
|
||||
pub endpoint: String,
|
||||
pub url: String,
|
||||
pub options: CrawlOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CrawlResult {
|
||||
pub job_id: Uuid,
|
||||
pub success: bool,
|
||||
pub data: Option<serde_json::Value>,
|
||||
pub error: Option<String>,
|
||||
pub duration_ms: i64,
|
||||
pub file_url: Option<String>,
|
||||
}
|
||||
6
crates/shared/src/lib.rs
Normal file
6
crates/shared/src/lib.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
pub mod api;
|
||||
pub mod config;
|
||||
pub mod error;
|
||||
pub mod jobs;
|
||||
pub mod models;
|
||||
pub mod queue;
|
||||
136
crates/shared/src/models.rs
Normal file
136
crates/shared/src/models.rs
Normal file
@@ -0,0 +1,136 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::FromRow;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct User {
|
||||
pub id: Uuid,
|
||||
pub email: String,
|
||||
pub password_hash: Option<String>,
|
||||
pub google_id: Option<String>,
|
||||
pub credits: i64,
|
||||
pub tier: String,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct ApiKey {
|
||||
pub id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub key_hash: String,
|
||||
pub name: String,
|
||||
pub last_used_at: Option<DateTime<Utc>>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct OAuthAccount {
|
||||
pub id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub provider: String,
|
||||
pub provider_account_id: String,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct Subscription {
|
||||
pub id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub stripe_customer_id: Option<String>,
|
||||
pub stripe_subscription_id: Option<String>,
|
||||
pub stripe_price_id: Option<String>,
|
||||
pub status: String,
|
||||
pub tier: String,
|
||||
pub current_period_start: Option<DateTime<Utc>>,
|
||||
pub current_period_end: Option<DateTime<Utc>>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct UsageLog {
|
||||
pub id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub api_key_id: Uuid,
|
||||
pub endpoint: String,
|
||||
pub url: String,
|
||||
pub status: String,
|
||||
pub credits_used: i64,
|
||||
pub duration_ms: i64,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct Team {
|
||||
pub id: Uuid,
|
||||
pub name: String,
|
||||
pub slug: String,
|
||||
pub owner_id: Uuid,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
|
||||
pub struct TeamMember {
|
||||
pub id: Uuid,
|
||||
pub team_id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub role: String,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CrawlRequest {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub options: CrawlOptions,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct CrawlOptions {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub full_page: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub width: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub height: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub wait_for: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub timeout: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub user_agent: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub selectors: Option<Vec<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub include_html: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub webhook_url: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub session_id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub headers: Option<std::collections::HashMap<String, String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub mobile: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub scroll_to_bottom: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stealth: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub use_proxy: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub solve_captcha: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CrawlResponse {
|
||||
pub success: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub data: Option<serde_json::Value>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub calls_remaining: Option<i64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
}
|
||||
27
crates/shared/src/queue.rs
Normal file
27
crates/shared/src/queue.rs
Normal file
@@ -0,0 +1,27 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::models::CrawlOptions;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Job {
|
||||
pub id: Uuid,
|
||||
pub user_id: Uuid,
|
||||
pub api_key_id: Uuid,
|
||||
pub endpoint: String,
|
||||
pub url: String,
|
||||
pub options: CrawlOptions,
|
||||
pub webhook_url: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JobResult {
|
||||
pub id: Uuid,
|
||||
pub success: bool,
|
||||
pub data: Option<serde_json::Value>,
|
||||
pub error: Option<String>,
|
||||
pub duration_ms: i64,
|
||||
}
|
||||
|
||||
/// Key under which jobs are queued.
pub const QUEUE_NAME: &str = "crawlapi:jobs";

/// Key prefix for stored job results; presumably completed with a job id — TODO confirm.
pub const RESULT_PREFIX: &str = "crawlapi:results:";
|
||||
Reference in New Issue
Block a user