Initial commit - Horux Despachos NL
This commit is contained in:
98
apps/api/src/services/sat/sweep-stale-jobs.service.ts
Normal file
98
apps/api/src/services/sat/sweep-stale-jobs.service.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { prisma } from '../../config/database.js';
|
||||
|
||||
/**
 * Summary returned by `sweepStaleSatJobs`.
 *
 * The `*Found` counters describe what the sweep detected; the `*Marked`
 * counters describe what it actually wrote (both stay at 0 in a dry-run).
 */
export interface SweepResult {
  /** Number of stale `pending` jobs detected. */
  pendingFound: number;
  /** Number of stale `running` jobs detected. */
  runningFound: number;
  /** Number of stale `pending` jobs flipped to `failed`. */
  pendingMarked: number;
  /** Number of stale `running` jobs flipped to `failed`. */
  runningMarked: number;
  /** One record per detected stale job (pending jobs first, then running). */
  entries: {
    /** Job id. */
    id: string;
    /** Tenant the job belongs to. */
    tenantId: string;
    /** Which staleness category the job fell into. */
    kind: 'pending-stale' | 'running-stale';
    /** Age of the job in whole hours (rounded) at sweep time. */
    ageHours: number;
  }[];
}
|
||||
|
||||
/**
 * Watchdog for stale `sat_sync_jobs` rows.
 *
 * Two categories are swept:
 *  1. `pending` jobs whose `nextRetryAt` is more than `pendingHours` in the
 *     past. Per the original author's notes, the hourly `retryTimedOutJobs`
 *     cron normally picks these up, but if it never starts (dev, outage,
 *     long restart) the job stays stuck and blocks the lock for new syncs
 *     of the same (tenant, taxpayer).
 *  2. `running` jobs whose `startedAt` is more than `runningHours` in the
 *     past. Per the original author's notes, a typical initial sync ends in
 *     under 2h, so such a job is almost certainly orphaned by a dead
 *     process and its SAT request has already expired.
 *
 * Both categories are marked `failed` with a descriptive `errorMessage`.
 * Re-running is idempotent: rows already marked `failed` no longer match
 * either query.
 *
 * Each write is conditional on the row STILL matching the staleness
 * predicate (same status, timestamp still older than the cutoff). A job that
 * was picked up, finished, or deleted between the read and the write is
 * left untouched and is not counted in `pendingMarked` / `runningMarked`.
 *
 * @param params.apply - `false` (default) performs a dry-run that only
 *   reports what would be marked; `true` writes to the database.
 * @param params.pendingHours - staleness threshold for `pending` jobs
 *   (default 12).
 * @param params.runningHours - staleness threshold for `running` jobs
 *   (default 4).
 * @returns counts of jobs found/marked plus one entry per stale job found.
 */
export async function sweepStaleSatJobs(params: {
  apply: boolean;
  pendingHours?: number;
  runningHours?: number;
} = { apply: false }): Promise<SweepResult> {
  const MS_PER_HOUR = 3_600_000;
  const pendingHours = params.pendingHours ?? 12;
  const runningHours = params.runningHours ?? 4;

  // A single "now" is used for cutoffs, ages and `completedAt` so that all
  // values produced by one sweep are mutually consistent.
  const now = new Date();
  const nowMs = now.getTime();
  const pendingCutoff = new Date(nowMs - pendingHours * MS_PER_HOUR);
  const runningCutoff = new Date(nowMs - runningHours * MS_PER_HOUR);

  const stalePendingWhere = { status: 'pending', nextRetryAt: { lt: pendingCutoff } };
  const staleRunningWhere = { status: 'running', startedAt: { lt: runningCutoff } };

  const stalePending = await prisma.satSyncJob.findMany({
    where: stalePendingWhere,
    orderBy: { createdAt: 'asc' },
  });
  const staleRunning = await prisma.satSyncJob.findMany({
    where: staleRunningWhere,
    orderBy: { createdAt: 'asc' },
  });

  const result: SweepResult = {
    pendingFound: stalePending.length,
    runningFound: staleRunning.length,
    pendingMarked: 0,
    runningMarked: 0,
    entries: [],
  };

  // Age is measured from the timestamp that made the job stale; `createdAt`
  // is only a defensive fallback should that timestamp be null.
  for (const job of stalePending) {
    const since = job.nextRetryAt ?? job.createdAt;
    const ageHours = Math.round((nowMs - since.getTime()) / MS_PER_HOUR);
    result.entries.push({ id: job.id, tenantId: job.tenantId, kind: 'pending-stale', ageHours });
  }
  for (const job of staleRunning) {
    const since = job.startedAt ?? job.createdAt;
    const ageHours = Math.round((nowMs - since.getTime()) / MS_PER_HOUR);
    result.entries.push({ id: job.id, tenantId: job.tenantId, kind: 'running-stale', ageHours });
  }

  // Dry-run: report only, never write.
  if (!params.apply) return result;

  for (const job of stalePending) {
    // `updateMany` re-checks the staleness predicate at write time, so a job
    // that changed state since the read is skipped instead of clobbered, and
    // a deleted row yields count 0 instead of throwing.
    const { count } = await prisma.satSyncJob.updateMany({
      where: { id: job.id, ...stalePendingWhere },
      data: {
        status: 'failed',
        completedAt: now,
        errorMessage: `Abandoned by watchdog: pending with nextRetryAt ${job.nextRetryAt?.toISOString()} > ${pendingHours}h in the past. Retry cron didn't pick it up.`,
      },
    });
    result.pendingMarked += count;
  }
  for (const job of staleRunning) {
    const { count } = await prisma.satSyncJob.updateMany({
      where: { id: job.id, ...staleRunningWhere },
      data: {
        status: 'failed',
        completedAt: now,
        errorMessage: `Abandoned by watchdog: running with startedAt ${job.startedAt?.toISOString()} > ${runningHours}h (process crash / orphan). SAT request is lost; re-launch manually.`,
      },
    });
    result.runningMarked += count;
  }

  return result;
}
|
||||
Reference in New Issue
Block a user