1
0

update: metrics for ali-fc

This commit is contained in:
alikia2x (寒寒) 2025-12-14 07:03:33 +08:00
parent ec90557be0
commit 312f011627
WARNING! Although there is a key with this ID in the database, it does not verify this commit! This commit is SUSPICIOUS.
GPG Key ID: 56209E0CCD8420C6
6 changed files with 35 additions and 6 deletions

2
.gitignore vendored
View File

@ -41,7 +41,7 @@ build/
temp/
datasets
ml_new/datasets/
mutagen.yml

View File

@ -4,6 +4,7 @@
<file url="file://$PROJECT_DIR$/packages/crawler/db/snapshot.ts" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
<file url="file://$PROJECT_DIR$/packages/crawler/mq/task/removeAllTimeoutSchedules.ts" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
<file url="file://$PROJECT_DIR$/queries/schedule_count.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
<file url="file://$PROJECT_DIR$/queries/schedule_window.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
<file url="file://$PROJECT_DIR$/queries/snapshots_count.sql" value="0d2dd3d3-bd27-4e5f-b0fa-ff14fb2a6bef" />
</component>
</project>

View File

@ -2,6 +2,7 @@
<project version="4">
<component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$/queries/schedule_count.sql" dialect="PostgreSQL" />
<file url="file://$PROJECT_DIR$/queries/schedule_window.sql" dialect="PostgreSQL" />
<file url="file://$PROJECT_DIR$/queries/snapshots_count.sql" dialect="PostgreSQL" />
<file url="PROJECT" dialect="PostgreSQL" />
</component>

View File

@ -14,7 +14,12 @@ import * as OpenApi from "@alicloud/openapi-client";
import Stream from "@alicloud/darabonba-stream";
import * as Util from "@alicloud/tea-util";
import { Readable } from "stream";
import { ipProxyCounter, ipProxyErrorCounter } from "crawler/metrics";
import {
aliFCCounter,
aliFCErrorCounter,
ipProxyCounter,
ipProxyErrorCounter
} from "crawler/metrics";
type ProxyType = "native" | "alicloud-fc" | "ip-proxy";
@ -425,7 +430,14 @@ export class NetworkDelegate<const C extends NetworkConfig> {
"ALICLOUD_PROXY_ERR"
);
}
return await this.alicloudFcRequest<R>(url, proxy.data);
try {
return await this.alicloudFcRequest<R>(url, proxy.data);
} catch (e) {
aliFCErrorCounter.add(1);
throw e;
} finally {
aliFCCounter.add(1);
}
case "ip-proxy":
if (!isIpProxy(proxy)) {
throw new NetSchedulerError(
@ -526,7 +538,7 @@ export class NetworkDelegate<const C extends NetworkConfig> {
}
const ipPool = this.ipPools[proxyName];
const maxRetries = 3;
const maxRetries = 5;
let lastError: Error | null = null;
@ -691,12 +703,10 @@ const config = {
snapshotVideo: {
provider: "bilibili",
proxies: ["ip_proxy_pool"],
limiters: bili_normal
},
bulkSnapshot: {
provider: "bilibili",
proxies: ["ip_proxy_pool"],
limiters: bili_strict
}
}
} as const satisfies NetworkConfig;

View File

@ -25,6 +25,14 @@ export const ipProxyErrorCounter = anotherMeter.createCounter("ip_proxy_error_co
description: "Number of errors thrown by IP proxy"
});
/**
 * Counter registered as "ali_fc_count" on `anotherMeter`: number of requests using Ali FC.
 *
 * The network delegate increments it by 1 in the `finally` clause wrapped around its
 * `alicloudFcRequest` call, so every attempt is counted whether it resolved or threw.
 */
export const aliFCCounter = anotherMeter.createCounter("ali_fc_count", {
description: "Number of requests using Ali FC"
});
/**
 * Counter registered as "ali_fc_error_count" on `anotherMeter`: number of errors thrown by Ali FC.
 *
 * The network delegate increments it by 1 in the `catch` clause wrapped around its
 * `alicloudFcRequest` call, immediately before rethrowing the caught error.
 */
export const aliFCErrorCounter = anotherMeter.createCounter("ali_fc_error_count", {
description: "Number of errors thrown by Ali FC"
});
/**
 * Counter registered as "job_count" on `meter` (a different meter object than the
 * proxy/FC counters use). Per its description it tracks executed BullMQ jobs.
 * NOTE(review): the increment sites are not visible here — confirm where `.add()` is called.
 */
export const jobCounter = meter.createCounter("job_count", {
description: "Number of executed BullMQ jobs"
});

View File

@ -0,0 +1,9 @@
-- Counts snapshot_schedule rows whose status is not 'completed', bucketed into
-- 5-minute windows of started_at, for rows starting between one hour ago and
-- 14 days from now. Returns one row per window: (window_start, count).

-- Sets the session time zone for the statements that follow.
-- NOTE(review): presumably so window_start is rendered in Shanghai local time; whether
-- this affects the result depends on started_at being timestamptz — confirm.
SET TIME ZONE 'Asia/Shanghai';
SELECT
-- Truncate to the hour, then add back the minutes rounded down to a multiple of 5:
-- the minute is cast to int, so "/ 5" is integer division (e.g. 13 / 5 = 2 -> +10 minutes).
date_trunc('hour', started_at) +
(EXTRACT(minute FROM started_at)::int / 5 * INTERVAL '5 minutes') AS window_start,
COUNT(*) AS count
FROM snapshot_schedule
-- Lower bound: one hour in the past; upper bound: 14 days in the future; completed rows excluded.
WHERE started_at >= NOW() - INTERVAL '1 hours' AND status != 'completed' AND started_at <= NOW() + INTERVAL '14 days'
-- "1" is the first select-list expression, i.e. window_start.
GROUP BY 1
ORDER BY window_start