diff --git a/.gitignore b/.gitignore
index c9064be..9580abc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,7 +41,7 @@ build/
temp/
-datasets
+ml_new/datasets/
mutagen.yml
diff --git a/.idea/data_source_mapping.xml b/.idea/data_source_mapping.xml
index 7bc8724..7ba958c 100644
--- a/.idea/data_source_mapping.xml
+++ b/.idea/data_source_mapping.xml
@@ -4,6 +4,7 @@
+
\ No newline at end of file
diff --git a/.idea/sqldialects.xml b/.idea/sqldialects.xml
index bf5df38..01b4ad0 100644
--- a/.idea/sqldialects.xml
+++ b/.idea/sqldialects.xml
@@ -2,6 +2,7 @@
+
diff --git a/packages/core/net/delegate.ts b/packages/core/net/delegate.ts
index b48cfb9..ae01cd8 100644
--- a/packages/core/net/delegate.ts
+++ b/packages/core/net/delegate.ts
@@ -14,7 +14,12 @@ import * as OpenApi from "@alicloud/openapi-client";
import Stream from "@alicloud/darabonba-stream";
import * as Util from "@alicloud/tea-util";
import { Readable } from "stream";
-import { ipProxyCounter, ipProxyErrorCounter } from "crawler/metrics";
+import {
+ aliFCCounter,
+ aliFCErrorCounter,
+ ipProxyCounter,
+ ipProxyErrorCounter
+} from "crawler/metrics";
type ProxyType = "native" | "alicloud-fc" | "ip-proxy";
@@ -425,7 +430,14 @@ export class NetworkDelegate {
"ALICLOUD_PROXY_ERR"
);
}
- return await this.alicloudFcRequest(url, proxy.data);
+ try {
+ return await this.alicloudFcRequest(url, proxy.data);
+ } catch (e) {
+ aliFCErrorCounter.add(1);
+ throw e;
+ } finally {
+ aliFCCounter.add(1);
+ }
case "ip-proxy":
if (!isIpProxy(proxy)) {
throw new NetSchedulerError(
@@ -526,7 +538,7 @@ export class NetworkDelegate {
}
const ipPool = this.ipPools[proxyName];
- const maxRetries = 3;
+ const maxRetries = 5;
let lastError: Error | null = null;
@@ -691,12 +703,10 @@ const config = {
snapshotVideo: {
provider: "bilibili",
proxies: ["ip_proxy_pool"],
- limiters: bili_normal
},
bulkSnapshot: {
provider: "bilibili",
proxies: ["ip_proxy_pool"],
- limiters: bili_strict
}
}
} as const satisfies NetworkConfig;
diff --git a/packages/crawler/metrics/index.ts b/packages/crawler/metrics/index.ts
index 8e20b56..af9432e 100644
--- a/packages/crawler/metrics/index.ts
+++ b/packages/crawler/metrics/index.ts
@@ -25,6 +25,14 @@ export const ipProxyErrorCounter = anotherMeter.createCounter("ip_proxy_error_co
description: "Number of errors thrown by IP proxy"
});
+export const aliFCCounter = anotherMeter.createCounter("ali_fc_count", { // bumped in a `finally` around the alicloud-fc request, so it counts failed attempts as well as successful ones
+ description: "Number of requests using Ali FC"
+});
+
+export const aliFCErrorCounter = anotherMeter.createCounter("ali_fc_error_count", { // bumped only when the alicloud-fc request throws; the error is rethrown to the caller afterwards
+ description: "Number of errors thrown by Ali FC"
+});
+
export const jobCounter = meter.createCounter("job_count", {
description: "Number of executed BullMQ jobs"
});
diff --git a/queries/schedule_window.sql b/queries/schedule_window.sql
new file mode 100644
index 0000000..c4c7163
--- /dev/null
+++ b/queries/schedule_window.sql
@@ -0,0 +1,9 @@
+SET TIME ZONE 'Asia/Shanghai'; -- NOTE(review): presumably so the windows below are computed in Shanghai local time; only matters if started_at is timestamptz — confirm
+SELECT -- counts not-yet-completed snapshot_schedule rows per 5-minute window of started_at
+ date_trunc('hour', started_at) + -- start of the hour containing started_at ...
+ (EXTRACT(minute FROM started_at)::int / 5 * INTERVAL '5 minutes') AS window_start, -- ... plus the minute floored to a multiple of 5 (the ::int cast makes `/ 5` an integer division)
+ COUNT(*) AS count -- number of matching rows in the window
+FROM snapshot_schedule
+WHERE started_at >= NOW() - INTERVAL '1 hours' AND status != 'completed' AND started_at <= NOW() + INTERVAL '14 days' -- from one hour ago up to 14 days ahead, excluding rows whose status is 'completed'
+GROUP BY 1 -- 1 refers to the first select-list column, i.e. window_start
+ORDER BY window_start -- chronological order
\ No newline at end of file