diff --git a/.gitignore b/.gitignore
index d0be852..6ab4ef9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ __pycache__
 .env
 .env*
 translate/output
-translate/source
\ No newline at end of file
+translate/source
+*.db
\ No newline at end of file
diff --git a/translate/spider.py b/translate/spider.py
new file mode 100644
index 0000000..c8aa510
--- /dev/null
+++ b/translate/spider.py
@@ -0,0 +1,134 @@
+import requests
+from bs4 import BeautifulSoup
+import sqlite3
+import urllib.robotparser as urobot
+from urllib.parse import urljoin, urlparse
+
+
+MAX_RECURSION_DEPTH = 5
+MAX_URLS = 1000
+MAX_THREADS = 10
+HEADERS = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
+}
+
+conn = sqlite3.connect("crawler.db")
+cursor = conn.cursor()
+
+# Initialization: create the URL table if it does not exist
+cursor.execute(
+    """
+CREATE TABLE IF NOT EXISTS url_list (
+    url TEXT PRIMARY KEY,
+    fetched_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    visited BOOLEAN,
+    parent_url TEXT,
+    child_url_count INTEGER
+)
+"""
+)
+conn.commit()
+
+
+def fetch_url(url, headers=None):
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        return response.text, response.url
+    except requests.RequestException as e:
+        print(f"Error fetching {url}: {e}")
+        return None, url
+
+
+def extract_links(html, base_url):
+    soup = BeautifulSoup(html, "html.parser")
+    links = set()
+    for a_tag in soup.find_all("a", href=True):
+        href = a_tag["href"]
+        full_url = urljoin(base_url, href)
+        if urlparse(full_url).netloc == urlparse(base_url).netloc:
+            links.add(full_url)
+    return links
+
+
+def fetch_sitemap(sitemap_url):
+    html, _ = fetch_url(sitemap_url)
+    if html:
+        soup = BeautifulSoup(html, "xml")
+        urls = {loc.text for loc in soup.find_all("loc")}
+        return urls
+    return set()
+
+
+def save_url(url, parent_url=None):
+    cursor = conn.cursor()
+    cursor.execute(
+        """
+        INSERT OR IGNORE INTO url_list (url, visited, parent_url, child_url_count)
+        VALUES (?, ?, ?, ?)
+        """,
+        (url, False, parent_url, 0),
+    )
+    conn.commit()
+
+
+def update_url(url, child_url_count):
+    cursor.execute(
+        """
+        UPDATE url_list SET child_url_count = ? WHERE url = ?
+        """,
+        (child_url_count, url),
+    )
+    conn.commit()
+
+
+def crawl(url, rp=None, depth=0):
+    if depth > MAX_RECURSION_DEPTH:
+        return
+
+    if (
+        rp
+        and not rp.can_fetch("*", url)
+        and not rp.can_fetch("Googlebot", url)
+        and not rp.can_fetch("Baiduspider", url)
+    ):
+        return
+    save_url(url)
+    html, fetched_url = fetch_url(url, HEADERS)
+    if not html:
+        return
+
+    cursor.execute(
+        """
+        UPDATE url_list SET visited = TRUE, fetched_time = CURRENT_TIMESTAMP WHERE url = ?
+        """,
+        (fetched_url,),
+    )
+    conn.commit()
+
+    links = extract_links(html, fetched_url)
+    for link in links:
+        save_url(link, fetched_url)
+
+    update_url(fetched_url, len(links))
+
+    for link in links:
+        crawl(link, rp=rp, depth=depth + 1)
+
+
+def main(seed_url, rp, sitemap=None):
+    if sitemap:
+        sitemap_urls = fetch_sitemap(sitemap)
+        for sitemap_url in sitemap_urls:
+            save_url(sitemap_url)
+    crawl(seed_url, rp=rp)
+
+# Example usage
+# if __name__ == "__main__":
+#     seed_url = "https://www.bbc.co.uk/news"
+#     rp = urobot.RobotFileParser()
+#     rp.set_url("https://www.bbc.co.uk/robots.txt")
+#     rp.read()
+#     main(seed_url, rp, "https://www.bbc.co.uk/sitemap.xml")
+#     conn.close()