添加爬取arXiv论文的功能，支持根据查询获取论文标题、作者和摘要

2025-07-25 18:11:11 +08:00
parent 87f2756fdf
commit 2846ebd310
1 changed file with 34 additions and 0 deletions
--- a/crawl-arxiv.py
+++ b/crawl-arxiv.py
@@ -0,0 +1,34 @@
+import requests
+from bs4 import BeautifulSoup
+
+def fetch_arxiv_papers(query, max_results=10):
+    """Query the arXiv API and print each paper's title, authors and abstract.
+
+    Args:
+        query: Value sent as the API's ``search_query`` parameter.
+        max_results: Maximum number of entries to request (default 10).
+
+    Raises:
+        requests.RequestException: On a network failure or timeout.
+    """
+    base_url = "http://export.arxiv.org/api/query"
+    params = {"search_query": query, "start": 0, "max_results": max_results}
+    # Without a timeout requests can block forever on a stalled connection.
+    response = requests.get(base_url, params=params, timeout=30)
+    if response.status_code != 200:
+        print("请求失败，状态码:", response.status_code)
+        return
+
+    soup = BeautifulSoup(response.content, "xml")
+    for entry in soup.find_all("entry"):
+        # Tolerate entries that lack a <title> or <summary> element.
+        title = entry.title.text.strip() if entry.title else ""
+        summary = entry.summary.text.strip() if entry.summary else ""
+        names = (author.find("name") for author in entry.find_all("author"))
+        author_names = [name.text.strip() for name in names if name]
+        print(f"标题: {title}")
+        print(f"作者: {', '.join(author_names)}")
+        print(f"摘要: {summary}\n")
+
+if __name__ == "__main__":  # example call; skipped when the file is imported
+    fetch_arxiv_papers("cat:math.MP", max_results=5)