添加爬取arXiv论文的功能，支持根据查询获取论文标题、作者和摘要

2025-07-25 18:11:11 +08:00
parent 87f2756fdf
commit 2846ebd310
1 changed files with 34 additions and 0 deletions
--- a/crawl-arxiv.py
+++ b/crawl-arxiv.py
@@ -0,0 +1,34 @@
 import requests
 from bs4 import BeautifulSoup
 def fetch_arxiv_papers(query, max_results=10, timeout=10):
    """Query the arXiv export API and print title, authors and abstract per paper.

    Args:
        query: Value sent as the ``search_query`` request parameter,
            e.g. ``"cat:math.MP"``.
        max_results: Value sent as the ``max_results`` request parameter.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection raises instead of blocking forever.

    Returns:
        None. One title/authors/abstract group is printed per ``<entry>``
        element of the response; when the HTTP status is not 200 a failure
        message containing the status code is printed instead.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
    }
    response = requests.get(base_url, params=params, timeout=timeout)

    # Guard clause: report the failure and stop before any parsing.
    if response.status_code != 200:
        print("请求失败，状态码:", response.status_code)
        return

    soup = BeautifulSoup(response.content, "xml")
    for entry in soup.find_all("entry"):
        # find() returns None for a missing element; fall back to an empty
        # string rather than failing on ``None.text``.
        title_tag = entry.find("title")
        summary_tag = entry.find("summary")
        # Collapse internal whitespace so a title that is wrapped over
        # several lines in the feed still prints on a single line.
        title = " ".join(title_tag.text.split()) if title_tag is not None else ""
        summary = summary_tag.text.strip() if summary_tag is not None else ""

        # Collect author names, skipping <author> elements without a <name>.
        name_tags = (author.find("name") for author in entry.find_all("author"))
        author_names = [tag.text.strip() for tag in name_tags if tag is not None]

        print(f"标题: {title}")
        print(f"作者: {', '.join(author_names)}")
        print(f"摘要: {summary}\n")
 # Example invocation: print the first five results for the math.MP category query.
 fetch_arxiv_papers(query="cat:math.MP", max_results=5)