import requests
from bs4 import BeautifulSoup


def _child_text(entry, tag_name):
    """Return the stripped text of the first *tag_name* element under *entry*.

    Returns an empty string when the element is absent, so a malformed
    entry is printed with a blank field instead of raising AttributeError.
    """
    node = entry.find(tag_name)
    return node.text.strip() if node is not None else ""


def fetch_arxiv_papers(query, max_results=10, *, timeout=10):
    """Query the arXiv export API and print each returned entry.

    For every ``<entry>`` element in the response feed, the title, the
    comma-separated author names and the summary are printed to stdout.
    If the HTTP status is not 200, a failure message with the status code
    is printed instead.

    Args:
        query: Value sent as the ``search_query`` request parameter,
            e.g. ``"cat:math.MP"``.
        max_results: Value sent as the ``max_results`` request parameter.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        None. All output goes to stdout.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection failure or timeout.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        print("请求失败,状态码:", response.status_code)
        return

    soup = BeautifulSoup(response.content, "xml")
    for entry in soup.find_all("entry"):
        title = _child_text(entry, "title")
        summary = _child_text(entry, "summary")

        # Collect author names, skipping any <author> without a <name> child.
        name_nodes = (author.find("name") for author in entry.find_all("author"))
        author_names = [node.text.strip() for node in name_nodes if node is not None]

        print(f"标题: {title}")
        print(f"作者: {', '.join(author_names)}")
        print(f"摘要: {summary}\n")


# Example invocation; guarded so importing this module does not hit the network.
if __name__ == "__main__":
    fetch_arxiv_papers("cat:math.MP", max_results=5)