添加爬取arXiv论文的功能,支持根据查询获取论文标题、作者和摘要
This commit is contained in:
34
crawl-arxiv.py
Normal file
34
crawl-arxiv.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def _tag_text(tag):
    """Return the stripped text of a parsed tag, or "" when the tag is absent."""
    return tag.text.strip() if tag is not None else ""


def fetch_arxiv_papers(query, max_results=10):
    """Query the arXiv API and print title, authors and abstract of each result.

    Args:
        query: Search expression sent as the ``search_query`` parameter,
            e.g. ``"cat:math.MP"``.
        max_results: Maximum number of entries requested from the API.

    Returns:
        None. Results, or a failure message for a non-200 response, are
        written to stdout.

    Raises:
        requests.RequestException: If the HTTP request itself fails
            (connection error, timeout, ...).
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(base_url, params=params, timeout=30)

    if response.status_code != 200:
        print("请求失败,状态码:", response.status_code)
        return

    soup = BeautifulSoup(response.content, "xml")
    for entry in soup.find_all("entry"):
        # Entries lacking a <title> or <summary> yield an empty string
        # instead of raising AttributeError on None.
        title = _tag_text(entry.title)
        summary = _tag_text(entry.summary)

        # Collect author names, skipping <author> elements without a <name>.
        name_tags = (author.find("name") for author in entry.find_all("author"))
        author_names = [_tag_text(tag) for tag in name_tags if tag is not None]

        print(f"标题: {title}")
        print(f"作者: {', '.join(author_names)}")
        print(f"摘要: {summary}\n")
|
||||||
|
|
||||||
|
# Example call: print up to five results for the arXiv category query "cat:math.MP".
fetch_arxiv_papers(query="cat:math.MP", max_results=5)
|
||||||
Reference in New Issue
Block a user