添加爬取 arXiv 论文的功能,支持根据查询获取论文标题、作者和摘要
This commit is contained in:
34
crawl-arxiv.py
Normal file
34
crawl-arxiv.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def _tag_text(tag):
    """Return the stripped text of a parsed tag, or "" when the tag is absent."""
    return tag.text.strip() if tag is not None else ""


def fetch_arxiv_papers(query, max_results=10, timeout=30):
    """Query the arXiv export API and print each returned paper.

    For every ``<entry>`` element in the XML response, the title, the
    comma-separated author names and the summary are printed to stdout.
    When the server answers with a status other than 200, a failure
    message containing the status code is printed instead.

    Args:
        query: Value sent as the ``search_query`` request parameter,
            e.g. ``"cat:math.MP"``.
        max_results: Value sent as the ``max_results`` request parameter.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        None. All output goes to stdout.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection error, timeout, ...).
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
    }

    # Without an explicit timeout, requests waits indefinitely on a
    # stalled connection.
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        print("请求失败,状态码:", response.status_code)
        return

    soup = BeautifulSoup(response.content, "xml")
    for entry in soup.find_all("entry"):
        # An entry lacking <title> or <summary> yields an empty string
        # rather than an AttributeError.
        title = _tag_text(entry.find("title"))
        summary = _tag_text(entry.find("summary"))

        # Collect author names, skipping <author> elements without a <name>.
        name_tags = (author.find("name") for author in entry.find_all("author"))
        author_names = [_tag_text(tag) for tag in name_tags if tag is not None]

        print(f"标题: {title}")
        print(f"作者: {', '.join(author_names)}")
        print(f"摘要: {summary}\n")
|
||||
|
||||
# Example invocation. Guarded so the request is only sent when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    fetch_arxiv_papers("cat:math.MP", max_results=5)
|
Reference in New Issue
Block a user