添加爬取arXiv论文的功能,支持根据查询获取论文标题、作者和摘要
This commit is contained in:
		
							
								
								
									
										34
									
								
								crawl-arxiv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								crawl-arxiv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,34 @@ | |||||||
|  | import requests | ||||||
|  | from bs4 import BeautifulSoup | ||||||
|  |  | ||||||
def fetch_arxiv_papers(query, max_results=10, timeout=30):
    """Query the arXiv export API and print title, authors and abstract per entry.

    The request is sent to ``http://export.arxiv.org/api/query`` with ``query``
    as the ``search_query`` parameter, starting at result 0. The response body
    is parsed as XML and every ``<entry>`` element is printed to stdout.

    Args:
        query: Value passed verbatim as the ``search_query`` request parameter.
        max_results: Value passed as the ``max_results`` request parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        None. Results (or the failing HTTP status code) are printed.

    Raises:
        requests.RequestException: Propagated unchanged on connection errors
            or when the timeout elapses.
    """
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
    }
    response = requests.get(base_url, params=params, timeout=timeout)

    # Guard clause: report any non-200 answer and stop, keeping the happy path flat.
    if response.status_code != 200:
        print("请求失败,状态码:", response.status_code)
        return

    soup = BeautifulSoup(response.content, "xml")
    for entry in soup.find_all("entry"):
        # An entry lacking <title> or <summary> yields None here; fall back to
        # an empty string instead of raising AttributeError mid-listing.
        title_tag = entry.find("title")
        summary_tag = entry.find("summary")
        title = title_tag.text.strip() if title_tag is not None else ""
        summary = summary_tag.text.strip() if summary_tag is not None else ""

        # Collect author names, skipping any <author> without a <name> child.
        name_tags = (author.find("name") for author in entry.find_all("author"))
        author_names = [tag.text.strip() for tag in name_tags if tag is not None]

        print(f"标题: {title}")
        print(f"作者: {', '.join(author_names)}")
        print(f"摘要: {summary}\n")
|  |  | ||||||
# Example invocation. Guarded so that importing this module does not fire a
# network request as a side effect; running the file as a script is unchanged.
if __name__ == "__main__":
    fetch_arxiv_papers("cat:math.MP", max_results=5)
		Reference in New Issue
	
	Block a user