Add an arXiv paper crawler: fetch paper titles, authors, and abstracts for a given query
crawl-arxiv.py (new file, 34 lines added)
@@ -0,0 +1,34 @@
import requests
from bs4 import BeautifulSoup  # the "xml" parser used below requires lxml to be installed

def fetch_arxiv_papers(query, max_results=10):
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results
    }
    response = requests.get(base_url, params=params, timeout=30)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "xml")
        entries = soup.find_all("entry")
        for entry in entries:
            title = entry.title.text.strip()
            summary = entry.summary.text.strip()

            # Collect author names from each <author><name> element
            authors = entry.find_all("author")
            author_names = []
            for author in authors:
                name = author.find("name")
                if name:
                    author_names.append(name.text.strip())

            print(f"Title: {title}")
            print(f"Authors: {', '.join(author_names)}")
            print(f"Abstract: {summary}\n")
    else:
        print("Request failed, status code:", response.status_code)

# Example call
fetch_arxiv_papers("cat:math.MP", max_results=5)
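Note (not part of the commit): the arXiv API's search_query parameter accepts field prefixes such as ti: (title), au: (author), abs: (abstract), cat: (category), and all:, which can be combined with AND/OR/ANDNOT. A couple of illustrative calls to the function above; the specific queries here are hypothetical examples, not taken from the commit:

# Papers in the math.MP category (same query as the commit's example call)
fetch_arxiv_papers("cat:math.MP", max_results=5)

# Title keyword combined with a category filter (illustrative query)
fetch_arxiv_papers("ti:electron AND cat:cond-mat.mes-hall", max_results=3)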