This commit is contained in:
2025-07-18 18:00:04 +08:00
parent 24abc7aab3
commit 563f16f0c5
15 changed files with 25541 additions and 41 deletions

View File

@@ -1,27 +1,48 @@
import json
import random
# Path to the raw input data file (the original note describes it as the
# 20000-record raw data file; TODO confirm that still holds for this subset).
input_file = "arxiv-metadata-oai-snapshot--26-500.json"
output_file = "arxiv-metadata-oai-snapshot--swift-26-500.json"

# Maps each arXiv category to the option letter it is presented under.
# Every category and every letter appears exactly once, so the mapping is
# unambiguous in both directions.
label_map = {
    "quant-ph": "A",
    "physics.chem-ph": "B",
    "physics.atom-ph": "C",
    "cond-mat.soft": "D",
    "cs.RO": "E",
    "cs.CL": "F",
    "cs.SE": "G",
    "cs.IR": "H",
    "hep-th": "I",
    "hep-ph": "J",
    "physics.optics": "K",
    "cs.AI": "L",
    "cs.CV": "M",
    "nucl-th": "N",
    "astro-ph": "O",
    "math.PR": "P",
    "cs.OS": "Q",
    "eess.SP": "R",
    "math.OC": "S",
    "math.DS": "T",
    "math.DG": "U",
    "math.MP": "V",
    "cs.MM": "W",
    "stat.ME": "X",
    "math.CO": "Y",
    "cs.NE": "Z",
}

# "<letter>. <category>" entries, derived from label_map so the option list
# can never drift out of sync with the mapping. Dicts preserve insertion
# order, so the entries come out as A..Z.
options = [f"{letter}. {category}" for category, letter in label_map.items()]

# Newline-separated option list, one option per line.
options_text = "\n".join(options)
# Read all the data
with open(input_file, 'r', encoding='utf-8') as f: