添加多个类别关键词,优化数据处理逻辑,支持从arXiv提取和筛选论文数据
This commit is contained in:
@@ -50,5 +50,5 @@ def get_Composition_ratio(input_file):
|
||||
if __name__ == "__main__":
|
||||
# input_file = "sftdata.jsonl"
|
||||
input_file = "output-26.jsonl"
|
||||
input_file = "arxiv-metadata-oai-snapshot--swift-26.json"
|
||||
input_file = "G:\\11\\data-prepare\\arxiv-metadata-oai-snapshot-multi-batch1.json"
|
||||
get_Composition_ratio(input_file)
|
||||
|
Reference in New Issue
Block a user