Initial commit: Markdown editor with file management and regex tools
项目特性: - 完整的Markdown编辑器,支持实时预览 - 文件管理功能,支持保存/加载/删除文件 - 正则表达式工具,支持批量文本替换 - 前后端分离架构 - 响应式设计 技术栈: - 前端:React + TypeScript + Vite - 后端:Python Flask - Markdown解析:Python-Markdown 包含组件: - WorkingMarkdownEditor: 基础功能版本 - FullMarkdownEditor: 完整功能版本 - SimpleMarkdownEditor: 简化版本
This commit is contained in:
25
backend/app.py
Normal file
25
backend/app.py
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Flask应用入口文件
|
||||
启动Markdown编辑器的后端服务
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from app import create_app
|
||||
|
||||
def main():
    """Entry point: build the Flask app and run the development server.

    Configuration is taken from environment variables:
        PORT:  TCP port to listen on (default 5000).
        DEBUG: the string 'true' (case-insensitive) enables debug mode.
    """
    port = int(os.environ.get('PORT', 5000))
    debug_enabled = os.environ.get('DEBUG', 'False').lower() == 'true'

    application = create_app()

    print(f"Starting Markdown Editor API on port {port}")
    print(f"Debug mode: {debug_enabled}")

    application.run(host='0.0.0.0', port=port, debug=debug_enabled)


if __name__ == '__main__':
    main()
32
backend/app/__init__.py
Normal file
32
backend/app/__init__.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""
|
||||
Flask应用初始化模块
|
||||
负责创建和配置Flask应用实例
|
||||
"""
|
||||
|
||||
from flask import Flask
|
||||
from flask_cors import CORS
|
||||
|
||||
|
||||
def create_app():
    """Build and configure the Flask application instance.

    Enables CORS for the local dev frontend and mounts the API
    blueprint under the /api prefix.

    Returns:
        Flask: the configured application instance.
    """
    app = Flask(__name__)

    # Allow the local dev-server origins to call the JSON API.
    cors_rules = {
        r"/api/*": {
            "origins": ["http://localhost:3000", "http://127.0.0.1:3000"],
            "methods": ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
            "allow_headers": ["Content-Type", "Authorization"],
        }
    }
    CORS(app, resources=cors_rules)

    # Imported here rather than at module level — presumably to avoid a
    # circular import between the app package and its routes module.
    from app.routes import api_bp
    app.register_blueprint(api_bp, url_prefix='/api')

    return app
262
backend/app/routes.py
Normal file
262
backend/app/routes.py
Normal file
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
API路由模块
|
||||
处理所有Markdown编辑器的API端点
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from flask import Blueprint, request, jsonify, send_file
|
||||
from utils.markdown_parser import MarkdownParser
|
||||
from utils.file_manager import FileManager
|
||||
from utils.regex_processor import RegexProcessor
|
||||
|
||||
# Blueprint grouping all API endpoints (mounted under /api by create_app).
api_bp = Blueprint('api', __name__)

# Module-level helper singletons shared by every request handler.
markdown_parser = MarkdownParser()
file_manager = FileManager()
regex_processor = RegexProcessor()
|
||||
|
||||
|
||||
@api_bp.route('/health', methods=['GET'])
def health_check():
    """Liveness probe.

    Returns:
        JSON: static service-status payload.
    """
    payload = {
        'status': 'healthy',
        'service': 'markdown-editor-api',
    }
    return jsonify(payload)
|
||||
|
||||
|
||||
@api_bp.route('/parse', methods=['POST'])
def parse_markdown():
    """Convert Markdown text to HTML.

    Request Body:
        {"content": "# Hello World"}

    Returns:
        JSON: {"html": "<h1>Hello World</h1>", "metadata": {...}}
    """
    try:
        payload = request.get_json()
        if not payload or 'content' not in payload:
            return jsonify({'error': 'Missing content field'}), 400

        parsed = markdown_parser.parse(payload['content'])
        return jsonify(parsed)

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/files', methods=['GET'])
def list_files():
    """List the entries of a workspace directory.

    Query Parameters:
        path: directory path, relative to the workspace.

    Returns:
        JSON: {"files": [...]} describing each entry.
    """
    try:
        directory = request.args.get('path', '')
        entries = file_manager.list_files(directory)
        return jsonify({'files': entries})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/files/read', methods=['GET'])
def read_file():
    """Read the content of a single file.

    Query Parameters:
        path: file path, relative to the workspace.

    Returns:
        JSON: {"content": "..."} on success.
    """
    try:
        target = request.args.get('path')
        if not target:
            return jsonify({'error': 'Missing path parameter'}), 400

        text = file_manager.read_file(target)
        return jsonify({'content': text})

    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/files/write', methods=['POST'])
def write_file():
    """Write content to a file, creating it if necessary.

    Request Body:
        {"path": "<file path>", "content": "<file content>"}

    Returns:
        JSON: confirmation message on success.
    """
    try:
        payload = request.get_json()
        required = ('path', 'content')
        if not payload or any(key not in payload for key in required):
            return jsonify({'error': 'Missing required fields'}), 400

        file_manager.write_file(payload['path'], payload['content'])
        return jsonify({'message': 'File saved successfully'})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/files/delete', methods=['DELETE'])
def delete_file():
    """Delete a file.

    Request Body:
        {"path": "<file path>"}

    Returns:
        JSON: confirmation message on success.
    """
    try:
        payload = request.get_json()
        if not payload or 'path' not in payload:
            return jsonify({'error': 'Missing path field'}), 400

        file_manager.delete_file(payload['path'])
        return jsonify({'message': 'File deleted successfully'})

    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/regex/replace', methods=['POST'])
def regex_replace():
    """Apply a regex search-and-replace to a block of text.

    Request Body:
        {
            "content": "<original text>",
            "pattern": "<regular expression>",
            "replacement": "<replacement text>",
            "flags": "gi"
        }

    Returns:
        JSON: {
            "result": "<replaced text>",
            "matches": 3,
            "groups": [...]
        }
    """
    try:
        payload = request.get_json()
        required = ('content', 'pattern', 'replacement')

        if not payload or any(key not in payload for key in required):
            return jsonify({'error': 'Missing required fields'}), 400

        outcome = regex_processor.replace(
            payload['content'],
            payload['pattern'],
            payload['replacement'],
            payload.get('flags', ''),
        )
        return jsonify(outcome)

    except re.error as e:
        return jsonify({'error': f'Invalid regex pattern: {str(e)}'}), 400
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/regex/extract', methods=['POST'])
def regex_extract():
    """Extract all regex matches from a block of text.

    Request Body:
        {
            "content": "<text>",
            "pattern": "<regular expression>",
            "flags": "gi"
        }

    Returns:
        JSON: list of match descriptions.
    """
    try:
        payload = request.get_json()
        if not payload or 'content' not in payload or 'pattern' not in payload:
            return jsonify({'error': 'Missing required fields'}), 400

        found = regex_processor.extract(
            payload['content'],
            payload['pattern'],
            payload.get('flags', ''),
        )
        return jsonify({'matches': found})

    except re.error as e:
        return jsonify({'error': f'Invalid regex pattern: {str(e)}'}), 400
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
@api_bp.route('/files/download')
def download_file():
    """Download a file from the managed workspace as an attachment.

    Query Parameters:
        path: file path, relative to the workspace directory.

    Returns:
        File: the file content as an attachment, or a JSON error.
    """
    try:
        path = request.args.get('path')
        if not path:
            return jsonify({'error': 'Missing path parameter'}), 400

        # Security fix: the original passed the raw query-string path to
        # send_file(), allowing download of any readable file on the host.
        # Resolve against the workspace (consistent with /files/read) and
        # reject anything that escapes it (e.g. '..' or absolute paths).
        workspace = file_manager.workspace_path.resolve()
        target = (workspace / path).resolve()
        if target != workspace and workspace not in target.parents:
            return jsonify({'error': 'Invalid path'}), 400

        return send_file(target, as_attachment=True)

    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
6
backend/requirements.txt
Normal file
6
backend/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
Flask==2.3.3
|
||||
Flask-CORS==4.0.0
|
||||
markdown==3.5.1
|
||||
python-markdown-math==0.8
|
||||
watchdog==3.0.0
|
||||
Werkzeug==2.3.7
|
1
backend/utils/__init__.py
Normal file
1
backend/utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# 工具模块初始化
|
236
backend/utils/file_manager.py
Normal file
236
backend/utils/file_manager.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""
|
||||
文件管理器模块
|
||||
提供文件的读写、列表、删除等操作
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
from typing import List, Dict, Any
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class FileManager:
    """
    File manager.

    Handles all file-related operations (list, read, write, delete,
    copy, move, search), confined to a dedicated ``workspace``
    directory under the base path.

    Security: all relative paths arrive from HTTP clients, so every
    operation resolves its path through :meth:`_resolve`, which rejects
    paths that escape the workspace (e.g. via ``..`` components).
    """

    def __init__(self, base_path: str = None):
        """
        Initialize the file manager.

        Args:
            base_path (str): base directory; defaults to the current
                working directory. A ``workspace`` subdirectory is
                created under it and all operations are restricted to it.
        """
        self.base_path = Path(base_path) if base_path else Path.cwd()
        self.workspace_path = self.base_path / 'workspace'

        # Make sure the workspace directory exists.
        self.workspace_path.mkdir(exist_ok=True)

    def _resolve(self, path: str) -> Path:
        """
        Resolve a caller-supplied relative path inside the workspace.

        Args:
            path (str): relative path supplied by the caller.

        Returns:
            Path: absolute path located inside the workspace.

        Raises:
            ValueError: if the path escapes the workspace directory
                (fixes a path-traversal hole in the original code, where
                '..' components were passed through unchecked).
        """
        workspace = self.workspace_path.resolve()
        candidate = (workspace / path).resolve()
        if candidate != workspace and workspace not in candidate.parents:
            raise ValueError(f"Path escapes workspace: {path}")
        return candidate

    def list_files(self, path: str = '', include_hidden: bool = False) -> List[Dict[str, Any]]:
        """
        List the files and directories under a path.

        Args:
            path (str): relative path under the workspace.
            include_hidden (bool): whether to include dot-files.

        Returns:
            List[Dict[str, Any]]: one info dict per entry, sorted by name.

        Raises:
            FileNotFoundError: if the path does not exist.
            NotADirectoryError: if the path is not a directory.
        """
        target_path = self._resolve(path)

        if not target_path.exists():
            raise FileNotFoundError(f"Path does not exist: {path}")
        if not target_path.is_dir():
            raise NotADirectoryError(f"Path is not a directory: {path}")

        workspace = self.workspace_path.resolve()
        items = []
        for item in sorted(target_path.iterdir()):
            if not include_hidden and item.name.startswith('.'):
                continue

            stat = item.stat()
            items.append({
                'name': item.name,
                'path': str(item.relative_to(workspace)),
                'type': 'directory' if item.is_dir() else 'file',
                'size': stat.st_size if item.is_file() else 0,
                'modified': stat.st_mtime,
                'extension': item.suffix.lower() if item.is_file() else '',
                'is_markdown': item.suffix.lower() == '.md'
            })

        return items

    def read_file(self, path: str) -> str:
        """
        Read the content of a file.

        Args:
            path (str): file path, relative to the workspace.

        Returns:
            str: the file content.

        Raises:
            FileNotFoundError: if the file does not exist.
            IsADirectoryError: if the path is a directory.
        """
        file_path = self._resolve(path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        if not file_path.is_file():
            raise IsADirectoryError(f"Path is a directory: {path}")

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            # Fallback for legacy Chinese-encoded files.
            with open(file_path, 'r', encoding='gbk') as f:
                return f.read()

    def write_file(self, path: str, content: str, create_dirs: bool = True) -> None:
        """
        Write content to a file (UTF-8), overwriting any existing content.

        Args:
            path (str): file path, relative to the workspace.
            content (str): text to write.
            create_dirs (bool): create missing parent directories.
        """
        file_path = self._resolve(path)

        if create_dirs:
            file_path.parent.mkdir(parents=True, exist_ok=True)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

    def delete_file(self, path: str) -> None:
        """
        Delete a file or an entire directory tree.

        Args:
            path (str): file or directory path, relative to the workspace.

        Raises:
            FileNotFoundError: if the path does not exist.
        """
        target_path = self._resolve(path)

        if not target_path.exists():
            raise FileNotFoundError(f"Path does not exist: {path}")

        if target_path.is_dir():
            shutil.rmtree(target_path)
        else:
            target_path.unlink()

    def create_directory(self, path: str) -> None:
        """
        Create a directory (and any missing parents).

        Args:
            path (str): directory path, relative to the workspace.
        """
        self._resolve(path).mkdir(parents=True, exist_ok=True)

    def file_exists(self, path: str) -> bool:
        """
        Check whether a path exists inside the workspace.

        Args:
            path (str): path, relative to the workspace.

        Returns:
            bool: True if it exists; False also for paths that would
                escape the workspace.
        """
        try:
            return self._resolve(path).exists()
        except ValueError:
            return False

    def get_file_info(self, path: str) -> Dict[str, Any]:
        """
        Get detailed information about a file or directory.

        Args:
            path (str): path, relative to the workspace.

        Returns:
            Dict[str, Any]: name, path, type, size, timestamps, extension.

        Raises:
            FileNotFoundError: if the path does not exist.
        """
        file_path = self._resolve(path)

        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        stat = file_path.stat()

        return {
            'name': file_path.name,
            'path': str(file_path.relative_to(self.workspace_path.resolve())),
            'type': 'directory' if file_path.is_dir() else 'file',
            'size': stat.st_size,
            'modified': stat.st_mtime,
            'created': stat.st_ctime,
            'extension': file_path.suffix.lower(),
            'is_markdown': file_path.suffix.lower() == '.md'
        }

    def search_files(self, pattern: str, path: str = '') -> List[str]:
        """
        Search for files matching a glob pattern.

        Args:
            pattern (str): glob pattern (e.g. '*.md').
            path (str): starting directory, relative to the workspace.

        Returns:
            List[str]: workspace-relative paths of matching files
                (empty if the starting directory does not exist).
        """
        target_path = self._resolve(path)

        if not target_path.exists():
            return []

        workspace = self.workspace_path.resolve()
        return [
            str(file_path.relative_to(workspace))
            for file_path in target_path.rglob(pattern)
            if file_path.is_file()
        ]

    def copy_file(self, src_path: str, dest_path: str) -> None:
        """
        Copy a file, preserving metadata.

        Args:
            src_path (str): source file path, relative to the workspace.
            dest_path (str): destination path, relative to the workspace.

        Raises:
            FileNotFoundError: if the source does not exist.
        """
        src = self._resolve(src_path)
        dest = self._resolve(dest_path)

        if not src.exists():
            raise FileNotFoundError(f"Source file not found: {src_path}")

        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)

    def move_file(self, src_path: str, dest_path: str) -> None:
        """
        Move (rename) a file.

        Args:
            src_path (str): source file path, relative to the workspace.
            dest_path (str): destination path, relative to the workspace.

        Raises:
            FileNotFoundError: if the source does not exist.
        """
        src = self._resolve(src_path)
        dest = self._resolve(dest_path)

        if not src.exists():
            raise FileNotFoundError(f"Source file not found: {src_path}")

        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src), str(dest))
|
212
backend/utils/markdown_parser.py
Normal file
212
backend/utils/markdown_parser.py
Normal file
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
Markdown解析器模块
|
||||
提供Markdown文本到HTML的转换功能,支持数学公式、代码高亮等扩展
|
||||
"""
|
||||
|
||||
import markdown
|
||||
from markdown.extensions import codehilite, fenced_code, tables, toc
|
||||
from mdx_math import MathExtension
|
||||
import re
|
||||
from typing import Dict, Any, List
|
||||
|
||||
|
||||
class MarkdownParser:
    """
    Markdown parser.

    Converts Markdown text to HTML, with extensions for fenced code
    blocks, code highlighting (Pygments), tables, a table of contents,
    and math (python-markdown-math).
    """

    def __init__(self):
        """Initialize the Markdown converter and configure all extensions."""
        self.md = markdown.Markdown(
            extensions=[
                'codehilite',
                'fenced_code',
                'tables',
                'toc',
                # Math extension with $...$ inline delimiters enabled.
                MathExtension(enable_dollar_delimiter=True),
            ],
            extension_configs={
                'codehilite': {
                    'css_class': 'highlight',
                    'use_pygments': True,
                    'noclasses': False,
                },
                'toc': {
                    'permalink': True,
                    'baselevel': 1,
                }
            }
        )

    def parse(self, content: str) -> Dict[str, Any]:
        """
        Parse Markdown content into HTML.

        Args:
            content (str): Markdown source text.

        Returns:
            Dict[str, Any]: dictionary with the HTML and metadata
            {
                'html': str,          # converted HTML
                'metadata': dict,     # extracted metadata
                'toc': str,           # table-of-contents HTML
                'word_count': int,    # word count
                'reading_time': int,  # estimated reading time (minutes)
            }
        """
        if not content:
            # Empty/None input: return an all-empty result rather than
            # running the converter.
            return {
                'html': '',
                'metadata': {},
                'toc': '',
                'word_count': 0,
                'reading_time': 0
            }

        # Reset converter state between conversions (required by
        # python-markdown when reusing one Markdown instance).
        self.md.reset()

        # Extract metadata from the raw source before conversion.
        metadata = self._extract_metadata(content)

        # Convert to HTML.
        html = self.md.convert(content)

        # The 'toc' attribute is populated by the toc extension after
        # convert(); guard in case the extension did not set it.
        toc_html = self.md.toc if hasattr(self.md, 'toc') else ''

        # Word count: number of \w+ runs.
        # NOTE(review): \w+ treats a contiguous CJK segment as a single
        # "word", so this undercounts for Chinese text — confirm intent.
        word_count = len(re.findall(r'\w+', content))

        # Estimated reading time, assuming ~200 words per minute
        # (minimum of 1 minute).
        reading_time = max(1, word_count // 200)

        return {
            'html': html,
            'metadata': metadata,
            'toc': toc_html,
            'word_count': word_count,
            'reading_time': reading_time
        }

    def _extract_metadata(self, content: str) -> Dict[str, Any]:
        """
        Extract metadata from Markdown content.

        Args:
            content (str): Markdown source text.

        Returns:
            Dict[str, Any]: extracted metadata (title, subtitle, YAML
                front-matter keys, tags, links — whichever are present).
        """
        metadata = {}

        # Title: the first line starting with a single '#'.
        title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
        if title_match:
            metadata['title'] = title_match.group(1).strip()

        # Subtitle: the first line starting with '##'.
        subtitle_match = re.search(r'^##\s+(.+)$', content, re.MULTILINE)
        if subtitle_match:
            metadata['subtitle'] = subtitle_match.group(1).strip()

        # YAML front matter delimited by '---' lines at the very start.
        yaml_match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
        if yaml_match:
            yaml_content = yaml_match.group(1)
            yaml_data = self._parse_yaml(yaml_content)
            metadata.update(yaml_data)

        # Tags: '#' immediately followed by word characters.
        # NOTE(review): this also matches hex colors (#fff) and anchors —
        # confirm whether that is acceptable.
        tags = re.findall(r'#(\w+)', content)
        if tags:
            metadata['tags'] = tags

        # Inline links: [text](url).
        links = re.findall(r'\[([^\]]+)\]\(([^)]+)\)', content)
        if links:
            metadata['links'] = [{'text': text, 'url': url} for text, url in links]

        return metadata

    def _parse_yaml(self, yaml_content: str) -> Dict[str, Any]:
        """
        Minimal YAML parser for simple 'key: value' front matter.

        Only flat key/value pairs and inline lists ([a, b, c]) are
        supported; nested structures are not.

        Args:
            yaml_content (str): YAML-formatted text.

        Returns:
            Dict[str, Any]: parsed key/value pairs.
        """
        result = {}
        lines = yaml_content.strip().split('\n')

        for line in lines:
            line = line.strip()
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                # Strip surrounding quotes (single or double).
                value = value.strip().strip('"\'')

                # Inline list syntax: [a, b, c].
                if value.startswith('[') and value.endswith(']'):
                    value = [item.strip().strip('"\'') for item in value[1:-1].split(',')]

                result[key] = value

        return result

    def get_toc(self, content: str) -> str:
        """
        Generate a table of contents for the given Markdown.

        Args:
            content (str): Markdown source text.

        Returns:
            str: table-of-contents HTML (empty string if unavailable).
        """
        self.md.reset()
        # A full convert() is needed for the toc extension to populate
        # the 'toc' attribute; the HTML output is discarded.
        self.md.convert(content)
        return self.md.toc if hasattr(self.md, 'toc') else ''

    def extract_code_blocks(self, content: str) -> List[Dict[str, str]]:
        """
        Extract fenced code blocks.

        Args:
            content (str): Markdown source text.

        Returns:
            List[Dict[str, str]]: one dict per block with 'language'
                (defaults to 'text' when unlabelled) and 'code'.
        """
        pattern = r'```(\w*)\n(.*?)\n```'
        matches = re.findall(pattern, content, re.DOTALL)

        return [
            {'language': lang.strip() or 'text', 'code': code.strip()}
            for lang, code in matches
        ]

    def extract_images(self, content: str) -> List[Dict[str, str]]:
        """
        Extract image references.

        Args:
            content (str): Markdown source text.

        Returns:
            List[Dict[str, str]]: one dict per image with 'alt' and 'src'.
        """
        pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
        matches = re.findall(pattern, content)

        return [
            {'alt': alt, 'src': src}
            for alt, src in matches
        ]
|
310
backend/utils/regex_processor.py
Normal file
310
backend/utils/regex_processor.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
正则表达式处理器模块
|
||||
提供基于正则表达式的文本批量操作功能
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Any, Tuple
|
||||
|
||||
|
||||
class RegexProcessor:
    """
    Regex processor.

    Provides regex-based text utilities: replace, extract, validate,
    split, batch replace, and match statistics. Flag strings use the
    JavaScript-style single-letter convention ('i', 'm', 's', 'x', 'u').
    """

    # Cap on the compiled-pattern cache so arbitrary client-supplied
    # patterns cannot grow memory without bound (fixes unbounded cache).
    _MAX_CACHE_SIZE = 256

    # Flag-character -> re flag mapping.
    # Fix: 'l' (re.LOCALE) was removed — it is invalid for str patterns
    # in Python 3 and made re.compile raise ValueError. Unknown flag
    # characters are silently ignored, matching the original's handling
    # of other unrecognized letters.
    _FLAG_MAP = {
        'i': re.IGNORECASE,
        'm': re.MULTILINE,
        's': re.DOTALL,
        'x': re.VERBOSE,
        'u': re.UNICODE,
    }

    def __init__(self):
        """Initialize the processor with an empty pattern cache."""
        self.compiled_patterns = {}

    def _compile_pattern(self, pattern: str, flags: str = '') -> re.Pattern:
        """
        Compile a pattern, caching the result for reuse.

        Args:
            pattern (str): regular-expression pattern.
            flags (str): flag characters (see _FLAG_MAP); unknown
                characters are ignored.

        Returns:
            re.Pattern: the compiled pattern.

        Raises:
            re.error: if the pattern is invalid.
        """
        cache_key = f"{pattern}:{flags}"

        if cache_key not in self.compiled_patterns:
            flag_value = 0
            for flag in flags.lower():
                flag_value |= self._FLAG_MAP.get(flag, 0)

            # Crude eviction: drop everything once the cap is reached.
            if len(self.compiled_patterns) >= self._MAX_CACHE_SIZE:
                self.compiled_patterns.clear()

            self.compiled_patterns[cache_key] = re.compile(pattern, flag_value)

        return self.compiled_patterns[cache_key]

    def replace(self, content: str, pattern: str, replacement: str, flags: str = '') -> Dict[str, Any]:
        """
        Replace all regex matches in the text.

        Args:
            content (str): original text.
            pattern (str): regular-expression pattern.
            replacement (str): replacement template (supports \\1, \\g<name>).
            flags (str): flag characters.

        Returns:
            Dict[str, Any]: replacement result
            {
                'result': str,        # text after replacement
                'matches': int,       # number of matches
                'groups': List,       # captured groups per match
                'replacements': List[Dict[str, Any]]  # per-match details
            }
        """
        regex = self._compile_pattern(pattern, flags)

        # Collect match objects up front for the summary fields.
        matches = list(regex.finditer(content))
        replacements = []

        def replace_func(match):
            # Expand backreferences in the replacement template for
            # this specific match.
            groups = match.groups()
            expanded = match.expand(replacement)

            replacements.append({
                'original': match.group(0),
                'replaced': expanded,
                'start': match.start(),
                'end': match.end(),
                'groups': list(groups) if groups else []
            })

            return expanded

        result = regex.sub(replace_func, content)

        return {
            'result': result,
            'matches': len(matches),
            'groups': [match.groups() for match in matches if match.groups()],
            'replacements': replacements
        }

    def extract(self, content: str, pattern: str, flags: str = '') -> List[Dict[str, Any]]:
        """
        Extract all regex matches from the text.

        Args:
            content (str): text to search.
            pattern (str): regular-expression pattern.
            flags (str): flag characters.

        Returns:
            List[Dict[str, Any]]: one dict per match with keys
                'match', 'groups', 'start', 'end', 'named_groups'.
        """
        regex = self._compile_pattern(pattern, flags)

        return [
            {
                'match': m.group(0),
                'groups': list(m.groups()) if m.groups() else [],
                'start': m.start(),
                'end': m.end(),
                'named_groups': m.groupdict()
            }
            for m in regex.finditer(content)
        ]

    def validate(self, content: str, pattern: str, flags: str = '') -> bool:
        """
        Check whether the text contains a match for the pattern.

        Args:
            content (str): text to test.
            pattern (str): regular-expression pattern.
            flags (str): flag characters.

        Returns:
            bool: True if a match is found anywhere in the text.
        """
        regex = self._compile_pattern(pattern, flags)
        return bool(regex.search(content))

    def split(self, content: str, pattern: str, flags: str = '', maxsplit: int = 0) -> Dict[str, Any]:
        """
        Split the text on regex matches.

        Args:
            content (str): text to split.
            pattern (str): regular-expression pattern.
            flags (str): flag characters.
            maxsplit (int): maximum number of splits (0 = unlimited).

        Returns:
            Dict[str, Any]: {
                'parts': List[str],       # the split pieces
                'separators': List[str],  # separators actually consumed
                'count': int              # number of splits performed
            }
        """
        regex = self._compile_pattern(pattern, flags)

        parts = regex.split(content, maxsplit) if maxsplit > 0 else regex.split(content)

        # Fix: the original reported every occurrence of the separator
        # and its total as 'count', even when maxsplit truncated the
        # split. Report only the separators actually consumed.
        split_count = len(parts) - 1
        separators = [m.group(0) for m in regex.finditer(content)][:split_count]

        return {
            'parts': parts,
            'separators': separators,
            'count': split_count
        }

    def find_and_replace_batch(self, content: str, operations: List[Dict[str, str]]) -> Dict[str, Any]:
        """
        Apply a sequence of replace operations, each on the previous result.

        Args:
            content (str): original text.
            operations (List[Dict[str, str]]): each with 'pattern',
                'replacement' and optional 'flags'.

        Returns:
            Dict[str, Any]: {
                'result': str,                     # final text
                'operations': List[Dict[str, Any]] # per-operation details
            }
        """
        result = content
        operation_results = []

        for operation in operations:
            pattern = operation['pattern']
            replacement = operation['replacement']
            flags = operation.get('flags', '')

            replace_result = self.replace(result, pattern, replacement, flags)
            result = replace_result['result']

            operation_results.append({
                'pattern': pattern,
                'replacement': replacement,
                'flags': flags,
                'matches': replace_result['matches'],
                'replacements': replace_result['replacements']
            })

        return {
            'result': result,
            'operations': operation_results
        }

    def get_statistics(self, content: str, pattern: str, flags: str = '') -> Dict[str, Any]:
        """
        Compute match statistics for a pattern over the text.

        Args:
            content (str): text to analyze.
            pattern (str): regular-expression pattern.
            flags (str): flag characters.

        Returns:
            Dict[str, Any]: {
                'total_matches': int,
                'unique_matches': int,
                'average_length': float,
                'longest_match': str,
                'shortest_match': str,
                'positions': List[Tuple[int, int]]
            }
        """
        matches = self.extract(content, pattern, flags)

        if not matches:
            # No matches: return a well-defined zero result.
            return {
                'total_matches': 0,
                'unique_matches': 0,
                'average_length': 0.0,
                'longest_match': '',
                'shortest_match': '',
                'positions': []
            }

        match_texts = [m['match'] for m in matches]
        lengths = [len(text) for text in match_texts]

        return {
            'total_matches': len(matches),
            'unique_matches': len(set(match_texts)),
            'average_length': sum(lengths) / len(lengths),
            'longest_match': max(match_texts, key=len),
            'shortest_match': min(match_texts, key=len),
            'positions': [(m['start'], m['end']) for m in matches]
        }

    def common_patterns(self) -> Dict[str, str]:
        """
        Return a dictionary of commonly used regex patterns.

        Returns:
            Dict[str, str]: pattern name -> regex string.
        """
        return {
            'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
            'url': r'https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?',
            'phone': r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
            'markdown_headers': r'^#{1,6}\s+(.+)$',
            'markdown_links': r'\[([^\]]+)\]\(([^)]+)\)',
            'markdown_images': r'!\[([^\]]*)\]\(([^)]+)\)',
            'markdown_bold': r'\*\*([^*]+)\*\*|__([^_]+)__',
            'markdown_italic': r'\*([^*]+)\*|_([^_]+)_',
            'code_blocks': r'```(\w*)\n(.*?)\n```',
            'inline_code': r'`([^`]+)`',
            'numbers': r'\d+',
            'words': r'\b\w+\b',
            'whitespace': r'\s+',
            'lines': r'.*(?:\n|$)',
            'markdown_tables': r'\|(.+)\|\n\|[-:\| ]+\|\n((?:\|.+\|\n?)*)',
            'markdown_lists': r'^(\s*)[-*+]\s+(.+)$',
            'markdown_quotes': r'^\s*>\s*(.+)',
            # Cleaned up: the original spelled '>' as the regex escape
            # \u003e inside a raw string — equivalent, just obscure.
            'html_tags': r'</?[a-zA-Z][^>]*>',
            'ip_address': r'\b(?:\d{1,3}\.){3}\d{1,3}\b',
            'date_ymd': r'\d{4}-\d{2}-\d{2}',
            'date_dmy': r'\d{2}/\d{2}/\d{4}',
            'time_hms': r'\d{2}:\d{2}:\d{2}',
            'hex_color': r'#(?:[0-9a-fA-F]{3}){1,2}\b',
            'markdown_task_lists': r'^\s*[-*+]\s+\[([ x])\]\s+(.+)$'
        }
|
25
backend/workspace/test.md
Normal file
25
backend/workspace/test.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# 测试文档
|
||||
|
||||
这是一个**测试**Markdown文档,用于验证所有功能是否正常。
|
||||
|
||||
## 功能测试
|
||||
|
||||
- [x] Markdown解析
|
||||
- [x] 文件操作
|
||||
- [x] 正则表达式
|
||||
|
||||
### 代码块测试
|
||||
|
||||
|
||||
```python
def hello_world():
    print('Hello, World!')

hello_world()
```
|
||||
|
||||
### 数学公式测试
|
||||
|
||||
$E = mc^2$
|
||||
|
||||
> 引用测试
|
||||
|
||||
**粗体**和*斜体*测试。
|
Reference in New Issue
Block a user