feat: 自动剥离大模型返回的 Markdown 代码围栏

This commit is contained in:
ZhangYonghao
2026-03-22 00:43:47 +08:00
parent da15500808
commit bb630358af
6 changed files with 93 additions and 1 deletions

View File

@@ -48,6 +48,12 @@ CONTENT_SECURITY_POLICY = "; ".join(
]
)
# Matches text that consists entirely of a single fenced Markdown code block.
# No re.MULTILINE flag is used, so `^` and `$` anchor to the whole string:
# nothing but whitespace may precede the opening fence or follow the closing one.
# Named groups:
#   language - optional info tag right after the opening fence (may be empty)
#   content  - everything between the opening and closing fence lines
MARKDOWN_CODE_FENCE_PATTERN = re.compile(
# Opening fence: optional leading whitespace, three backticks, an optional
# language tag, optional horizontal whitespace, then a line break (LF or CRLF).
r"^\s*```(?P<language>[a-zA-Z0-9_+-]*)[^\S\r\n]*\r?\n"
# Body: captured lazily; because the pattern must still reach the end of the
# string, the closing fence is the last one followed only by whitespace.
r"(?P<content>[\s\S]*?)"
# Closing fence: must start on its own line; trailing whitespace is allowed.
r"\r?\n```\s*$"
)
def build_response_headers() -> dict[str, str]:
headers = {
@@ -61,6 +67,32 @@ def build_response_headers() -> dict[str, str]:
return headers
def strip_outer_markdown_code_fence(
    raw_content: str,
    preserve_markdown_fence: bool,
) -> str:
    """Return *raw_content* without its wrapping Markdown code fence, if any.

    The input is returned unchanged when any of the following holds:

    * ``preserve_markdown_fence`` is true, or the
      ``settings.strip_markdown_code_fence`` flag is falsy;
    * the text as a whole does not match ``MARKDOWN_CODE_FENCE_PATTERN``;
    * the fence carries a language tag other than html/htm/xhtml/xml
      (compared case-insensitively);
    * the fence carries no language tag and its body contains nothing that
      resembles an HTML doctype or an opening tag.

    Otherwise only the text between the fence lines is returned.

    Args:
        raw_content: Text that may be wrapped in a ``` fence.
        preserve_markdown_fence: Per-call opt-out; when true the text is
            returned untouched.

    Returns:
        The fenced body, or ``raw_content`` itself when no stripping applies.
    """
    stripping_enabled = (
        not preserve_markdown_fence and settings.strip_markdown_code_fence
    )
    if not stripping_enabled:
        return raw_content

    fenced = MARKDOWN_CODE_FENCE_PATTERN.match(raw_content)
    if fenced is None:
        return raw_content

    tag = fenced["language"].strip().lower()
    body = fenced["content"]

    if tag:
        # An explicit tag decides on its own: only markup languages are unwrapped.
        return body if tag in {"html", "htm", "xhtml", "xml"} else raw_content

    # Untagged fence: unwrap only when the body looks like markup.
    html_like = re.search(
        r"<!doctype\s+html|<html\b|<[a-z][\w:-]*\b",
        body,
        re.IGNORECASE,
    )
    return body if html_like is not None else raw_content
def require_api_key(x_api_key: str | None = Header(default=None, alias="X-API-Key")) -> None:
if not settings.api_key:
return
@@ -261,7 +293,11 @@ def generate_html(
unique_id = generate_unique_id(db)
html_filename = f"{unique_id}.html"
html_path = settings.html_storage_dir / html_filename
html_document = build_html_document(request.html_content, request.title)
normalized_html_content = strip_outer_markdown_code_fence(
request.html_content,
preserve_markdown_fence=request.preserve_markdown_fence,
)
html_document = build_html_document(normalized_html_content, request.title)
expires_at = datetime.utcnow() + timedelta(
days=request.ttl_days or settings.default_retention_days
)