feat:自动剥离大模型返回的markdown代码围栏
This commit is contained in:
@@ -76,6 +76,7 @@ class Settings:
|
||||
max_html_length = max(1024, _get_int_env("MAX_HTML_LENGTH", 200_000))
|
||||
api_key = os.getenv("API_KEY", "").strip()
|
||||
allow_unsafe_html = _get_bool_env("ALLOW_UNSAFE_HTML", False)
|
||||
strip_markdown_code_fence = _get_bool_env("STRIP_MARKDOWN_CODE_FENCE", True)
|
||||
enable_request_debug_log = _get_bool_env("ENABLE_REQUEST_DEBUG_LOG", True)
|
||||
request_log_max_chars = max(256, _get_int_env("REQUEST_LOG_MAX_CHARS", 10_000))
|
||||
|
||||
|
||||
@@ -48,6 +48,12 @@ CONTENT_SECURITY_POLICY = "; ".join(
|
||||
]
|
||||
)
|
||||
|
||||
# Matches a payload that consists of exactly one outer Markdown code fence.
# The pattern is anchored at both ends, so text before the opening fence or
# after the closing fence (other than whitespace) prevents a match. The body is
# matched lazily, but because of the end anchor it extends to the final closing
# fence, so fences nested inside the body stay part of the "content" group.
MARKDOWN_CODE_FENCE_PATTERN = re.compile(
    r"^\s*```"                        # opening fence, leading whitespace allowed
    r"(?P<language>[a-zA-Z0-9_+-]*)"  # optional language tag (may be empty)
    r"[^\S\r\n]*\r?\n"                # horizontal whitespace, then LF or CRLF
    r"(?P<content>[\s\S]*?)"          # fenced body, line breaks included
    r"\r?\n```\s*$"                   # closing fence on its own line
)
|
||||
|
||||
|
||||
def build_response_headers() -> dict[str, str]:
|
||||
headers = {
|
||||
@@ -61,6 +67,32 @@ def build_response_headers() -> dict[str, str]:
|
||||
return headers
|
||||
|
||||
|
||||
# Heuristic used for untagged fences: a doctype, an <html> tag, or anything
# that looks like an opening tag.
_HTML_MARKUP_HINT_PATTERN = re.compile(
    r"<!doctype\s+html|<html\b|<[a-z][\w:-]*\b", re.IGNORECASE
)

# Language tags whose fenced body is treated as HTML and unwrapped.
_HTML_FENCE_LANGUAGES = frozenset({"html", "htm", "xhtml", "xml"})


def strip_outer_markdown_code_fence(
    raw_content: str,
    preserve_markdown_fence: bool,
) -> str:
    """Return ``raw_content`` without its single outer Markdown code fence.

    The fence is removed only when the whole payload is one fenced block (as
    matched by ``MARKDOWN_CODE_FENCE_PATTERN``) and the block is HTML-like:

    * a tagged fence is unwrapped only if its language tag is one of
      ``html``, ``htm``, ``xhtml`` or ``xml`` (case-insensitive);
    * an untagged fence is unwrapped only if its body contains something
      that looks like HTML markup.

    Args:
        raw_content: The payload to inspect.
        preserve_markdown_fence: When true, stripping is skipped for this
            call regardless of the global setting.

    Returns:
        The fenced body when the fence is stripped, otherwise ``raw_content``
        unchanged.
    """
    if preserve_markdown_fence or not settings.strip_markdown_code_fence:
        return raw_content

    match = MARKDOWN_CODE_FENCE_PATTERN.match(raw_content)
    if match is None:
        return raw_content

    # The language group can only contain [a-zA-Z0-9_+-], so lowering is the
    # only normalisation needed.
    language = match.group("language").lower()
    content = match.group("content")

    if language:
        # An explicit tag is authoritative; no need to scan the body.
        return content if language in _HTML_FENCE_LANGUAGES else raw_content

    # Untagged fence: only now pay for the scan of the body.
    if _HTML_MARKUP_HINT_PATTERN.search(content):
        return content
    return raw_content
|
||||
|
||||
|
||||
def require_api_key(x_api_key: str | None = Header(default=None, alias="X-API-Key")) -> None:
|
||||
if not settings.api_key:
|
||||
return
|
||||
@@ -261,7 +293,11 @@ def generate_html(
|
||||
unique_id = generate_unique_id(db)
|
||||
html_filename = f"{unique_id}.html"
|
||||
html_path = settings.html_storage_dir / html_filename
|
||||
html_document = build_html_document(request.html_content, request.title)
|
||||
normalized_html_content = strip_outer_markdown_code_fence(
|
||||
request.html_content,
|
||||
preserve_markdown_fence=request.preserve_markdown_fence,
|
||||
)
|
||||
html_document = build_html_document(normalized_html_content, request.title)
|
||||
expires_at = datetime.utcnow() + timedelta(
|
||||
days=request.ttl_days or settings.default_retention_days
|
||||
)
|
||||
|
||||
@@ -30,6 +30,13 @@ class HTMLGenerateRequest(BaseModel):
|
||||
ge=1,
|
||||
description="Optional retention days for the file.",
|
||||
)
|
||||
preserve_markdown_fence: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
"When true, keeps outer Markdown code fences instead of stripping them "
|
||||
"from the HTML payload."
|
||||
),
|
||||
)
|
||||
|
||||
@root_validator(pre=True)
|
||||
def normalize_aliases(cls, values: dict) -> dict:
|
||||
@@ -73,6 +80,17 @@ class HTMLGenerateRequest(BaseModel):
|
||||
normalized = value.strip()
|
||||
return normalized or None
|
||||
|
||||
@validator("preserve_markdown_fence", pre=True)
def normalize_preserve_markdown_fence(cls, value: bool | str | None) -> bool:
    """Coerce the raw ``preserve_markdown_fence`` input to a strict bool.

    Registered with ``pre=True``. ``None`` becomes ``False``. Strings are
    true only when, after trimming and lower-casing, they equal ``"1"``,
    ``"true"``, ``"yes"`` or ``"on"``. Booleans pass through unchanged, and
    any other value falls back to its Python truthiness.
    """
    if value is None:
        return False

    if isinstance(value, str):
        token = value.strip().lower()
        return token in {"1", "true", "yes", "on"}

    # Covers real booleans (returned as-is by bool()) and any other type.
    return bool(value)
|
||||
|
||||
@validator("ttl_days")
|
||||
def validate_ttl_days(cls, value: int | None) -> int | None:
|
||||
if value is None:
|
||||
|
||||
Reference in New Issue
Block a user