# Ultralytics š AGPL-3.0 License - https://ultralytics.com/license """ Automates building and post-processing of MkDocs documentation, especially for multilingual projects. This script streamlines generating localized documentation and updating HTML links for correct formatting. Key Features: - Automated building of MkDocs documentation: Compiles main documentation and localized versions from separate MkDocs configuration files. - Post-processing of generated HTML files: Updates HTML files to remove '.md' from internal links, ensuring correct navigation in web-based documentation. Usage: - Run from the root directory of your MkDocs project. - Ensure MkDocs is installed and configuration files (main and localized) are present. - The script builds documentation using MkDocs, then scans HTML files in 'site' to update links. - Ideal for projects with Markdown documentation served as a static website. Note: - Requires Python and MkDocs to be installed and configured. """ from __future__ import annotations import os import re import shutil import subprocess import sys import tempfile import time from pathlib import Path import yaml from bs4 import BeautifulSoup from minijinja import Environment, load_from_path try: from plugin import postprocess_site # mkdocs-ultralytics-plugin except ImportError: postprocess_site = None from build_reference import build_reference_docs from ultralytics.utils import LINUX, LOGGER, MACOS from ultralytics.utils.tqdm import TQDM os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs DOCS = Path(__file__).parent.resolve() SITE = DOCS.parent / "site" LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])") TITLE_PATTERN = re.compile(r"
max_title_length and "-" in title_tag.text:
title_tag.string = title_tag.text.rsplit("-", 1)[0].strip()
modified = True
# Find the main content area
main_content = soup.find("main") or soup.find("div", class_="md-content")
if not main_content:
return str(soup) if modified else content
# Convert plaintext links to HTML hyperlinks
if needs_link_conversion:
for paragraph in main_content.select("p, li"):
for text_node in paragraph.find_all(string=True, recursive=False):
if text_node.parent.name not in {"a", "code"}:
new_text = LINK_PATTERN.sub(r'\1', str(text_node))
if " 0:
tail = " "
if tail:
span.insert_after(tail)
modified = True
if "reference" in rel_path:
highlight_labels(soup.select("main h1, main h2, main h3, main h4, main h5"))
highlight_labels(soup.select("nav.md-nav--secondary .md-ellipsis, nav.md-nav__list .md-ellipsis"))
if "reference" in rel_path:
for ellipsis in soup.select("nav.md-nav--secondary .md-ellipsis"):
kind = ellipsis.find(class_=lambda c: c and "doc-kind" in c.split())
text = str(kind.next_sibling).strip() if kind and kind.next_sibling else ellipsis.get_text(strip=True)
if "." not in text:
continue
ellipsis.clear()
short = text.rsplit(".", 1)[-1]
if kind:
ellipsis.append(kind)
ellipsis.append(f" {short}")
else:
ellipsis.append(short)
modified = True
if needs_kind_highlight and not modified and soup.select(".doc-kind"):
# Ensure style injection when pre-existing badges are present
modified = True
if modified:
head = soup.find("head")
if head and not soup.select("style[data-doc-kind]"):
style = soup.new_tag("style", attrs={"data-doc-kind": "true"})
style.string = (
".doc-kind{display:inline-flex;align-items:center;gap:0.25em;padding:0.21em 0.59em;border-radius:999px;"
"font-weight:700;font-size:0.81em;letter-spacing:0.06em;text-transform:uppercase;"
"line-height:1;color:var(--doc-kind-color,#f8fafc);"
"background:var(--doc-kind-bg,rgba(255,255,255,0.12));}"
f".doc-kind-class{{--doc-kind-color:{DOC_KIND_COLORS['Class']};--doc-kind-bg:rgba(3,157,252,0.22);}}"
f".doc-kind-function{{--doc-kind-color:{DOC_KIND_COLORS['Function']};--doc-kind-bg:rgba(252,152,3,0.22);}}"
f".doc-kind-method{{--doc-kind-color:{DOC_KIND_COLORS['Method']};--doc-kind-bg:rgba(239,94,255,0.22);}}"
f".doc-kind-property{{--doc-kind-color:{DOC_KIND_COLORS['Property']};--doc-kind-bg:rgba(2,232,53,0.22);}}"
)
head.append(style)
return str(soup) if modified else content
def _rewrite_md_links(content: str) -> str:
"""Replace .md references with trailing slashes in HTML content, skipping GitHub links."""
if ".md" not in content:
return content
lines = []
for line in content.split("\n"):
if "github.com" not in line:
line = line.replace("index.md", "")
line = MD_LINK_PATTERN.sub(r"\1\2/\3", line)
lines.append(line)
return "\n".join(lines)
# Precompiled regex patterns for minification
# HTML comment, possibly spanning several lines: <!-- ... --> (lazy so adjacent comments are matched separately).
HTML_COMMENT = re.compile(r"<!--[\s\S]*?-->")
# Whole <pre>/<code>/<textarea>/<script> element, from its opening tag to the matching closing tag of the same name.
HTML_PRESERVE = re.compile(r"<(pre|code|textarea|script)[^>]*>[\s\S]*?</\1>", re.IGNORECASE)
# Whitespace sitting between two adjacent tags.
HTML_TAG_SPACE = re.compile(r">\s+<")
# Runs of two or more whitespace characters.
HTML_MULTI_SPACE = re.compile(r"\s{2,}")
# Lines that are empty or whitespace-only, including their trailing newline.
HTML_EMPTY_LINE = re.compile(r"^\s*$\n", re.MULTILINE)
# CSS block comment, possibly spanning several lines: /* ... */
CSS_COMMENT = re.compile(r"/\*[\s\S]*?\*/")
def remove_comments_and_empty_lines(content: str, file_type: str) -> str:
"""Remove comments and empty lines from a string of code, preserving newlines and URLs.
Args:
content (str): Code content to process.
file_type (str): Type of file ('html', 'css', or 'js').
Returns:
(str): Cleaned content with comments and empty lines removed.
Notes:
Typical reductions for Ultralytics Docs are:
- Total HTML reduction: 2.83% (1301.56 KB saved)
- Total CSS reduction: 1.75% (2.61 KB saved)
- Total JS reduction: 13.51% (99.31 KB saved)
"""
if file_type == "html":
content = HTML_COMMENT.sub("", content) # Remove HTML comments
# Preserve whitespace in ,
,