# Ultralytics šŸš€ AGPL-3.0 License - https://ultralytics.com/license """ Automates building and post-processing of MkDocs documentation, especially for multilingual projects. This script streamlines generating localized documentation and updating HTML links for correct formatting. Key Features: - Automated building of MkDocs documentation: Compiles main documentation and localized versions from separate MkDocs configuration files. - Post-processing of generated HTML files: Updates HTML files to remove '.md' from internal links, ensuring correct navigation in web-based documentation. Usage: - Run from the root directory of your MkDocs project. - Ensure MkDocs is installed and configuration files (main and localized) are present. - The script builds documentation using MkDocs, then scans HTML files in 'site' to update links. - Ideal for projects with Markdown documentation served as a static website. Note: - Requires Python and MkDocs to be installed and configured. """ from __future__ import annotations import os import re import shutil import subprocess import sys import tempfile import time from pathlib import Path import yaml from bs4 import BeautifulSoup from minijinja import Environment, load_from_path try: from plugin import postprocess_site # mkdocs-ultralytics-plugin except ImportError: postprocess_site = None from build_reference import build_reference_docs from ultralytics.utils import LINUX, LOGGER, MACOS from ultralytics.utils.tqdm import TQDM os.environ["JUPYTER_PLATFORM_DIRS"] = "1" # fix DeprecationWarning: Jupyter is migrating to use standard platformdirs DOCS = Path(__file__).parent.resolve() SITE = DOCS.parent / "site" LINK_PATTERN = re.compile(r"(https?://[^\s()<>]*[^\s()<>.,:;!?\'\"])") TITLE_PATTERN = re.compile(r"(.*?)", flags=re.IGNORECASE | re.DOTALL) MD_LINK_PATTERN = re.compile(r'(["\']?)([^"\'>\s]+?)\.md(["\']?)') DOC_KIND_LABELS = {"Class", "Function", "Method", "Property"} DOC_KIND_COLORS = { "Class": "#039dfc", # blue "Method": "#ef5eff", # magenta "Function": "#fc9803", # orange "Property": "#02e835", # green } def prepare_docs_markdown(clone_repos: bool = True): """Build docs using mkdocs.""" LOGGER.info("Removing existing build artifacts") shutil.rmtree(SITE, ignore_errors=True) shutil.rmtree(DOCS / "repos", ignore_errors=True) if clone_repos: # Get docs repo repo = "https://github.com/ultralytics/docs" local_dir = DOCS / "repos" / Path(repo).name subprocess.run( ["git", "clone", "-q", "--depth=1", "--single-branch", "-b", "main", repo, str(local_dir)], check=True ) shutil.rmtree(DOCS / "en/compare", ignore_errors=True) # delete if exists shutil.copytree(local_dir / "docs/en/compare", DOCS / "en/compare") # for docs LOGGER.info(f"Cloned/Updated {repo} in {local_dir}") # Add frontmatter for file in TQDM((DOCS / "en").rglob("*.md"), desc="Adding frontmatter"): update_markdown_files(file) def update_markdown_files(md_filepath: Path): """Create or update a Markdown file, ensuring frontmatter is present.""" if md_filepath.exists(): content = md_filepath.read_text().strip() # Replace apostrophes content = content.replace("ā€˜", "'").replace("’", "'") # Add frontmatter if missing if not content.strip().startswith("---\n"): header = "---\ncomments: true\ndescription: TODO ADD DESCRIPTION\nkeywords: TODO ADD KEYWORDS\n---\n\n" content = header + content # Ensure MkDocs admonitions "=== " lines are preceded and followed by empty newlines lines = content.split("\n") new_lines = [] for i, line in enumerate(lines): stripped_line = line.strip() if stripped_line.startswith("=== "): if i > 0 and new_lines[-1] != "": new_lines.append("") new_lines.append(line) if i < len(lines) - 1 and lines[i + 1].strip() != "": new_lines.append("") else: new_lines.append(line) content = "\n".join(new_lines) # Add EOF newline if missing if not content.endswith("\n"): content += "\n" # Save page md_filepath.write_text(content) return def update_docs_html(): """Update titles, edit links, and convert plaintext links in HTML documentation in one pass.""" from concurrent.futures import ProcessPoolExecutor html_files = list(SITE.rglob("*.html")) if not html_files: LOGGER.info("Updated HTML files: 0") return desc = f"Updating HTML at {SITE}" max_workers = os.cpu_count() or 1 with ProcessPoolExecutor(max_workers=max_workers) as executor: pbar = TQDM(executor.map(_process_html_file, html_files), total=len(html_files), desc=desc) updated = 0 for res in pbar: updated += bool(res) pbar.set_description(f"{desc} ({updated}/{len(html_files)} updated)") def _process_html_file(html_file: Path) -> bool: """Process a single HTML file; returns True if modified.""" try: content = html_file.read_text(encoding="utf-8") except Exception as e: LOGGER.warning(f"Could not read {html_file}: {e}") return False changed = False try: rel_path = html_file.relative_to(SITE).as_posix() except ValueError: rel_path = html_file.name # For pages sourced from external repos (compare), drop edit/copy buttons to avoid wrong links if rel_path.startswith("compare/"): before = content content = re.sub( r']*class="[^"]*md-content__button[^"]*"[^>]*>.*?', "", content, flags=re.IGNORECASE | re.DOTALL, ) if content != before: changed = True if rel_path == "404.html": new_content = re.sub(r".*?", "Ultralytics Docs - Not Found", content) if new_content != content: content, changed = new_content, True new_content = update_docs_soup(content, html_file=html_file) if new_content != content: content, changed = new_content, True new_content = _rewrite_md_links(content) if new_content != content: content, changed = new_content, True if changed: try: html_file.write_text(content, encoding="utf-8") return True except Exception as e: LOGGER.warning(f"Could not write {html_file}: {e}") return False def update_docs_soup(content: str, html_file: Path | None = None, max_title_length: int = 70) -> str: """Convert plaintext links to HTML hyperlinks, truncate long meta titles, and remove code line hrefs.""" title_match = TITLE_PATTERN.search(content) needs_title_trim = bool( title_match and len(title_match.group(1)) > max_title_length and "-" in title_match.group(1) ) needs_link_conversion = (" max_title_length and "-" in title_tag.text: title_tag.string = title_tag.text.rsplit("-", 1)[0].strip() modified = True # Find the main content area main_content = soup.find("main") or soup.find("div", class_="md-content") if not main_content: return str(soup) if modified else content # Convert plaintext links to HTML hyperlinks if needs_link_conversion: for paragraph in main_content.select("p, li"): for text_node in paragraph.find_all(string=True, recursive=False): if text_node.parent.name not in {"a", "code"}: new_text = LINK_PATTERN.sub(r'\1', str(text_node)) if " 0: tail = " " if tail: span.insert_after(tail) modified = True if "reference" in rel_path: highlight_labels(soup.select("main h1, main h2, main h3, main h4, main h5")) highlight_labels(soup.select("nav.md-nav--secondary .md-ellipsis, nav.md-nav__list .md-ellipsis")) if "reference" in rel_path: for ellipsis in soup.select("nav.md-nav--secondary .md-ellipsis"): kind = ellipsis.find(class_=lambda c: c and "doc-kind" in c.split()) text = str(kind.next_sibling).strip() if kind and kind.next_sibling else ellipsis.get_text(strip=True) if "." not in text: continue ellipsis.clear() short = text.rsplit(".", 1)[-1] if kind: ellipsis.append(kind) ellipsis.append(f" {short}") else: ellipsis.append(short) modified = True if needs_kind_highlight and not modified and soup.select(".doc-kind"): # Ensure style injection when pre-existing badges are present modified = True if modified: head = soup.find("head") if head and not soup.select("style[data-doc-kind]"): style = soup.new_tag("style", attrs={"data-doc-kind": "true"}) style.string = ( ".doc-kind{display:inline-flex;align-items:center;gap:0.25em;padding:0.21em 0.59em;border-radius:999px;" "font-weight:700;font-size:0.81em;letter-spacing:0.06em;text-transform:uppercase;" "line-height:1;color:var(--doc-kind-color,#f8fafc);" "background:var(--doc-kind-bg,rgba(255,255,255,0.12));}" f".doc-kind-class{{--doc-kind-color:{DOC_KIND_COLORS['Class']};--doc-kind-bg:rgba(3,157,252,0.22);}}" f".doc-kind-function{{--doc-kind-color:{DOC_KIND_COLORS['Function']};--doc-kind-bg:rgba(252,152,3,0.22);}}" f".doc-kind-method{{--doc-kind-color:{DOC_KIND_COLORS['Method']};--doc-kind-bg:rgba(239,94,255,0.22);}}" f".doc-kind-property{{--doc-kind-color:{DOC_KIND_COLORS['Property']};--doc-kind-bg:rgba(2,232,53,0.22);}}" ) head.append(style) return str(soup) if modified else content def _rewrite_md_links(content: str) -> str: """Replace .md references with trailing slashes in HTML content, skipping GitHub links.""" if ".md" not in content: return content lines = [] for line in content.split("\n"): if "github.com" not in line: line = line.replace("index.md", "") line = MD_LINK_PATTERN.sub(r"\1\2/\3", line) lines.append(line) return "\n".join(lines) # Precompiled regex patterns for minification HTML_COMMENT = re.compile(r"") HTML_PRESERVE = re.compile(r"<(pre|code|textarea|script)[^>]*>[\s\S]*?", re.IGNORECASE) HTML_TAG_SPACE = re.compile(r">\s+<") HTML_MULTI_SPACE = re.compile(r"\s{2,}") HTML_EMPTY_LINE = re.compile(r"^\s*$\n", re.MULTILINE) CSS_COMMENT = re.compile(r"/\*[\s\S]*?\*/") def remove_comments_and_empty_lines(content: str, file_type: str) -> str: """Remove comments and empty lines from a string of code, preserving newlines and URLs. Args: content (str): Code content to process. file_type (str): Type of file ('html', 'css', or 'js'). Returns: (str): Cleaned content with comments and empty lines removed. Notes: Typical reductions for Ultralytics Docs are: - Total HTML reduction: 2.83% (1301.56 KB saved) - Total CSS reduction: 1.75% (2.61 KB saved) - Total JS reduction: 13.51% (99.31 KB saved) """ if file_type == "html": content = HTML_COMMENT.sub("", content) # Remove HTML comments # Preserve whitespace in
, ,