#!/usr/bin/env python3 """Heuristic dead-include detector for app/ includes in minecraft/. For each minecraft/ source file that includes a header from app/, check whether the file references any of the top-level identifiers that header defines. If zero references, the include is a candidate for removal. Usage: python3 scripts/find_dead_app_includes.py [--apply] [DIR ...] Without --apply, prints candidates only. With --apply, removes them. DIR is one or more subdirectories of targets/minecraft/ to scope the sweep (e.g. world/entity, server). Defaults to all of targets/minecraft/. Caveats: - The "identifiers a header defines" heuristic catches type names, function names, struct/class/enum names, and macros. It can miss constants used through unusual paths and is fooled by includes that are needed only for transitive type completion. Always build clean after applying. - Comments and strings are not stripped from the consumer scan, so a file that mentions an app symbol only in a comment will look "live" and the include is conservatively kept. """ from __future__ import annotations import argparse import os import re import sys from pathlib import Path REPO_ROOT = Path(__file__).resolve().parent.parent MINECRAFT_ROOT = REPO_ROOT / "targets" / "minecraft" APP_ROOT = REPO_ROOT / "targets" / "app" INCLUDE_RE = re.compile(r'^\s*#\s*include\s*"(app/[^"]+)"\s*$', re.MULTILINE) # Identifier-extracting regexes for header analysis. Best-effort. IDENT_RES = [ # class/struct/union/enum tag definitions re.compile(r'\b(?:class|struct|union|enum(?:\s+class)?)\s+([A-Za-z_]\w*)'), # typedef NAME or typedef ... NAME; re.compile(r'\btypedef\b[^;]*?\b([A-Za-z_]\w*)\s*(?:\[|;)'), # using NAME = ... re.compile(r'\busing\s+([A-Za-z_]\w*)\s*='), # function declarations: WORD WORD ( where second WORD is identifier # this is too loose; skip in favour of usage by name # #define MACRO re.compile(r'^\s*#\s*define\s+([A-Za-z_]\w*)', re.MULTILINE), # extern variable declarations re.compile(r'\bextern\b[^;]*?\b([A-Za-z_]\w*)\s*[;\[(]'), ] CXX_KEYWORDS = { "if", "else", "while", "for", "do", "switch", "case", "default", "break", "continue", "return", "void", "int", "char", "short", "long", "float", "double", "bool", "true", "false", "nullptr", "class", "struct", "union", "enum", "namespace", "using", "typedef", "template", "typename", "const", "constexpr", "static", "extern", "inline", "virtual", "override", "final", "public", "private", "protected", "friend", "this", "new", "delete", "sizeof", "auto", "decltype", "operator", "throw", "try", "catch", "noexcept", "mutable", "volatile", "register", "explicit", "signed", "unsigned", "wchar_t", "char8_t", "char16_t", "char32_t", "size_t", "ptrdiff_t", "nullptr_t", "ifndef", "ifdef", "endif", "define", "include", "pragma", "elif", "error", "warning", "line", "undef", "alignas", "alignof", "concept", "requires", "co_await", "co_yield", "co_return", "consteval", "constinit", "static_cast", "dynamic_cast", "reinterpret_cast", "const_cast", } def extract_header_identifiers(header_path: Path) -> set[str]: """Best-effort extraction of identifiers a header defines.""" if not header_path.exists(): return set() try: text = header_path.read_text(encoding="utf-8", errors="surrogateescape") except OSError: return set() idents: set[str] = set() for regex in IDENT_RES: for match in regex.finditer(text): name = match.group(1) if name and name not in CXX_KEYWORDS and not name.startswith("_"): idents.add(name) return idents def file_references_any(file_text: str, idents: set[str]) -> bool: """Check if any identifier appears as a whole-word match in the file.""" if not idents: return False # Build one big alternation pattern = r'\b(?:' + '|'.join(re.escape(i) for i in idents) + r')\b' return re.search(pattern, file_text) is not None def collect_minecraft_files(roots: list[Path]) -> list[Path]: files: list[Path] = [] for root in roots: for dirpath, _dirnames, filenames in os.walk(root): for name in filenames: if name.endswith((".cpp", ".c", ".h", ".hpp")): files.append(Path(dirpath) / name) files.sort() return files def analyse(roots: list[Path], apply: bool) -> int: files = collect_minecraft_files(roots) header_cache: dict[str, set[str]] = {} candidate_count = 0 for path in files: try: text = path.read_text(encoding="utf-8", errors="surrogateescape") except OSError: continue includes = INCLUDE_RE.findall(text) if not includes: continue # Strip the include lines from the text we scan for symbols, so we # don't false-positive on the include path itself mentioning the # symbol name (e.g. ColourTable.h). scan_text = INCLUDE_RE.sub("", text) dead_includes: list[str] = [] for include_path in includes: cache_key = include_path if cache_key not in header_cache: header_path = REPO_ROOT / "targets" / include_path header_cache[cache_key] = extract_header_identifiers(header_path) idents = header_cache[cache_key] if not idents: # Header has no extractable identifiers (or doesn't exist). # Conservatively skip - don't claim it's dead. continue if not file_references_any(scan_text, idents): dead_includes.append(include_path) if dead_includes: candidate_count += len(dead_includes) rel = path.relative_to(REPO_ROOT) for inc in dead_includes: print(f"{rel}: {inc}") if apply: new_text = text for inc in dead_includes: pattern = re.compile( r'^\s*#\s*include\s*"' + re.escape(inc) + r'"\s*\n', re.MULTILINE, ) new_text = pattern.sub("", new_text) path.write_text(new_text, encoding="utf-8", errors="surrogateescape") print(f"\n{candidate_count} candidate dead include lines" f" {'removed' if apply else 'identified'}") return 0 def main() -> int: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--apply", action="store_true", help="Actually remove the candidate includes") parser.add_argument("dirs", nargs="*", help="Subdirectories of targets/minecraft/ to scan") args = parser.parse_args() if args.dirs: roots = [MINECRAFT_ROOT / d for d in args.dirs] for r in roots: if not r.exists(): print(f"error: {r} does not exist", file=sys.stderr) return 1 else: roots = [MINECRAFT_ROOT] return analyse(roots, args.apply) if __name__ == "__main__": sys.exit(main())