mirror of
https://github.com/4jcraft/4jcraft.git
synced 2026-04-24 14:23:43 +00:00
183 lines
7 KiB
Python
183 lines
7 KiB
Python
#!/usr/bin/env python3
|
|
"""Heuristic dead-include detector for app/ includes in minecraft/.
|
|
|
|
For each minecraft/ source file that includes a header from app/, check
|
|
whether the file references any of the top-level identifiers that header
|
|
defines. If zero references, the include is a candidate for removal.
|
|
|
|
Usage:
|
|
python3 scripts/find_dead_app_includes.py [--apply] [DIR ...]
|
|
|
|
Without --apply, prints candidates only. With --apply, removes them.
|
|
DIR is one or more subdirectories of targets/minecraft/ to scope the
|
|
sweep (e.g. world/entity, server). Defaults to all of targets/minecraft/.
|
|
|
|
Caveats:
|
|
- The "identifiers a header defines" heuristic catches type names,
|
|
struct/class/union/enum names, typedef and using aliases, macros, and
extern declarations (other function declarations are not extracted). It can miss
|
|
constants used through unusual paths and is fooled by includes that
|
|
are needed only for transitive type completion. Always build clean
|
|
after applying.
|
|
- Comments and strings are not stripped from the consumer scan, so a
|
|
file that mentions an app symbol only in a comment will look "live"
|
|
and the include is conservatively kept.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Repository layout, derived from this script's own location: the script
# sits one directory below the repository root (scripts/ per the usage text).
REPO_ROOT = Path(__file__).resolve().parent.parent
MINECRAFT_ROOT = REPO_ROOT.joinpath("targets", "minecraft")
APP_ROOT = REPO_ROOT.joinpath("targets", "app")

# Matches a quoted include of an app/ header that occupies a whole line and
# captures the include path (group 1). Nothing but whitespace may follow the
# closing quote, so an include with a trailing comment does not match.
INCLUDE_RE = re.compile(
    r'^\s*#\s*include\s*"(app/[^"]+)"\s*$',
    re.MULTILINE,
)
|
|
|
|
# Best-effort patterns for the identifiers a header defines. Each entry is
# (pattern, flags); capture group 1 of every pattern is the identifier.
#
# There is deliberately no generic function-declaration pattern: matching
# "WORD WORD (" is too loose, so it is skipped in favour of usage by name.
_IDENT_PATTERN_SPECS = (
    # class/struct/union/enum tag definitions
    (r'\b(?:class|struct|union|enum(?:\s+class)?)\s+([A-Za-z_]\w*)', 0),
    # typedef NAME or typedef ... NAME;
    (r'\btypedef\b[^;]*?\b([A-Za-z_]\w*)\s*(?:\[|;)', 0),
    # using NAME = ...
    (r'\busing\s+([A-Za-z_]\w*)\s*=', 0),
    # #define MACRO
    (r'^\s*#\s*define\s+([A-Za-z_]\w*)', re.MULTILINE),
    # extern variable declarations
    (r'\bextern\b[^;]*?\b([A-Za-z_]\w*)\s*[;\[(]', 0),
)
IDENT_RES = [
    re.compile(pattern, flags) for pattern, flags in _IDENT_PATTERN_SPECS
]
|
|
|
|
# Words that the identifier patterns may capture but that are never symbols
# a header defines: C++ keywords, builtin type names, preprocessor directive
# names and a few standard typedefs.
CXX_KEYWORDS = set(
    (
        "if else while for do switch case default "
        "break continue return void int char short long "
        "float double bool true false nullptr class struct "
        "union enum namespace using typedef template typename "
        "const constexpr static extern inline virtual override "
        "final public private protected friend this new "
        "delete sizeof auto decltype operator throw try "
        "catch noexcept mutable volatile register explicit "
        "signed unsigned wchar_t char8_t char16_t char32_t "
        "size_t ptrdiff_t nullptr_t ifndef ifdef endif define "
        "include pragma elif error warning line undef "
        "alignas alignof concept requires co_await co_yield "
        "co_return consteval constinit static_cast dynamic_cast "
        "reinterpret_cast const_cast"
    ).split()
)
|
|
|
|
|
|
def extract_header_identifiers(header_path: Path) -> set[str]:
    """Return the identifiers that *header_path* appears to define.

    Every pattern in ``IDENT_RES`` is run over the header text and capture
    group 1 of each match is collected. Names listed in ``CXX_KEYWORDS`` and
    names starting with an underscore are dropped.

    Returns an empty set when the header does not exist or cannot be read.
    """
    if not header_path.exists():
        return set()
    try:
        source = header_path.read_text(
            encoding="utf-8", errors="surrogateescape"
        )
    except OSError:
        return set()

    captured = (
        found.group(1) for rx in IDENT_RES for found in rx.finditer(source)
    )
    return {
        name
        for name in captured
        if name and not (name in CXX_KEYWORDS or name.startswith("_"))
    }
|
|
|
|
|
|
def file_references_any(file_text: str, idents: set[str]) -> bool:
    """Tell whether *file_text* mentions at least one of *idents*.

    A mention counts only as a whole word (delimited by ``\\b`` on both
    sides). An empty *idents* set never matches.
    """
    if idents:
        # One alternation over all names, searched in a single pass.
        alternatives = "|".join(map(re.escape, idents))
        return bool(re.search(rf"\b(?:{alternatives})\b", file_text))
    return False
|
|
|
|
|
|
def collect_minecraft_files(roots: list[Path]) -> list[Path]:
    """Return every C/C++ source or header file found under *roots*.

    Each root is walked recursively and files ending in ``.cpp``, ``.c``,
    ``.h`` or ``.hpp`` are collected. A file reachable from more than one
    root (for example when both ``world`` and ``world/entity`` are given) is
    returned only once, so it is not analysed or counted twice.

    Returns:
        The matching paths, sorted. Roots that do not exist contribute
        nothing.
    """
    source_suffixes = (".cpp", ".c", ".h", ".hpp")
    found: set[Path] = set()
    for root in roots:
        for dirpath, _dirnames, filenames in os.walk(root):
            directory = Path(dirpath)
            found.update(
                directory / name
                for name in filenames
                if name.endswith(source_suffixes)
            )
    return sorted(found)
|
|
|
|
|
|
def _read_source_text(path: Path) -> str:
    """Read *path* as UTF-8 text without translating line endings."""
    # newline="" keeps CRLF/LF exactly as stored, so a later rewrite of the
    # file does not silently change every line ending.
    with open(
        path, encoding="utf-8", errors="surrogateescape", newline=""
    ) as handle:
        return handle.read()


def _write_source_text(path: Path, text: str) -> None:
    """Write *text* to *path* as UTF-8 without translating line endings."""
    with open(
        path, "w", encoding="utf-8", errors="surrogateescape", newline=""
    ) as handle:
        handle.write(text)


def _remove_include_lines(text: str, include_paths: list[str]) -> str:
    """Delete the ``#include "<path>"`` line for each path in *include_paths*."""
    for inc in include_paths:
        # Only horizontal whitespace may be consumed around the directive:
        # a plain \s would also swallow the blank lines before and after the
        # include. The line may end in LF, CRLF, or the end of the file.
        pattern = re.compile(
            r'^[^\S\r\n]*#[^\S\r\n]*include[^\S\r\n]*"'
            + re.escape(inc)
            + r'"[^\S\r\n]*(?:\r?\n|\Z)',
            re.MULTILINE,
        )
        text = pattern.sub("", text)
    return text


def analyse(roots: list[Path], apply: bool) -> int:
    """Report, and optionally remove, app/ includes that look unused.

    For every source file under *roots*, each ``#include "app/..."`` matched
    by ``INCLUDE_RE`` is checked: if the header yields identifiers and none
    of them occurs in the file (include lines excluded from the scan), the
    include is printed as ``<file relative to repo>: <include path>``.
    Headers that are missing or yield no identifiers are conservatively
    treated as live. Files that cannot be read are skipped.

    Args:
        roots: Directories to sweep.
        apply: When true, the candidate include lines are deleted from the
            files; the original line endings of each file are preserved.

    Returns:
        Always 0.
    """
    header_cache: dict[str, set[str]] = {}
    candidate_count = 0

    for path in collect_minecraft_files(roots):
        try:
            text = _read_source_text(path)
        except OSError:
            continue
        includes = INCLUDE_RE.findall(text)
        if not includes:
            continue

        # Strip the include lines from the text we scan for symbols, so we
        # don't false-positive on the include path itself mentioning the
        # symbol name (e.g. ColourTable.h).
        scan_text = INCLUDE_RE.sub("", text)

        dead_includes: list[str] = []
        for include_path in includes:
            if include_path not in header_cache:
                header_path = REPO_ROOT / "targets" / include_path
                header_cache[include_path] = extract_header_identifiers(
                    header_path
                )
            idents = header_cache[include_path]
            if not idents:
                # Header has no extractable identifiers (or doesn't exist).
                # Conservatively skip - don't claim it's dead.
                continue
            if not file_references_any(scan_text, idents):
                dead_includes.append(include_path)

        if not dead_includes:
            continue

        candidate_count += len(dead_includes)
        rel = path.relative_to(REPO_ROOT)
        for inc in dead_includes:
            print(f"{rel}: {inc}")

        if apply:
            new_text = _remove_include_lines(text, dead_includes)
            if new_text != text:
                _write_source_text(path, new_text)

    print(f"\n{candidate_count} candidate dead include lines"
          f" {'removed' if apply else 'identified'}")
    return 0
|
|
|
|
|
|
def main() -> int:
    """Parse the command line and run the sweep.

    Positional DIR arguments are resolved relative to ``MINECRAFT_ROOT``;
    with none given, the whole of ``MINECRAFT_ROOT`` is scanned.

    Returns:
        1 if a requested directory is missing or is not a directory,
        otherwise the result of ``analyse``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring carries hand-formatted Usage/Caveats sections;
        # the default formatter would reflow them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--apply", action="store_true",
                        help="Actually remove the candidate includes")
    parser.add_argument("dirs", nargs="*",
                        help="Subdirectories of targets/minecraft/ to scan")
    args = parser.parse_args()

    if not args.dirs:
        return analyse([MINECRAFT_ROOT], args.apply)

    roots = [MINECRAFT_ROOT / d for d in args.dirs]
    for r in roots:
        if not r.exists():
            print(f"error: {r} does not exist", file=sys.stderr)
            return 1
        if not r.is_dir():
            # A regular file would be walked as an empty tree and reported
            # as "0 candidates", hiding the mistake.
            print(f"error: {r} is not a directory", file=sys.stderr)
            return 1

    return analyse(roots, args.apply)
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|