# Source code for sphinx_source_tree

"""
sphinx-source-tree
==================
Generate a reStructuredText file containing an ASCII project tree
and ``literalinclude`` directives for every source file.

Reads defaults from ``[tool.sphinx-source-tree]`` in ``pyproject.toml``.
Per-file settings live under ``[[tool.sphinx-source-tree.files]]``.
CLI arguments always take precedence.
"""

from __future__ import annotations

import argparse
import fnmatch
import os
import sys
from pathlib import Path
from typing import Any

# Package metadata; ``__version__`` is also shown by the CLI ``--version`` flag.
__title__ = "sphinx-source-tree"
__version__ = "0.2.3"
__author__ = "Artur Barseghyan <artur.barseghyan@gmail.com>"
__copyright__ = "2026 Artur Barseghyan"
__license__ = "MIT"
# Public API: the names exported by a star-import of this module.
__all__ = (
    "build_parser",
    "build_tree",
    "collect_files",
    "detect_language",
    "generate",
    "load_config",
    "main",
    "resolve_config",
)

# Built-in configuration: the lowest-priority layer in ``resolve_config``
# (DEFAULTS < pyproject.toml < CLI arguments). Keys use underscores; hyphenated
# pyproject keys are converted to this form by ``_normalise_keys``.
DEFAULTS: dict[str, Any] = {
    # Directory that is scanned; also where ``pyproject.toml`` is looked up.
    "project_root": ".",
    # Maximum depth passed to ``build_tree``.
    "depth": 10,
    # Destination ``.rst`` path; ``literalinclude`` paths are made relative
    # to its parent directory.
    "output": "docs/source_tree.rst",
    # File suffixes (with leading dot, compared exactly against
    # ``Path.suffix``) that get a ``literalinclude`` block.
    "extensions": [
        ".js",
        ".json",
        ".md",
        ".py",
        ".rst",
        ".toml",
        ".yaml",
        ".yml",
    ],
    # ``fnmatch`` patterns excluded from both the tree and the file listing.
    "ignore": [
        "*.egg-info",
        "*.py,cover",
        "*.pyc",
        "*.pyo",
        ".DS_Store",
        ".coverage",
        ".coverage.*",
        ".git",
        ".hg",
        ".hypothesis",
        ".idea",
        ".mypy_cache",
        ".nox",
        ".pytest_cache",
        ".ruff_cache",
        ".secrets.baseline",
        ".svn",
        ".tox",
        ".venv",
        ".vscode",
        "LICENSE",
        "Thumbs.db",
        "__pycache__",
        "_static",
        "build",
        "dist",
        "env",
        "htmlcov",
        "node_modules",
        "venv",
    ],
    # Paths to restrict output to; only consulted when ``include_all`` is
    # false and the list is non-empty.
    "whitelist": [],
    # When true the whitelist is bypassed entirely.
    "include_all": True,
    # RST section title of the generated document.
    "title": "Project source-tree",
    # Add ``:linenos:`` to every ``literalinclude`` directive.
    "linenos": False,
    # Extra ``{suffix: language}`` entries merged on top of ``LANGUAGE_MAP``.
    "extra_languages": {},
    # Per-file inclusion-range options (see ``VALID_FILE_OPTIONS``), keyed by
    # file path.
    "file_options": {},
    # Named alternatives to ``file_options``: ``{profile_name: mapping}``.
    "file_options_profiles": {},
    # Name of the profile to use, or ``None`` to use ``file_options``.
    "file_options_profile": None,
    # Files to list first in the ``literalinclude`` section, in this order.
    "order": [],
}

# Built-in mapping from file suffix (including the leading dot) to the
# highlight language written as ``:language:``. ``detect_language`` looks up
# ``Path.suffix`` verbatim, so matching is case-sensitive (hence the separate
# ``.r`` and ``.R`` entries) and a suffix-less file name such as ``Dockerfile``
# does not hit the ``.dockerfile`` entry.
LANGUAGE_MAP: dict[str, str] = {
    ".py": "python",
    ".pyi": "python",
    ".pyx": "cython",
    ".js": "javascript",
    ".mjs": "javascript",
    ".ts": "typescript",
    ".tsx": "tsx",
    ".jsx": "jsx",
    ".java": "java",
    ".kt": "kotlin",
    ".md": "markdown",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".json": "json",
    ".sh": "bash",
    ".bash": "bash",
    ".zsh": "bash",
    ".rst": "rst",
    ".toml": "toml",
    ".cfg": "ini",
    ".ini": "ini",
    ".html": "html",
    ".jinja": "jinja",
    ".jinja2": "jinja",
    ".css": "css",
    ".scss": "scss",
    ".sass": "sass",
    ".less": "less",
    ".sql": "sql",
    ".rb": "ruby",
    ".go": "go",
    ".rs": "rust",
    ".c": "c",
    ".cpp": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".xml": "xml",
    ".r": "r",
    ".R": "r",
    ".lua": "lua",
    ".php": "php",
    ".swift": "swift",
    ".dockerfile": "dockerfile",
    ".tf": "hcl",
    ".graphql": "graphql",
    ".proto": "protobuf",
    ".makefile": "makefile",
}

# Per-file ``literalinclude`` options accepted by ``_validate_file_options``
# (the subset that controls which part of a file is included). Names are in
# their hyphenated form; underscore spellings are normalised before lookup and
# anything else is dropped with a warning.
VALID_FILE_OPTIONS: frozenset[str] = frozenset(
    ["lines", "start-at", "start-after", "end-before", "end-at"]
)


# ── config ───────────────────────────────────────────────────────────


def load_config(project_root: Path) -> dict[str, Any]:
    """Load ``[tool.sphinx-source-tree]`` from *pyproject.toml*.

    Parameters
    ----------
    project_root:
        Directory expected to contain ``pyproject.toml``.

    Returns
    -------
    dict
        The full section dict, which may contain a ``files`` key (list of
        per-file override dicts) alongside top-level defaults.  An empty
        dict is returned when the file or the section is missing, when no
        TOML parser is available, or when the file cannot be read or
        parsed.

    Notes
    -----
    Loading is best-effort and never raises for a bad configuration file,
    but every failure is reported on stderr so that a malformed
    ``pyproject.toml`` is not silently replaced by the built-in defaults.
    """
    pyproject_path = project_root / "pyproject.toml"
    if not pyproject_path.is_file():
        return {}

    try:
        if sys.version_info >= (3, 11):
            import tomllib
        else:
            import tomli as tomllib  # type: ignore[no-redef]
    except ImportError:
        print(
            f"Warning: cannot read {pyproject_path}: no TOML parser "
            f"available (install 'tomli' on Python < 3.11).",
            file=sys.stderr,
        )
        return {}

    try:
        with open(pyproject_path, "rb") as fh:
            data = tomllib.load(fh)
    except (OSError, ValueError) as exc:
        # TOMLDecodeError and UnicodeDecodeError both derive from ValueError.
        print(
            f"Warning: could not load {pyproject_path}: {exc}",
            file=sys.stderr,
        )
        return {}

    # Guard against ``tool`` or the section being a scalar instead of a table.
    tool = data.get("tool", {})
    if not isinstance(tool, dict):
        return {}
    section = tool.get("sphinx-source-tree", {})
    return section if isinstance(section, dict) else {}
def _normalise_keys(d: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict whose keys have every ``-`` replaced by ``_``.

    Values are carried over unchanged and *d* itself is not modified.  When
    two keys collapse to the same name, the one that appears later in *d*
    wins.
    """
    normalised: dict[str, Any] = {}
    for key, value in d.items():
        normalised[key.replace("-", "_")] = value
    return normalised
def resolve_config(
    cli_ns: argparse.Namespace,
    defaults: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Layer the configuration sources into one dict.

    Precedence, lowest to highest: *defaults* (``DEFAULTS`` when omitted or
    empty), the ``[tool.sphinx-source-tree]`` table of ``pyproject.toml``,
    then every attribute of *cli_ns* whose value is not ``None``.

    ``project_root`` is special-cased: it comes from the CLI if given,
    otherwise from *defaults*, is resolved to an absolute path first (it is
    needed to find ``pyproject.toml``) and is never taken from the
    pyproject table.

    If the table has ``[[tool.sphinx-source-tree.files]]`` entries, the
    result also carries a ``"files"`` list with one fully merged config per
    entry (top-level result < entry < CLI overrides).
    """
    merged = dict(defaults or DEFAULTS)

    # The project root has to be known before pyproject.toml can be read.
    root_arg = cli_ns.project_root
    if root_arg is None:
        root_arg = merged.get("project_root", ".")
    project_root = Path(root_arg).resolve()
    root_str = str(project_root)

    # pyproject layer; the "files" array is split off and handled below.
    pyproject_cfg = _normalise_keys(load_config(project_root))
    file_entries: list[dict[str, Any]] = pyproject_cfg.pop("files", [])
    for key, value in pyproject_cfg.items():
        if key != "project_root":
            merged[key] = value

    # CLI layer: only options the user actually supplied.
    cli_overrides: dict[str, Any] = {}
    for key, value in vars(cli_ns).items():
        if value is None or key == "project_root":
            continue
        cli_overrides[key] = value
    merged.update(cli_overrides)
    merged["project_root"] = root_str

    if file_entries:
        # ``merged`` has no "files" key yet while this list is being built,
        # so the per-file dicts never nest each other.
        merged["files"] = [
            {
                **merged,
                **_normalise_keys(entry),
                **cli_overrides,  # the CLI always has the last word
                "project_root": root_str,
            }
            for entry in file_entries
        ]

    return merged
# ----------------------------------------------------------------------------
# helpers
# ----------------------------------------------------------------------------


def _is_ignored(rel_path: str, name: str, patterns: list[str]) -> bool:
    """Return True when *rel_path* is excluded by one of *patterns*.

    Matching rules, per pattern (``fnmatch`` syntax):

    - A pattern containing ``/`` is matched against the path itself and
      against each of its leading sub-paths, so ``docs/_build`` excludes the
      directory *and* everything below it.
    - Any other pattern is matched against each individual path component,
      so ``build`` excludes ``build/x.py`` and ``pkg/build`` but not
      ``src/builder.py``.

    A trailing ``/`` on a pattern is ignored (``build/`` behaves like
    ``build``).

    *name* is not used for matching; it is kept so existing callers that
    pass the bare file name keep working.
    """
    # Normalise path separators to '/'.
    rel_path = rel_path.replace(os.sep, "/")
    parts = rel_path.split("/")
    # Every leading sub-path: "a", "a/b", "a/b/c.py".
    prefixes = ["/".join(parts[: i + 1]) for i in range(len(parts))]

    for pat in patterns:
        pat = pat.replace(os.sep, "/").rstrip("/")
        if "/" in pat:
            if any(fnmatch.fnmatch(prefix, pat) for prefix in prefixes):
                return True
        elif any(fnmatch.fnmatch(part, pat) for part in parts):
            # Whole components only: a substring match would make "env" hide
            # "environment.py" and "build" hide "builder.py".
            return True
    return False


def _matches_whitelist(rel_path: str, whitelist: list[str]) -> bool:
    """True when *rel_path* equals a whitelist entry or lies beneath one.

    Leading and trailing slashes on whitelist entries are ignored.
    """
    for w in whitelist:
        w = w.strip("/")
        if rel_path == w or rel_path.startswith(w + "/"):
            return True
    return False


def _should_show_dir(rel_path: str, whitelist: list[str]) -> bool:
    """True when the directory is whitelisted *or* is an ancestor of one."""
    if _matches_whitelist(rel_path, whitelist):
        return True
    return any(w.strip("/").startswith(rel_path + "/") for w in whitelist)


def _validate_file_options(
    options: dict[str, Any],
    source: str = "",
) -> dict[str, str]:
    """Return only the recognised inclusion-range options, coerced to strings.

    Underscores in option names are converted to hyphens before the lookup
    in ``VALID_FILE_OPTIONS``.  Unknown keys are dropped and reported with a
    warning on stderr; *source*, when given, names the file in that warning.
    """
    validated: dict[str, str] = {}
    for key, value in options.items():
        normalised = key.replace("_", "-")
        if normalised in VALID_FILE_OPTIONS:
            validated[normalised] = str(value)
        else:
            label = f" for {source!r}" if source else ""
            print(
                f"Warning: unknown file option {key!r}{label} ignored. "
                f"Valid options: {sorted(VALID_FILE_OPTIONS)}",
                file=sys.stderr,
            )
    return validated


def _resolve_file_options_profile(cfg: dict[str, Any]) -> dict[str, Any]:
    """Return the effective ``file_options`` dict for *cfg*.

    Resolution order:

    1. If ``file_options_profile`` names a key in ``file_options_profiles``,
       use that profile's mapping.
    2. If ``file_options_profile`` is set but the name is not found in
       ``file_options_profiles``, emit a warning and fall back to step 3.
    3. Use ``file_options`` directly (the default / top-level flat mapping).
    """
    profiles: dict[str, Any] = cfg.get("file_options_profiles") or {}
    profile_name: str | None = cfg.get("file_options_profile")

    if profile_name is not None:
        if profile_name in profiles:
            return profiles[profile_name]
        print(
            f"Warning: file-options-profile {profile_name!r} not found in "
            f"file-options-profiles. "
            f"Available profiles: {sorted(profiles) or '(none)'}. "
            f"Falling back to top-level file-options.",
            file=sys.stderr,
        )

    return cfg.get("file_options") or {}


def _apply_order(
    files: list[Path],
    order: list[str],
    root: Path,
) -> list[Path]:
    """Return *files* reordered so that paths listed in *order* come first.

    Files named in *order* appear at the front in the specified sequence;
    an entry that is repeated is honoured once, at its first position.  Any
    *order* entry that does not match a collected file is skipped with a
    warning on stderr (the file may have been excluded by extension /
    ignore rules).  The remaining files follow in their original (sorted)
    order.

    *order* entries are interpreted as paths relative to *root*.  Absolute
    paths are also accepted and made relative to *root* when they lie
    inside it.
    """
    if not order:
        return files

    # Build a lookup: relative-posix-path -> Path object.
    file_map: dict[str, Path] = {
        fp.relative_to(root).as_posix(): fp for fp in files
    }

    # Normalise each order entry to a relative posix key.
    ordered_keys: list[str] = []
    for entry in order:
        entry_path = Path(entry)
        key = entry_path.as_posix()
        if entry_path.is_absolute():
            try:
                key = entry_path.relative_to(root).as_posix()
            except ValueError:
                # Outside *root*: keep the absolute form; it will not match.
                pass
        ordered_keys.append(key)

    # Collect pinned files (in order, each at most once), then the rest.
    pinned: list[Path] = []
    pinned_keys: set[str] = set()
    for key in ordered_keys:
        fp = file_map.get(key)
        if fp is None:
            print(
                f"Warning: order entry {key!r} does not match any collected "
                f"file and will be ignored.",
                file=sys.stderr,
            )
        elif key not in pinned_keys:
            pinned_keys.add(key)
            pinned.append(fp)

    rest = [
        fp for fp in files if fp.relative_to(root).as_posix() not in pinned_keys
    ]
    return pinned + rest


# ----------------------------------------------------------------------------
# Core API
# ----------------------------------------------------------------------------
def detect_language(
    path: Path,
    extra: dict[str, str] | None = None,
) -> str:
    """Return the Sphinx highlight language for *path*, or ``""`` if unknown.

    The lookup key is ``path.suffix`` exactly as it is (case-sensitive).  An
    entry in *extra* takes precedence over the built-in ``LANGUAGE_MAP``.
    """
    suffix = path.suffix
    if extra and suffix in extra:
        return extra[suffix]
    return LANGUAGE_MAP.get(suffix, "")
def build_tree(
    path: Path,
    *,
    max_depth: int,
    ignore: list[str],
    whitelist: list[str],
    include_all: bool,
    root: Path,
    prefix: str = "",
) -> str:
    """Return an ASCII directory tree for *path* (recursive).

    Parameters
    ----------
    path:
        Directory whose entries are listed.
    max_depth:
        Remaining levels to descend.  A negative value yields ``""``; a
        directory reached at depth 0 is listed but not expanded.
    ignore:
        Patterns passed to ``_is_ignored``; matching entries are omitted.
    whitelist, include_all:
        When *include_all* is false and *whitelist* is non-empty, only
        whitelisted files and directories that are whitelisted or lead to a
        whitelisted path are shown.
    root:
        Project root; entry paths are made relative to it for matching.
    prefix:
        Text placed before every line produced at this level.

    Returns
    -------
    str
        Newline-joined tree lines without a trailing newline.

    Directories are listed before files, each group sorted by lower-cased
    name.  Entries are filtered *before* connectors are assigned so that
    the last visible entry always receives ``└──``.
    """
    if max_depth < 0:
        return ""

    entries = sorted(
        path.iterdir(), key=lambda p: (p.is_file(), p.name.lower())
    )
    restrict = not include_all and bool(whitelist)

    visible: list[Path] = []
    for entry in entries:
        rel = entry.relative_to(root).as_posix()
        if _is_ignored(rel, entry.name, ignore):
            continue
        if restrict:
            if entry.is_dir():
                if not _should_show_dir(rel, whitelist):
                    continue
            elif not _matches_whitelist(rel, whitelist):
                continue
        visible.append(entry)

    lines: list[str] = []
    last_index = len(visible) - 1
    for idx, entry in enumerate(visible):
        is_last = idx == last_index
        connector = "\u2514\u2500\u2500 " if is_last else "\u251c\u2500\u2500 "
        lines.append(f"{prefix}{connector}{entry.name}")
        if entry.is_dir():
            # The continuation prefix must be exactly as wide as the
            # connector (4 columns) so children line up under their parent.
            extension = "    " if is_last else "\u2502   "
            sub = build_tree(
                entry,
                max_depth=max_depth - 1,
                ignore=ignore,
                whitelist=whitelist,
                include_all=include_all,
                root=root,
                prefix=prefix + extension,
            )
            if sub:
                lines.extend(sub.splitlines())
    return "\n".join(lines)
def collect_files(
    root: Path,
    *,
    extensions: list[str],
    ignore: list[str],
    whitelist: list[str],
    include_all: bool,
) -> list[Path]:
    """Return the sorted files under *root* that get a ``literalinclude``.

    A path is kept when it is a regular file, its suffix is listed in
    *extensions*, it is not excluded by *ignore*, and - only if
    *include_all* is false and *whitelist* is non-empty - it matches the
    whitelist.
    """
    restrict = not include_all and bool(whitelist)

    selected: list[Path] = []
    for candidate in sorted(root.rglob("*")):
        if not candidate.is_file():
            continue
        if candidate.suffix not in extensions:
            continue
        rel = candidate.relative_to(root).as_posix()
        if _is_ignored(rel, candidate.name, ignore):
            continue
        if restrict and not _matches_whitelist(rel, whitelist):
            continue
        selected.append(candidate)
    return selected
def generate(
    project_root: Path | str = ".",
    output: Path | str = "docs/source_tree.rst",
    *,
    depth: int = 10,
    extensions: list[str] | None = None,
    ignore: list[str] | None = None,
    whitelist: list[str] | None = None,
    include_all: bool = True,
    title: str = "Project source-tree",
    linenos: bool = False,
    extra_languages: dict[str, str] | None = None,
    file_options: dict[str, dict[str, Any]] | None = None,
    order: list[str] | None = None,
) -> str:
    """Assemble the complete ``.rst`` document and return it as a string.

    The document consists of a titled header, a ``code-block`` holding the
    ASCII directory tree, and one titled ``literalinclude`` section per
    collected file.  Nothing is written to disk by this function.

    Parameters
    ----------
    project_root:
        Path to the project directory.
    output:
        Destination ``.rst`` path.  Only its parent directory is used, to
        compute the relative ``literalinclude`` paths.
    depth:
        Maximum tree depth.
    extensions:
        File suffixes to include via ``literalinclude``
        (``DEFAULTS["extensions"]`` when ``None``).
    ignore:
        Glob patterns to skip (``DEFAULTS["ignore"]`` when ``None``).
    whitelist:
        Directories to restrict to; ignored when *include_all* is true
        (``DEFAULTS["whitelist"]`` when ``None``).
    include_all:
        Bypass the whitelist.
    title:
        RST section title.
    linenos:
        Add ``:linenos:`` to every ``literalinclude``.
    extra_languages:
        Additional ``{suffix: language}`` mappings that take precedence
        over the built-in ``LANGUAGE_MAP``.
    file_options:
        Per-file ``literalinclude`` inclusion-range options.  Keys are file
        paths relative to *project_root* (or absolute); values are dicts
        with any subset of ``lines``, ``start-at``, ``start-after``,
        ``end-before`` and ``end-at``.  Example::

            {
                "src/app.py": {"end-before": "# ===== Tests ====="},
                "src/utils.py": {"lines": "1-40"},
            }

        In ``pyproject.toml`` (top-level, or as the default profile)::

            [tool.sphinx-source-tree.file-options]
            "src/app.py" = {"end-before" = "# ===== Tests ====="}
            "src/utils.py" = {"lines" = "1-40"}

        Named profiles are defined under
        ``[tool.sphinx-source-tree.file-options-profiles.<name>]`` and
        selected per output file via ``file-options-profile``.  This
        function expects the already-selected mapping; choosing a profile
        is done in ``_generate_from_cfg``.
    order:
        Explicit ordering for the ``literalinclude`` listing.  Each element
        is a file path relative to *project_root* (absolute paths are also
        accepted).  Listed files come **first**, in the given sequence; all
        other collected files follow in their default sorted order.
        Entries that match no collected file (e.g. excluded by extension or
        ignore rules) are skipped with a warning on stderr.  The ASCII
        directory tree is **not** affected.

        Example in ``pyproject.toml``::

            [tool.sphinx-source-tree]
            order = [
                "README.rst",
                "pyproject.toml",
                "src/app.py",
            ]

        Or per ``[[files]]`` entry::

            [[tool.sphinx-source-tree.files]]
            output = "docs/source_tree.rst"
            order = ["src/core.py", "src/utils.py"]

    Returns
    -------
    str
        The generated reStructuredText.
    """
    root = Path(project_root).resolve()
    output_dir = Path(output).resolve().parent

    # Fall back to (copies of) the built-in defaults for omitted lists.
    if extensions is None:
        extensions = list(DEFAULTS["extensions"])
    if ignore is None:
        ignore = list(DEFAULTS["ignore"])
    if whitelist is None:
        whitelist = list(DEFAULTS["whitelist"])

    # Re-key file_options by root-relative posix path so they can be looked
    # up with the same ``rel`` string used for the section titles below.
    options_by_rel: dict[str, dict[str, str]] = {}
    for raw_key, raw_opts in (file_options or {}).items():
        key_path = Path(raw_key)
        rel_key = key_path.as_posix()
        if key_path.is_absolute():
            try:
                rel_key = key_path.relative_to(root).as_posix()
            except ValueError:
                # Outside the project root: keep the absolute form.
                pass
        options_by_rel[rel_key] = _validate_file_options(
            raw_opts, source=raw_key
        )

    # Header: title, intro sentence and the opening of the tree code-block.
    header = "\n".join(
        [
            title,
            "=" * len(title),
            "",
            f"Below is the layout of the project (to {depth} levels), "
            f"followed by",
            "the contents of each key file.",
            "",
            ".. code-block:: text",
            " :caption: Project directory layout",
            "",
            f" {root.name}/",
        ]
    )

    tree = build_tree(
        root,
        max_depth=depth,
        ignore=ignore,
        whitelist=whitelist,
        include_all=include_all,
        root=root,
        prefix=" ",
    )

    collected = collect_files(
        root,
        extensions=extensions,
        ignore=ignore,
        whitelist=whitelist,
        include_all=include_all,
    )
    # Explicit ordering only affects the literalinclude listing.
    collected = _apply_order(collected, order or [], root)

    parts: list[str] = [header, tree, ""]
    for source_file in collected:
        rel = source_file.relative_to(root).as_posix()
        include_path = os.path.relpath(source_file, output_dir).replace(
            os.sep, "/"
        )
        language = detect_language(source_file, extra_languages)

        parts.append(rel)
        parts.append("-" * len(rel))
        parts.append("")
        parts.append(f".. literalinclude:: {include_path}")
        if language:
            parts.append(f" :language: {language}")
        parts.append(f" :caption: {rel}")
        if linenos:
            parts.append(" :linenos:")
        # Per-file inclusion-range options, if any were configured.
        parts.extend(
            f" :{opt_key}: {opt_val}"
            for opt_key, opt_val in options_by_rel.get(rel, {}).items()
        )
        parts.append("")

    return "\n".join(parts)
def _generate_from_cfg(cfg: dict[str, Any]) -> str:
    """Run ``generate()`` with the arguments taken from a resolved config.

    Scalar options missing from *cfg* fall back to ``DEFAULTS``; the list and
    mapping options are passed through as-is (``None`` when absent).  The
    ``file_options`` argument is the profile-resolved mapping.
    """
    kwargs: dict[str, Any] = {
        "project_root": cfg["project_root"],
        "output": cfg.get("output", DEFAULTS["output"]),
        "depth": cfg.get("depth", DEFAULTS["depth"]),
        "extensions": cfg.get("extensions"),
        "ignore": cfg.get("ignore"),
        "whitelist": cfg.get("whitelist"),
        "include_all": cfg.get("include_all", DEFAULTS["include_all"]),
        "title": cfg.get("title", DEFAULTS["title"]),
        "linenos": cfg.get("linenos", DEFAULTS["linenos"]),
        "extra_languages": cfg.get("extra_languages"),
        "file_options": _resolve_file_options_profile(cfg),
        "order": cfg.get("order"),
    }
    return generate(**kwargs)


def _write_output(content: str, out_path: Path) -> None:
    """Write *content* (UTF-8) to *out_path* and report the path on stdout.

    A relative *out_path* is taken relative to the current working
    directory.  Missing parent directories are created.
    """
    target = out_path if out_path.is_absolute() else Path.cwd() / out_path
    target = target.resolve()
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
    print(f"Wrote {target}")


# ----------------------------------------------------------------------------
# CLI
# ----------------------------------------------------------------------------
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser (exposed for documentation / testing).

    Every option defaults to ``None`` so that ``resolve_config`` can tell
    "not given on the command line" apart from an explicit value and only
    let explicit values override ``pyproject.toml``.
    """
    p = argparse.ArgumentParser(
        prog="sphinx-source-tree",
        description=(
            "Generate a .rst file with an ASCII project tree "
            "and literalinclude blocks for every source file."
        ),
    )
    p.add_argument(
        "-V",
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    p.add_argument(
        "-p",
        "--project-root",
        type=Path,
        default=None,
        help="Project directory (default: .)",
    )
    p.add_argument(
        "-d",
        "--depth",
        type=int,
        default=None,
        help="Max tree depth (default: 10)",
    )
    p.add_argument(
        "-o",
        "--output",
        default=None,
        help="Output .rst path (default: docs/source_tree.rst)",
    )
    # Derive the advertised default from DEFAULTS so the help text cannot
    # drift from the value that is actually used.
    default_extensions = " ".join(DEFAULTS["extensions"])
    p.add_argument(
        "-e",
        "--extensions",
        nargs="+",
        default=None,
        metavar="EXT",
        help=f"File extensions to include (default: {default_extensions})",
    )
    p.add_argument(
        "-i",
        "--ignore",
        nargs="+",
        default=None,
        metavar="PAT",
        help="Glob patterns to ignore",
    )
    p.add_argument(
        "-w",
        "--whitelist",
        nargs="+",
        default=None,
        metavar="DIR",
        help="Only include these directories (ignored when --include-all)",
    )
    p.add_argument(
        "--include-all",
        action=argparse.BooleanOptionalAction,
        default=None,
        help="Include everything regardless of whitelist",
    )
    p.add_argument(
        "-t",
        "--title",
        default=None,
        help='RST section title (default: "Project source-tree")',
    )
    p.add_argument(
        "--linenos",
        action=argparse.BooleanOptionalAction,
        default=None,
        help="Add :linenos: to literalinclude directives",
    )
    p.add_argument(
        "--order",
        nargs="+",
        default=None,
        metavar="PATH",
        help=(
            "Explicit file ordering for literalinclude listing. "
            "Listed files appear first, in the given sequence; "
            "remaining files follow in default sorted order. "
            "Does not affect the directory tree."
        ),
    )
    p.add_argument(
        "--stdout",
        action="store_true",
        default=None,
        help="Print to stdout instead of writing to a file",
    )
    return p
def main(argv: list[str] | None = None) -> None:
    """Entry point for the ``sphinx-source-tree`` command.

    Parses *argv* (``sys.argv[1:]`` when ``None``), resolves the
    configuration and produces one document per ``[[files]]`` entry, or a
    single document when there are no such entries.  With ``--stdout`` every
    document is written to stdout (concatenated) instead of to its output
    file.
    """
    args = build_parser().parse_args(argv)
    # --stdout only steers this function; it must not reach the config merge.
    to_stdout = vars(args).pop("stdout")

    cfg = resolve_config(args)

    # Multi-file mode uses the per-file configs; otherwise the top-level
    # config is the one and only job.
    jobs: list[dict[str, Any]] = cfg.get("files", []) or [cfg]
    for job in jobs:
        content = _generate_from_cfg(job)
        if to_stdout:
            sys.stdout.write(content)
            continue
        _write_output(content, Path(job.get("output", DEFAULTS["output"])))
# Run the CLI when the module is executed as a script.
if __name__ == "__main__": main()