"""
sphinx-source-tree
==================
Generate a reStructuredText file containing an ASCII project tree
and ``literalinclude`` directives for every source file.
Reads defaults from ``[tool.sphinx-source-tree]`` in ``pyproject.toml``.
Per-file settings live under ``[[tool.sphinx-source-tree.files]]``.
CLI arguments always take precedence.
"""
from __future__ import annotations
import argparse
import fnmatch
import os
import sys
from pathlib import Path
from typing import Any
# Package metadata, exposed as module-level dunder attributes.
# ``__version__`` is also interpolated into the ``--version`` CLI output.
__title__ = "sphinx-source-tree"
__version__ = "0.2.3"
__author__ = "Artur Barseghyan <artur.barseghyan@gmail.com>"
__copyright__ = "2026 Artur Barseghyan"
__license__ = "MIT"
# Public API: the names exported by a star-import of this module.
__all__ = (
"build_parser",
"build_tree",
"collect_files",
"detect_language",
"generate",
"load_config",
"main",
"resolve_config",
)
# Built-in configuration: the lowest-precedence layer, overridden first by
# ``pyproject.toml`` and then by explicit CLI arguments.
DEFAULTS: dict[str, Any] = {
    # Where to scan, how deep to draw the tree, and where to write.
    "project_root": ".",
    "depth": 10,
    "output": "docs/source_tree.rst",
    # File suffixes that get a ``literalinclude`` block.
    "extensions": [
        ".js", ".json", ".md", ".py",
        ".rst", ".toml", ".yaml", ".yml",
    ],
    # Glob patterns excluded from both the tree and the file listing.
    "ignore": [
        "*.egg-info", "*.py,cover", "*.pyc", "*.pyo",
        ".DS_Store", ".coverage", ".coverage.*",
        ".git", ".hg", ".hypothesis", ".idea", ".mypy_cache", ".nox",
        ".pytest_cache", ".ruff_cache", ".secrets.baseline", ".svn",
        ".tox", ".venv", ".vscode",
        "LICENSE", "Thumbs.db",
        "__pycache__", "_static",
        "build", "dist", "env", "htmlcov", "node_modules", "venv",
    ],
    # The whitelist only takes effect when ``include_all`` is false.
    "whitelist": [],
    "include_all": True,
    # Presentation of the generated document.
    "title": "Project source-tree",
    "linenos": False,
    "extra_languages": {},
    # Per-file ``literalinclude`` range options and named profiles of them.
    "file_options": {},
    "file_options_profiles": {},
    "file_options_profile": None,
    # Files pinned to the front of the ``literalinclude`` listing.
    "order": [],
}
# File suffix -> Sphinx/Pygments highlight language.  Lookups are exact and
# case-sensitive, which is why both ".r" and ".R" are listed.
LANGUAGE_MAP: dict[str, str] = {
    # Python family
    ".py": "python",
    ".pyi": "python",
    ".pyx": "cython",
    # JavaScript / TypeScript
    ".js": "javascript",
    ".mjs": "javascript",
    ".ts": "typescript",
    ".tsx": "tsx",
    ".jsx": "jsx",
    # JVM
    ".java": "java",
    ".kt": "kotlin",
    # Markup and data
    ".md": "markdown",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".json": "json",
    # Shells
    ".sh": "bash",
    ".bash": "bash",
    ".zsh": "bash",
    # Documentation and configuration
    ".rst": "rst",
    ".toml": "toml",
    ".cfg": "ini",
    ".ini": "ini",
    # Web templates and stylesheets
    ".html": "html",
    ".jinja": "jinja",
    ".jinja2": "jinja",
    ".css": "css",
    ".scss": "scss",
    ".sass": "sass",
    ".less": "less",
    # Everything else
    ".sql": "sql",
    ".rb": "ruby",
    ".go": "go",
    ".rs": "rust",
    ".c": "c",
    ".cpp": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".xml": "xml",
    ".r": "r",
    ".R": "r",
    ".lua": "lua",
    ".php": "php",
    ".swift": "swift",
    ".dockerfile": "dockerfile",
    ".tf": "hcl",
    ".graphql": "graphql",
    ".proto": "protobuf",
    ".makefile": "makefile",
}
# The ``literalinclude`` options accepted per file: only those that select
# which part of the file's content is included.
VALID_FILE_OPTIONS: frozenset[str] = frozenset(
    (
        "lines",
        "start-at",
        "start-after",
        "end-before",
        "end-at",
    )
)
# ── config ───────────────────────────────────────────────────────────
def load_config(project_root: Path) -> dict[str, Any]:
    """Load ``[tool.sphinx-source-tree]`` from *pyproject.toml*.

    Parameters
    ----------
    project_root:
        Directory expected to contain ``pyproject.toml``.

    Returns
    -------
    dict
        The full section dict, which may contain a ``files`` key (list of
        per-file override dicts) alongside top-level defaults.  An empty
        dict is returned when the file or the section is missing, when the
        file cannot be read or parsed, or when no TOML parser is available.

    Loading stays best-effort (it never raises for a bad file), but a
    failure is reported on stderr instead of being silently discarded, so a
    typo in ``pyproject.toml`` does not quietly revert to the defaults.
    """
    pyproject_path = project_root / "pyproject.toml"
    if not pyproject_path.is_file():
        return {}

    try:
        # ``tomllib`` joined the standard library in 3.11; older
        # interpreters need the ``tomli`` backport.
        if sys.version_info >= (3, 11):
            import tomllib
        else:
            import tomli as tomllib  # type: ignore[no-redef]
        with open(pyproject_path, "rb") as fh:
            data = tomllib.load(fh)
    except (ImportError, OSError, ValueError) as exc:
        # ValueError covers TOMLDecodeError and UnicodeDecodeError.
        print(
            f"Warning: could not load {pyproject_path}: {exc}. "
            f"Ignoring pyproject.toml settings.",
            file=sys.stderr,
        )
        return {}

    tool = data.get("tool")
    if not isinstance(tool, dict):
        return {}
    section = tool.get("sphinx-source-tree")
    if section is None:
        return {}
    if not isinstance(section, dict):
        # e.g. ``sphinx-source-tree = 5`` under ``[tool]``: not a table.
        print(
            f"Warning: [tool.sphinx-source-tree] in {pyproject_path} is not "
            f"a table. Ignoring pyproject.toml settings.",
            file=sys.stderr,
        )
        return {}
    return section
def _normalise_keys(d: dict[str, Any]) -> dict[str, Any]:
"""Return a copy of *d* with hyphenated keys converted to underscores."""
return {k.replace("-", "_"): v for k, v in d.items()}
def resolve_config(
    cli_ns: argparse.Namespace,
    defaults: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Layer the configuration: *defaults* < *pyproject.toml* < CLI.

    A CLI attribute only overrides when its value is not ``None`` (i.e. it
    was explicitly provided).  The project root comes from the CLI or the
    defaults only; a ``project_root`` key in ``pyproject.toml`` is ignored
    because the root is what locates that file in the first place.

    If ``[[tool.sphinx-source-tree.files]]`` entries exist, the result also
    carries a ``"files"`` key: one fully-resolved config per entry, built as
    top-level config < entry < CLI overrides.
    """
    merged = dict(defaults or DEFAULTS)

    # The root must be known before pyproject.toml can be located.
    root_arg = cli_ns.project_root
    if root_arg is None:
        root_arg = merged.get("project_root", ".")
    root_dir = Path(root_arg).resolve()
    root_str = str(root_dir)

    # pyproject.toml layer; the "files" table array is handled separately.
    pyproject_cfg = _normalise_keys(load_config(root_dir))
    file_entries: list[dict[str, Any]] = pyproject_cfg.pop("files", [])
    for key, value in pyproject_cfg.items():
        if key != "project_root":
            merged[key] = value

    # CLI layer: only options the user actually supplied.
    explicit_cli = {
        name: value
        for name, value in vars(cli_ns).items()
        if name != "project_root" and value is not None
    }
    merged.update(explicit_cli)
    merged["project_root"] = root_str

    if file_entries:
        # ``merged`` has no "files" key yet while this list is being built,
        # so the per-file dicts never nest the list inside themselves.
        merged["files"] = [
            {
                **merged,
                **_normalise_keys(entry),
                **explicit_cli,  # CLI always wins last
                "project_root": root_str,
            }
            for entry in file_entries
        ]
    return merged
# ----------------------------------------------------------------------------
# helpers
# ----------------------------------------------------------------------------
def _is_ignored(rel_path: str, name: str, patterns: list[str]) -> bool:
"""Match against both the full relative path and the bare name.
For each pattern:
- If it contains '/', match only against the full path (with wildcards
allowed)
- Otherwise, match against any path component (e.g., dir/file → matches
name, or full path)
"""
# Normalize path separators to '/'
rel_path = rel_path.replace(os.sep, "/")
name_parts = rel_path.split("/")
for pat in patterns:
# Normalize pattern separators (e.g. "dir/*.pyc" → "dir/*.pyc")
pat = pat.replace(os.sep, "/")
# If pattern contains '/', treat as glob against *entire path*
if "/" in pat:
if fnmatch.fnmatch(rel_path, pat):
return True
else:
# Otherwise, match against any path component (dir/file.py →
# matches "file.py")
# or match against the *relative path* (e.g., "__pycache__/foo"
# matches "*__pycache__*")
if any(fnmatch.fnmatch(part, pat) for part in name_parts):
return True
# Also try full path with glob: e.g. pat="*.pyc" should
# match "foo.pyc" anywhere
if fnmatch.fnmatch(rel_path, f"*{pat}*") or fnmatch.fnmatch(
rel_path, f"*{pat}"
):
return True
return False
def _matches_whitelist(rel_path: str, whitelist: list[str]) -> bool:
for w in whitelist:
w = w.strip("/")
if rel_path == w or rel_path.startswith(w + "/"):
return True
return False
def _should_show_dir(rel_path: str, whitelist: list[str]) -> bool:
    """Decide whether directory *rel_path* should be shown under a whitelist.

    A directory is shown when it is whitelisted itself (or sits inside a
    whitelisted path), or when it is an ancestor of some whitelist entry and
    must therefore be drawn to reach that entry.
    """
    if _matches_whitelist(rel_path, whitelist):
        return True
    descendant_prefix = rel_path + "/"
    for item in whitelist:
        if item.strip("/").startswith(descendant_prefix):
            return True
    return False
def _validate_file_options(
    options: dict[str, Any],
    source: str = "",
) -> dict[str, str]:
    """Filter *options* down to the recognised inclusion-range options.

    Keys are normalised by turning underscores into hyphens (``start_at``
    becomes ``start-at``) and values are coerced to ``str``.  A key that is
    not in ``VALID_FILE_OPTIONS`` is dropped and a warning naming it (and
    *source*, when given) is printed to stderr.
    """
    label = f" for {source!r}" if source else ""
    accepted: dict[str, str] = {}
    for raw_key, raw_value in options.items():
        canonical = raw_key.replace("_", "-")
        if canonical not in VALID_FILE_OPTIONS:
            print(
                f"Warning: unknown file option {raw_key!r}{label} ignored. "
                f"Valid options: {sorted(VALID_FILE_OPTIONS)}",
                file=sys.stderr,
            )
            continue
        accepted[canonical] = str(raw_value)
    return accepted
def _resolve_file_options_profile(cfg: dict[str, Any]) -> dict[str, Any]:
"""Return the effective ``file_options`` dict for *cfg*.
Resolution order:
1. If ``file_options_profile`` names a key in ``file_options_profiles``,
use that profile's mapping.
2. If ``file_options_profile`` is set but the name is not found in
``file_options_profiles``, emit a warning and fall back to step 3.
3. Use ``file_options`` directly (the default / top-level flat mapping).
"""
profiles: dict[str, Any] = cfg.get("file_options_profiles") or {}
profile_name: str | None = cfg.get("file_options_profile")
if profile_name is not None:
if profile_name in profiles:
return profiles[profile_name]
print(
f"Warning: file-options-profile {profile_name!r} not found in "
f"file-options-profiles. "
f"Available profiles: {sorted(profiles) or '(none)'}. "
f"Falling back to top-level file-options.",
file=sys.stderr,
)
return cfg.get("file_options") or {}
def _apply_order(
files: list[Path],
order: list[str],
root: Path,
) -> list[Path]:
"""Return *files* reordered so that paths listed in *order* come first.
Files named in *order* appear at the front in the specified sequence.
Any *order* entry that does not match a collected file is silently
skipped (the file may have been excluded by extension / ignore rules).
The remaining files follow in their original (sorted) order.
*order* entries are interpreted as paths relative to *root*. Absolute
paths are also accepted and resolved relative to *root* automatically.
"""
if not order:
return files
# Build a lookup: relative-posix-path → Path object
file_map: dict[str, Path] = {
fp.relative_to(root).as_posix(): fp for fp in files
}
# Normalise each order entry to a relative posix key
ordered_keys: list[str] = []
for entry in order:
entry_path = Path(entry)
if entry_path.is_absolute():
try:
key = entry_path.relative_to(root).as_posix()
except ValueError:
key = entry_path.as_posix()
else:
key = Path(entry).as_posix()
ordered_keys.append(key)
# Collect pinned files (in order), then the rest
pinned: list[Path] = []
for key in ordered_keys:
if key in file_map:
pinned.append(file_map[key])
else:
print(
f"Warning: order entry {key!r} does not match any collected "
f"file and will be ignored.",
file=sys.stderr,
)
pinned_set = {fp.relative_to(root).as_posix() for fp in pinned}
rest = [
fp for fp in files if fp.relative_to(root).as_posix() not in pinned_set
]
return pinned + rest
# ----------------------------------------------------------------------------
# Core API
# ----------------------------------------------------------------------------
def detect_language(
    path: Path,
    extra: dict[str, str] | None = None,
) -> str:
    """Return the Sphinx highlight language for *path*'s suffix.

    Mappings in *extra* take precedence over the built-in ``LANGUAGE_MAP``.
    The lookup is an exact, case-sensitive match on ``path.suffix``; an
    unknown suffix yields an empty string.
    """
    suffix = path.suffix
    if extra and suffix in extra:
        return extra[suffix]
    return LANGUAGE_MAP.get(suffix, "")
def build_tree(
    path: Path,
    *,
    max_depth: int,
    ignore: list[str],
    whitelist: list[str],
    include_all: bool,
    root: Path,
    prefix: str = "",
) -> str:
    """Render the contents of *path* as an ASCII tree, recursively.

    Directories are listed before files, each group sorted by lower-cased
    name.  Ignored entries are dropped, and when *include_all* is false and
    *whitelist* is non-empty, so is everything outside the whitelist.
    Filtering happens before connectors are chosen, so the last *visible*
    entry is the one drawn with the closing connector.

    Recursion stops once *max_depth* drops below zero, in which case an
    empty string is returned.  *root* anchors the relative paths used for
    filtering and *prefix* is prepended to every emitted line.
    """
    if max_depth < 0:
        return ""

    restrict = bool(whitelist) and not include_all

    def _is_visible(candidate: Path) -> bool:
        rel = candidate.relative_to(root).as_posix()
        if _is_ignored(rel, candidate.name, ignore):
            return False
        if not restrict:
            return True
        if candidate.is_dir():
            return _should_show_dir(rel, whitelist)
        return _matches_whitelist(rel, whitelist)

    children = sorted(
        path.iterdir(), key=lambda p: (p.is_file(), p.name.lower())
    )
    shown = [child for child in children if _is_visible(child)]
    last_index = len(shown) - 1

    out: list[str] = []
    for position, child in enumerate(shown):
        if position == last_index:
            connector = "\u2514\u2500\u2500 "
            extension = " "
        else:
            connector = "\u251c\u2500\u2500 "
            extension = "\u2502 "
        out.append(f"{prefix}{connector}{child.name}")
        if not child.is_dir():
            continue
        subtree = build_tree(
            child,
            max_depth=max_depth - 1,
            ignore=ignore,
            whitelist=whitelist,
            include_all=include_all,
            root=root,
            prefix=prefix + extension,
        )
        if subtree:
            out.extend(subtree.splitlines())
    return "\n".join(out)
def collect_files(
    root: Path,
    *,
    extensions: list[str],
    ignore: list[str],
    whitelist: list[str],
    include_all: bool,
) -> list[Path]:
    """Return the files under *root* eligible for ``literalinclude``, sorted.

    A path qualifies when it is a regular file whose suffix is listed in
    *extensions*, it is not excluded by *ignore*, and, if *include_all* is
    false and *whitelist* is non-empty, it falls inside the whitelist.
    """
    restrict = bool(whitelist) and not include_all

    def _eligible(candidate: Path) -> bool:
        if not candidate.is_file() or candidate.suffix not in extensions:
            return False
        rel = candidate.relative_to(root).as_posix()
        if _is_ignored(rel, candidate.name, ignore):
            return False
        return not restrict or _matches_whitelist(rel, whitelist)

    return [fp for fp in sorted(root.rglob("*")) if _eligible(fp)]
def generate(
    project_root: Path | str = ".",
    output: Path | str = "docs/source_tree.rst",
    *,
    depth: int = 10,
    extensions: list[str] | None = None,
    ignore: list[str] | None = None,
    whitelist: list[str] | None = None,
    include_all: bool = True,
    title: str = "Project source-tree",
    linenos: bool = False,
    extra_languages: dict[str, str] | None = None,
    file_options: dict[str, dict[str, Any]] | None = None,
    order: list[str] | None = None,
) -> str:
    """Build the complete ``.rst`` document and return it as a string.

    The document consists of a title, a ``code-block`` holding the ASCII
    directory tree, and one titled ``literalinclude`` section per collected
    file.  Nothing is written to disk by this function.

    Parameters
    ----------
    project_root:
        Project directory to scan.
    output:
        Destination ``.rst`` path.  Only its parent directory is used, to
        compute the relative ``literalinclude`` paths.
    depth:
        Maximum tree depth handed to ``build_tree``.
    extensions:
        File suffixes to include via ``literalinclude``.  ``None`` selects
        ``DEFAULTS["extensions"]``.
    ignore:
        Glob patterns to skip.  ``None`` selects ``DEFAULTS["ignore"]``.
    whitelist:
        Paths to restrict to; has no effect while *include_all* is true.
        ``None`` selects ``DEFAULTS["whitelist"]``.
    include_all:
        Bypass the whitelist.
    title:
        RST section title; the underline is sized to match it.
    linenos:
        Add ``:linenos:`` to every ``literalinclude``.
    extra_languages:
        Additional ``{suffix: language}`` mappings that take precedence
        over the built-in ``LANGUAGE_MAP``.
    file_options:
        Per-file inclusion-range options.  Keys are file paths, relative to
        *project_root* or absolute; values are dicts with any subset of
        ``lines``, ``start-at``, ``start-after``, ``end-before`` and
        ``end-at``, for example::

            {
                "src/app.py": {"end-before": "# ===== Tests ====="},
                "src/utils.py": {"lines": "1-40"},
            }

        Unrecognised option names are dropped with a stderr warning.  This
        argument is the already-resolved mapping: choosing between named
        profiles is done by ``_generate_from_cfg`` before calling here.
    order:
        Paths, relative to *project_root* or absolute, whose
        ``literalinclude`` sections should come first, in the given
        sequence.  The remaining files follow in their default sorted
        order.  Entries matching no collected file are skipped with a
        stderr warning.  The directory tree is not affected.

    Returns
    -------
    str
        The generated reStructuredText.
    """
    root = Path(project_root).resolve()
    output_dir = Path(output).resolve().parent

    # ``None`` selects the packaged default; an explicit empty list is kept.
    active_extensions = (
        list(DEFAULTS["extensions"]) if extensions is None else extensions
    )
    active_ignore = list(DEFAULTS["ignore"]) if ignore is None else ignore
    active_whitelist = (
        list(DEFAULTS["whitelist"]) if whitelist is None else whitelist
    )

    def _relative_key(raw: str) -> str:
        """Turn a ``file_options`` key into a root-relative posix path."""
        candidate = Path(raw)
        if not candidate.is_absolute():
            return candidate.as_posix()
        try:
            return candidate.relative_to(root).as_posix()
        except ValueError:
            # Absolute path outside the project root: keep it unchanged.
            return candidate.as_posix()

    range_options: dict[str, dict[str, str]] = {
        _relative_key(raw_key): _validate_file_options(
            raw_opts, source=raw_key
        )
        for raw_key, raw_opts in (file_options or {}).items()
    }

    underline = "=" * len(title)
    header = (
        f"{title}\n"
        f"{underline}\n"
        f"\n"
        f"Below is the layout of the project (to {depth} levels), "
        f"followed by\nthe contents of each key file.\n"
        f"\n"
        f".. code-block:: text\n"
        f" :caption: Project directory layout\n"
        f"\n"
        f" {root.name}/"
    )
    tree = build_tree(
        root,
        max_depth=depth,
        ignore=active_ignore,
        whitelist=active_whitelist,
        include_all=include_all,
        root=root,
        prefix=" ",
    )

    collected = collect_files(
        root,
        extensions=active_extensions,
        ignore=active_ignore,
        whitelist=active_whitelist,
        include_all=include_all,
    )
    # Explicit ordering only affects the literalinclude listing.
    collected = _apply_order(collected, order or [], root)

    def _render_section(fp: Path) -> list[str]:
        """Return the RST lines of one file's ``literalinclude`` section."""
        rel = fp.relative_to(root).as_posix()
        include_path = os.path.relpath(fp, output_dir).replace(os.sep, "/")
        section = [
            rel,
            "-" * len(rel),
            "",
            f".. literalinclude:: {include_path}",
        ]
        lang = detect_language(fp, extra_languages)
        if lang:
            section.append(f" :language: {lang}")
        section.append(f" :caption: {rel}")
        if linenos:
            section.append(" :linenos:")
        # Per-file inclusion-range options, if any were configured.
        section.extend(
            f" :{opt_key}: {opt_val}"
            for opt_key, opt_val in range_options.get(rel, {}).items()
        )
        section.append("")
        return section

    document: list[str] = [header, tree, ""]
    for fp in collected:
        document.extend(_render_section(fp))
    return "\n".join(document)
def _generate_from_cfg(cfg: dict[str, Any]) -> str:
    """Translate a resolved config dict into a ``generate()`` call.

    ``project_root`` must be present in *cfg*.  Scalar settings missing
    from *cfg* fall back to ``DEFAULTS``; list and dict settings are passed
    through as found (possibly ``None``).  ``file_options`` is the mapping
    chosen by ``_resolve_file_options_profile``.
    """

    def _or_default(name: str) -> Any:
        return cfg.get(name, DEFAULTS[name])

    return generate(
        project_root=cfg["project_root"],
        output=_or_default("output"),
        depth=_or_default("depth"),
        extensions=cfg.get("extensions"),
        ignore=cfg.get("ignore"),
        whitelist=cfg.get("whitelist"),
        include_all=_or_default("include_all"),
        title=_or_default("title"),
        linenos=_or_default("linenos"),
        extra_languages=cfg.get("extra_languages"),
        file_options=_resolve_file_options_profile(cfg),
        order=cfg.get("order"),
    )
def _write_output(content: str, out_path: Path) -> None:
"""Write *content* to *out_path*, creating parent directories as needed."""
if not out_path.is_absolute():
out_path = Path.cwd() / out_path
out_path = out_path.resolve()
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(content, encoding="utf-8")
print(f"Wrote {out_path}")
# ----------------------------------------------------------------------------
# CLI
# ----------------------------------------------------------------------------
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser (exposed for documentation / testing).

    Every option defaults to ``None`` so that, after parsing, an option the
    user did not pass can be told apart from one they did; the effective
    defaults are applied later from ``DEFAULTS`` and ``pyproject.toml``.

    The ``--extensions`` help text is derived from ``DEFAULTS`` so that it
    always lists the real default suffixes (it previously advertised only
    ``.py .md .js .rst``).
    """
    default_extensions = " ".join(DEFAULTS["extensions"])

    p = argparse.ArgumentParser(
        prog="sphinx-source-tree",
        description=(
            "Generate a .rst file with an ASCII project tree "
            "and literalinclude blocks for every source file."
        ),
    )
    p.add_argument(
        "-V",
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )
    p.add_argument(
        "-p",
        "--project-root",
        type=Path,
        default=None,
        help="Project directory (default: .)",
    )
    p.add_argument(
        "-d",
        "--depth",
        type=int,
        default=None,
        help="Max tree depth (default: 10)",
    )
    p.add_argument(
        "-o",
        "--output",
        default=None,
        help="Output .rst path (default: docs/source_tree.rst)",
    )
    p.add_argument(
        "-e",
        "--extensions",
        nargs="+",
        default=None,
        metavar="EXT",
        help=f"File extensions to include (default: {default_extensions})",
    )
    p.add_argument(
        "-i",
        "--ignore",
        nargs="+",
        default=None,
        metavar="PAT",
        help="Glob patterns to ignore",
    )
    p.add_argument(
        "-w",
        "--whitelist",
        nargs="+",
        default=None,
        metavar="DIR",
        help="Only include these directories (ignored when --include-all)",
    )
    p.add_argument(
        "--include-all",
        action=argparse.BooleanOptionalAction,
        default=None,
        help="Include everything regardless of whitelist",
    )
    p.add_argument(
        "-t",
        "--title",
        default=None,
        help='RST section title (default: "Project source-tree")',
    )
    p.add_argument(
        "--linenos",
        action=argparse.BooleanOptionalAction,
        default=None,
        help="Add :linenos: to literalinclude directives",
    )
    p.add_argument(
        "--order",
        nargs="+",
        default=None,
        metavar="PATH",
        help=(
            "Explicit file ordering for literalinclude listing. "
            "Listed files appear first, in the given sequence; "
            "remaining files follow in default sorted order. "
            "Does not affect the directory tree."
        ),
    )
    p.add_argument(
        "--stdout",
        action="store_true",
        default=None,
        help="Print to stdout instead of writing to a file",
    )
    return p
def main(argv: list[str] | None = None) -> None:
    """Entry point for the ``sphinx-source-tree`` command.

    Parses *argv* (``sys.argv`` when ``None``), resolves the layered
    configuration and renders one document per ``[[files]]`` entry, or a
    single document when no such entries exist.  Each document is written
    to its configured output path, or to stdout when ``--stdout`` is given;
    with several entries the documents are emitted one after another.
    """
    args = build_parser().parse_args(argv)

    # ``--stdout`` only selects the sink; remove it so it is not merged
    # into the configuration as if it were a setting.
    to_stdout = args.stdout
    del args.stdout

    cfg = resolve_config(args)
    per_file_cfgs: list[dict[str, Any]] = cfg.get("files", [])
    # Without [[files]] entries the top-level config is the only target.
    targets = per_file_cfgs or [cfg]

    for target in targets:
        rendered = _generate_from_cfg(target)
        if to_stdout:
            sys.stdout.write(rendered)
            continue
        _write_output(
            rendered,
            Path(target.get("output", DEFAULTS["output"])),
        )
if __name__ == "__main__":
main()