Created
December 10, 2025 12:56
-
-
Save nstefan13/937d05c6c236d5428c775ef6051c7942 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| create_hierarchy.py | |
| Creates a directory hierarchy containing files, directories and symlinks | |
| (both valid and intentionally broken). Useful for stress-testing recursive | |
| traversal tools that check for broken links. | |
| Usage: | |
| python3 create_hierarchy.py /path/to/output_root [--dirs N] [--files N] [--symlinks N] [--broken-ratio R] [--seed S] | |
| The script works in two phases: | |
| 1) generate_in_memory_tree(...) - builds an in-memory representation of the | |
| filesystem nodes and decides which symlinks will point to which targets | |
| (including broken ones). | |
| 2) materialize_tree_on_disk(...) - creates directories, files and symlinks | |
| on the real filesystem and records which links are good vs broken. | |
| At the end the script prints a short summary and writes a JSON report to | |
| "link_report.json" inside the created root directory. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import os | |
| import random | |
| import sys | |
| from dataclasses import dataclass, field | |
| from pathlib import Path | |
| from typing import Dict, List, Optional, Tuple | |
@dataclass
class Node:
    """Base class for every entry of the in-memory filesystem tree.

    A node knows its own name and the directory that contains it. The node
    whose ``parent`` is ``None`` is treated as the root of the tree.
    """

    # Name of this entry (a single path component, not a full path).
    name: str
    # Back-reference to the containing directory; None for the root or for a
    # node that has not been attached yet. It is excluded from the generated
    # __eq__ and __repr__: directories hold their children and children point
    # back at their directory, so comparing through this field would chase the
    # parent <-> children cycle until RecursionError.
    parent: Optional["DirNode"] = field(default=None, compare=False, repr=False)

    def path_components(self) -> List[str]:
        """Return the names leading from just below the root to this node.

        The root's own name is never included, so the root itself yields an
        empty list and a direct child of the root yields ``[child.name]``.
        """
        parts: List[str] = []
        cur: Node = self
        # Walk upwards and stop *at* the root (the node without a parent), so
        # that the root's name is left out of the result.
        while cur.parent is not None:
            parts.append(cur.name)
            cur = cur.parent
        parts.reverse()
        return parts
@dataclass
class FileNode(Node):
    """A regular file entry of the in-memory tree."""

    # Text payload of the file; empty unless the creator supplies some.
    content: str = field(default="")
@dataclass
class DirNode(Node):
    """A directory entry of the in-memory tree; it owns a list of child nodes."""

    # Entries located directly inside this directory, in insertion order.
    children: List[Node] = field(default_factory=list)

    def add_child(self, node: Node) -> None:
        """Append ``node`` to this directory and make this directory its parent."""
        self.children.append(node)
        node.parent = self
@dataclass
class SymlinkNode(Node):
    """A symbolic link entry of the in-memory tree.

    The link target is described either by another node of the tree
    (``target_node``) or by a raw path string (``target_string``) that is used
    for links which are meant to be broken.
    """

    # Existing node the link points at, if any.
    target_node: Optional[Node] = field(default=None)
    # Raw target path used when the link does not point at a node.
    target_string: Optional[str] = field(default=None)

    def target_is_broken(self) -> bool:
        """Return True when only a raw target string, and no target node, is set."""
        if self.target_node is not None:
            return False
        return self.target_string is not None
def generate_in_memory_tree(
    root_name: str = "root",
    n_dirs: int = 10,
    n_files: int = 30,
    n_symlinks: int = 20,
    broken_ratio: float = 0.3,
    max_depth: int = 4,
    seed: int = 42,
) -> Tuple[DirNode, List[SymlinkNode]]:
    """Build a random in-memory tree of directories, files and symlinks.

    Nothing is written to disk here; the function only decides the shape of
    the tree and what every symlink points at.

    Args:
        root_name: Name given to the root directory node.
        n_dirs: Number of directories to create below the root.
        n_files: Number of files to create, each in a randomly chosen directory.
        n_symlinks: Number of symlinks to create, each in a randomly chosen
            directory.
        broken_ratio: Probability that a given symlink is intentionally broken.
        max_depth: Maximum number of levels a created directory may sit below
            the root. With a value below 1 no parent qualifies and every
            directory is attached directly to the root.
        seed: Seed for the random generator; the same arguments and seed
            always produce the same tree.

    Returns:
        A ``(root, symlink_nodes)`` tuple: the root directory node and the
        list of all symlink nodes (which are also attached inside the tree).
    """
    # A private generator yields the same stream as random.seed(seed) followed
    # by module-level calls, but does not reseed the process-wide RNG that
    # other code may rely on.
    rng = random.Random(seed)

    root = DirNode(name=root_name, parent=None)
    # Pools of nodes that new entries can be attached to / that links can target.
    dir_pool: List[DirNode] = [root]
    file_pool: List[FileNode] = []

    # Number of random draws tried before falling back to an explicit search
    # for an eligible parent.
    max_draws = 12

    # Create directories.
    for i in range(n_dirs):
        for _ in range(max_draws):
            parent = rng.choice(dir_pool)
            if len(parent.path_components()) < max_depth:
                break
        else:
            # Every draw hit a directory that is already too deep. Choose among
            # the eligible ones instead of silently exceeding max_depth.
            eligible = [d for d in dir_pool if len(d.path_components()) < max_depth]
            parent = rng.choice(eligible) if eligible else root
        new_dir = DirNode(name=f"dir_{i}")
        parent.add_child(new_dir)
        dir_pool.append(new_dir)

    # Create files.
    for i in range(n_files):
        parent = rng.choice(dir_pool)
        new_file = FileNode(name=f"file_{i}.txt", content=f"This is file {i}\n")
        parent.add_child(new_file)
        file_pool.append(new_file)

    # Create symlinks (some valid, some intentionally broken).
    symlink_nodes: List[SymlinkNode] = []
    all_targets: List[Node] = dir_pool + file_pool
    for i in range(n_symlinks):
        location = rng.choice(dir_pool)
        linkname = f"link_{i}"
        if rng.random() < broken_ratio:
            # Use a target string that is not expected to exist once the tree
            # is materialized: either relative or absolute.
            if rng.random() < 0.5:
                target_string = f"../nonexistent_target_{i}"
            else:
                target_string = f"/nonexistent/definitely_missing_{i}"
            link = SymlinkNode(name=linkname, target_node=None, target_string=target_string)
        else:
            # Point at an existing node (directory or file) of the tree.
            link = SymlinkNode(
                name=linkname,
                target_node=rng.choice(all_targets),
                target_string=None,
            )
        location.add_child(link)
        symlink_nodes.append(link)

    return root, symlink_nodes
def _materialize_plain_node(node: Node, parent_fs_path: Path, report: Dict) -> None:
    """Create the directory or file for ``node`` (recursively) and log it in ``report``."""
    fs_path = parent_fs_path / node.name
    if isinstance(node, DirNode):
        fs_path.mkdir(parents=True, exist_ok=True)
        report["created_dirs"].append(str(fs_path))
        for child in node.children:
            _materialize_plain_node(child, fs_path, report)
    elif isinstance(node, FileNode):
        # Explicit encoding so the written bytes do not depend on the locale.
        fs_path.write_text(node.content, encoding="utf-8")
        report["created_files"].append(str(fs_path))
    # Any other node type (i.e. symlinks) is left for the symlink pass.


def _materialize_symlink(
    link_path: Path,
    target: str,
    status_if_resolves: str,
    target_is_directory: bool = False,
) -> str:
    """Create one symlink at ``link_path`` and return its status string.

    If the symlink cannot be created, a ``<name>.symlink`` text file holding
    the intended target is written next to it instead.
    """
    try:
        # Replace whatever is already there, including a dangling symlink
        # (for which exists() is False but is_symlink() is True).
        if link_path.exists() or link_path.is_symlink():
            link_path.unlink()
        os.symlink(target, str(link_path), target_is_directory=target_is_directory)
    except OSError as e:
        # Symlink creation can be restricted on some platforms; leave a marker
        # file that records the intended target.
        fallback_file = link_path.with_suffix(link_path.suffix + ".symlink")
        fallback_file.write_text(target, encoding="utf-8")
        return f"symlink_failed_wrote_fallback: {e!r}"
    # os.path.exists follows the link, so it tells whether the target resolves.
    return status_if_resolves if os.path.exists(str(link_path)) else "broken"


def materialize_tree_on_disk(root_node: DirNode, root_path: Path, symlink_nodes: List[SymlinkNode]) -> Dict:
    """Create the tree described by ``root_node`` on disk below ``root_path``.

    Directories and files are created first; the symlinks listed in
    ``symlink_nodes`` are created in a second pass so that their targets
    already exist. The children of ``root_node`` are created directly inside
    ``root_path``; the root node's own name is not used.

    Args:
        root_node: Root of the in-memory tree.
        root_path: Directory that receives the tree; created if missing.
        symlink_nodes: Symlinks to create. Links with a ``target_node`` get a
            target path relative to the link's directory; the others use
            their ``target_string`` verbatim.

    Returns:
        A report dict with the keys ``created_dirs`` and ``created_files``
        (lists of path strings), ``symlinks`` (list of dicts with ``path``,
        ``target`` and ``status``) and ``summary`` (total / good / broken
        counts).

    Raises:
        RuntimeError: If ``root_path`` exists and is not a directory.
    """
    if root_path.exists() and not root_path.is_dir():
        raise RuntimeError(f"Target exists and is not a directory: {root_path}")
    base = root_path
    base.mkdir(parents=True, exist_ok=True)

    report: Dict = {
        "created_dirs": [],
        "created_files": [],
        "symlinks": [],  # list of dicts: {path, target, status}
    }

    # First pass: directories and files.
    for child in root_node.children:
        _materialize_plain_node(child, base, report)

    # Second pass: symlinks.
    good = 0
    broken = 0
    symlink_reports = []
    for sl in symlink_nodes:
        link_parent_components = sl.parent.path_components() if sl.parent else []
        link_parent_fs = base.joinpath(*link_parent_components)
        link_path = link_parent_fs / sl.name

        if sl.target_node is not None:
            target_fs_path = base.joinpath(*sl.target_node.path_components())
            try:
                # Relative targets keep the tree relocatable.
                target = os.path.relpath(str(target_fs_path), start=str(link_parent_fs))
            except ValueError:
                # relpath raises ValueError when no relative path exists
                # (e.g. different drives on Windows); use the full path then.
                target = str(target_fs_path)
            status = _materialize_symlink(
                link_path,
                target,
                "good",
                # Only has an effect on Windows, where the link type is fixed
                # at creation time.
                target_is_directory=isinstance(sl.target_node, DirNode),
            )
        else:
            # Intentionally broken link: the raw string is used as the target.
            # If it unexpectedly resolves, that is reported as "good_surprising".
            target = sl.target_string or ""
            status = _materialize_symlink(link_path, target, "good_surprising")

        if status in ("good", "good_surprising"):
            good += 1
        else:
            broken += 1
        symlink_reports.append({
            "path": str(link_path),
            "target": target,
            "status": status,
        })

    report["symlinks"] = symlink_reports
    report["summary"] = {
        "symlinks_total": len(symlink_reports),
        "symlinks_good": good,
        "symlinks_broken": broken,
    }
    return report
def _non_negative_int(text: str) -> int:
    """argparse ``type``: parse ``text`` as an integer and reject negative values."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def _unit_fraction(text: str) -> float:
    """argparse ``type``: parse ``text`` as a float within the range 0.0-1.0."""
    try:
        value = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {text!r}") from None
    # Written as a negated range test so that NaN is rejected as well.
    if not 0.0 <= value <= 1.0:
        raise argparse.ArgumentTypeError(f"must be between 0.0 and 1.0, got {text}")
    return value


def main(argv=None):
    """Command-line entry point: generate a tree, write it to disk, report.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.

    Negative counts and a ``--broken-ratio`` outside 0.0-1.0 are rejected by
    the argument parser (which exits with status 2) before anything is
    written to disk. A JSON report named ``link_report.json`` is written into
    the created root directory and a short summary is printed.
    """
    parser = argparse.ArgumentParser(description="Create a test directory tree with valid and broken symlinks.")
    parser.add_argument("rootdir", help="Directory in which to create the hierarchy")
    parser.add_argument("--dirs", type=_non_negative_int, default=10, help="Number of extra subdirectories to create")
    parser.add_argument("--files", type=_non_negative_int, default=30, help="Number of files to create")
    parser.add_argument("--symlinks", type=_non_negative_int, default=20, help="Number of symlinks to create")
    parser.add_argument("--broken-ratio", type=_unit_fraction, default=0.3, help="Fraction of symlinks that should be broken (0.0-1.0)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed for deterministic trees")
    args = parser.parse_args(argv)

    rootdir = Path(args.rootdir).expanduser().resolve()

    # Build in-memory tree
    print(f"Generating in-memory tree (seed={args.seed})...")
    root_node, symlink_nodes = generate_in_memory_tree(
        root_name=rootdir.name,
        n_dirs=args.dirs,
        n_files=args.files,
        n_symlinks=args.symlinks,
        broken_ratio=args.broken_ratio,
        seed=args.seed,
    )

    # Materialize on disk
    print(f"Materializing tree at: {rootdir} ...")
    report = materialize_tree_on_disk(root_node, rootdir, symlink_nodes)

    # Write report (explicit encoding so the file does not depend on the locale)
    report_path = rootdir / "link_report.json"
    report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")

    summary = report["summary"]
    print("\nDone. Summary:")
    print(f" directories created: {len(report['created_dirs'])}")
    print(f" files created: {len(report['created_files'])}")
    print(f" symlinks total: {summary['symlinks_total']}")
    print(f" good: {summary['symlinks_good']}")
    print(f" broken: {summary['symlinks_broken']}")
    print(f"Report written to: {report_path}")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment