Created
October 19, 2024 22:01
-
-
Save abdalrohman/261ee9018a6f103aaaae8e6368d1d2e0 to your computer and use it in GitHub Desktop.
Utility for previewing JSON file structures
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| json_preview.py - Utility for previewing JSON file structures | |
| This module provides functionality to peek into JSON files and analyze their structure | |
| without loading the entire file into memory. | |
| License: MIT | |
| Author: M.Abdulrahman Alnaseer | |
| GitHub: abdalrohman (github.com) | |
| Copyright (c) 2024 M.Abdulrahman Alnaseer | |
| Permission is hereby granted, free of charge, to any person obtaining a copy | |
| of this software and associated documentation files (the "Software"), to deal | |
| in the Software without restriction, including without limitation the rights | |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| copies of the Software, and to permit persons to whom the Software is | |
| furnished to do so, subject to the following conditions: | |
| The above copyright notice and this permission notice shall be included in all | |
| copies or substantial portions of the Software. | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| SOFTWARE. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from dataclasses import dataclass | |
| from enum import Enum, auto | |
| from pathlib import Path | |
| from typing import Any | |
class JsonStructureError(Exception):
    """Error type used to report JSON structure-related failures."""
class PreviewStrategy(Enum):
    """Display modes that can be selected when printing a JSON preview."""

    # Member values are assigned automatically in declaration order.
    COMPACT = auto()
    DETAILED = auto()
@dataclass
class JsonPreview:
    """Container for the outcome of previewing a JSON file."""

    # Parsed preview data, or None when no parsed result is available.
    parsed_structure: Any | None
    # Text of the preview as read from the file.
    raw_preview: str
    # Whether the previewed text was structurally complete.
    is_complete: bool
    # Short label describing the kind of structure that was found.
    structure_type: str
class JsonStructureAnalyzer:
    """
    Analyzes JSON file structure and provides preview capabilities.

    Only the first ``char_limit`` characters of a file are read. When that
    preview stops in the middle of a document, it is repaired (unfinished
    trailing tokens are dropped and open containers are closed) so that it
    can still be parsed with ``json.loads``.
    """

    def __init__(self, max_file_size: int = 10_000_000):
        """
        Initialize the analyzer.

        :param max_file_size: Maximum allowed file size in bytes
        """
        self.max_file_size = max_file_size

    def _validate_input(self, file_path: str | Path, char_limit: int) -> Path:
        """
        Validate input parameters.

        :param file_path: Path to JSON file
        :param char_limit: Number of characters to read
        :raises ValueError: If char_limit is not positive or the file is
            larger than ``max_file_size``
        :raises FileNotFoundError: If file doesn't exist
        :return: Validated Path object
        """
        if char_limit <= 0:
            raise ValueError("char_limit must be positive")

        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        if path.stat().st_size > self.max_file_size:
            raise ValueError(f"File size exceeds maximum allowed size of {self.max_file_size} bytes")

        return path

    def _balance_brackets(self, content: str) -> tuple[str, bool]:
        """
        Repair a possibly truncated JSON text so that it can be parsed.

        The text is scanned once while tracking the open containers, whether
        the scan is inside a string literal, and which token the innermost
        container expects next. If the text ends with every container closed
        it is returned unchanged. Otherwise it is cut back to the last point
        where a value was known to be complete and the closers for all
        containers open at that point are appended, innermost first.

        Brackets inside string literals are ignored. Malformed input is
        tolerated (stray closers are skipped); rejecting it is left to the
        JSON parser.

        :param content: JSON content string
        :return: Tuple of (balanced content, is completely balanced)
        """
        closer_for = {"{": "}", "[": "]"}
        # What the innermost container expects next.
        expect_key, expect_colon, expect_value, value_done = range(4)

        open_closers: list[str] = []  # closers still owed, innermost last
        state = expect_value
        in_string = False
        string_is_key = False
        escaped = False
        in_scalar = False  # inside a number / true / false / null token

        # Last cut position at which appending closers yields complete JSON,
        # together with a snapshot of the containers open at that position.
        safe_end = 0
        safe_stack: list[str] = []

        for i, char in enumerate(content):
            if in_string:
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == '"':
                    in_string = False
                    if string_is_key:
                        # A key alone is not a safe cut; its value must follow.
                        state = expect_colon
                    else:
                        state = value_done
                        safe_end, safe_stack = i + 1, open_closers.copy()
                continue

            if in_scalar:
                if not (char.isspace() or char in ",]}"):
                    continue
                # A delimiter proves the scalar is complete; cut just before it.
                in_scalar = False
                state = value_done
                safe_end, safe_stack = i, open_closers.copy()

            if char == '"':
                in_string = True
                string_is_key = state == expect_key
            elif char in closer_for:
                open_closers.append(closer_for[char])
                state = expect_key if char == "{" else expect_value
                # An empty container can always be closed immediately.
                safe_end, safe_stack = i + 1, open_closers.copy()
            elif char in "}]":
                # Mismatched or stray closers are skipped, not raised on.
                if open_closers and open_closers[-1] == char:
                    open_closers.pop()
                    state = value_done
                    safe_end, safe_stack = i + 1, open_closers.copy()
            elif char == ",":
                inside_object = bool(open_closers) and open_closers[-1] == "}"
                state = expect_key if inside_object else expect_value
            elif char == ":":
                if state == expect_colon:
                    state = expect_value
            elif not char.isspace() and state == expect_value:
                in_scalar = True

        if not open_closers and not in_string:
            return content, True

        # Drop the unfinished tail and close containers innermost first.
        return content[:safe_end] + "".join(reversed(safe_stack)), False

    def peek_json_structure(self, file_path: str | Path, char_limit: int = 100) -> JsonPreview:
        """
        Read and analyze the structure of a JSON file.

        At most ``char_limit`` characters are used for the preview. If the
        repaired preview cannot be parsed, a ``JsonPreview`` with
        ``parsed_structure=None`` and ``structure_type="invalid"`` is
        returned instead of raising.

        :param file_path: Path to the JSON file
        :param char_limit: Number of characters to read
        :return: JsonPreview object containing analysis results
        :raises ValueError: If char_limit is not positive or the file is too large
        :raises FileNotFoundError: If the file doesn't exist
        :raises JsonStructureError: If the file is not valid UTF-8
        """
        path = self._validate_input(file_path, char_limit)

        try:
            with path.open("r", encoding="utf-8") as file:
                # Read one extra character so "more data follows" can be told
                # apart from "the file is exactly char_limit characters long".
                chunk = file.read(char_limit + 1)
        except UnicodeDecodeError as err:
            raise JsonStructureError("File is not valid UTF-8 encoded") from err

        was_truncated = len(chunk) > char_limit
        content_preview = chunk[:char_limit]
        raw_preview = content_preview + ("..." if was_truncated else "")
        balanced_content, is_complete = self._balance_brackets(content_preview)

        try:
            parsed_json = json.loads(balanced_content)
        except json.JSONDecodeError:
            return JsonPreview(None, raw_preview, False, "invalid")

        structure_type = self._determine_structure_type(parsed_json)
        return JsonPreview(parsed_json, raw_preview, is_complete, structure_type)

    def _determine_structure_type(self, parsed_json: Any) -> str:
        """
        Determine the type of JSON structure.

        :param parsed_json: Parsed JSON data
        :return: "object" for a dict, "array" for a list, otherwise "primitive"
        """
        if isinstance(parsed_json, dict):
            return "object"
        if isinstance(parsed_json, list):
            return "array"
        return "primitive"

    def print_preview(self, preview: JsonPreview, strategy: PreviewStrategy = PreviewStrategy.DETAILED) -> None:
        """
        Print JSON preview according to the specified strategy.

        The raw preview is always printed. The structure type is printed when
        a parsed structure is available, and the DETAILED strategy also
        prints the keys of an object or the length and first item type of a
        non-empty array.

        :param preview: JsonPreview object
        :param strategy: PreviewStrategy enum value
        """
        print("\nRaw preview:")
        print("-" * 50)
        print(preview.raw_preview)
        print("-" * 50)

        parsed = preview.parsed_structure
        if parsed is None:
            return

        print("\nStructure Analysis:")
        print(f"Type: {preview.structure_type}")

        if strategy != PreviewStrategy.DETAILED:
            return

        if isinstance(parsed, dict):
            print("Keys:", list(parsed.keys()))
        elif isinstance(parsed, list) and parsed:
            print(f"Array length in preview: {len(parsed)}")
            print("First item type:", type(parsed[0]).__name__)
if __name__ == "__main__":
    # Example usage of the JSON preview utility: preview the first 500
    # characters of a sample file and print the detailed analysis.
    analyzer = JsonStructureAnalyzer()
    try:
        preview = analyzer.peek_json_structure("data/example.json", 500)
        analyzer.print_preview(preview, PreviewStrategy.DETAILED)
    except (JsonStructureError, ValueError, FileNotFoundError) as err:
        print(f"Error: {err}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment