Created
November 25, 2025 13:46
-
-
Save lcarva/659d9536c2b9895d649645ac49fe660a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import re | |
| import glob | |
| import os | |
| from datetime import datetime | |
| import matplotlib.pyplot as plt | |
| from collections import Counter | |
| import numpy as np | |
# Journal files are named YYYY_MM_DD.md; the groups capture year, month, day.
DATED_NAME_RE = re.compile(r'(\d{4})_(\d{2})_(\d{2})\.md')


def collect_entries(paths):
    """Build one record per readable, validly dated journal file.

    Args:
        paths: Iterable of file paths. Only the final path component is
            matched against the ``YYYY_MM_DD.md`` naming scheme, so paths
            that include a directory are accepted as well.

    Returns:
        A list of ``(date, size_in_bytes, line_count, path)`` tuples in the
        same order as *paths*. Files whose name is not a real calendar date,
        or that cannot be read as UTF-8 text, are reported on stdout and
        left out instead of aborting the whole scan.
    """
    collected = []
    for path in paths:
        match = DATED_NAME_RE.fullmatch(os.path.basename(path))
        if match is None:
            continue
        year, month, day = (int(part) for part in match.groups())

        try:
            date = datetime(year, month, day)
        except ValueError:
            # Digits in the right places but not a real date (e.g. month 13).
            print(f"Skipping {path}: file name is not a valid calendar date")
            continue

        try:
            size = os.path.getsize(path)  # Size in bytes
            with open(path, 'r', encoding='utf-8') as f:
                # Count lines lazily rather than loading the whole file.
                lines = sum(1 for _ in f)
        except (OSError, ValueError) as err:
            # ValueError covers UnicodeDecodeError for non-UTF-8 content.
            print(f"Skipping {path}: {err}")
            continue

        collected.append((date, size, lines, path))
    return collected


# Find all files matching the date pattern
files = glob.glob('????_??_??.md')
# Extract dates and file sizes
entries = collect_entries(files)
# Sort by date so the first/last entries give the covered date range
entries.sort(key=lambda entry: entry[0])
dates = [e[0] for e in entries]
sizes = [e[1] for e in entries]
line_counts = [e[2] for e in entries]

print(f"Found {len(entries)} dated files")
if not entries:
    # Everything below (dates[0], min()/max(), the plots) needs at least one
    # entry; stop with a readable message instead of an IndexError traceback.
    raise SystemExit("No readable YYYY_MM_DD.md files found in the current directory; nothing to summarize.")

# Summary statistics; sizes are reported both in bytes and in KB (1024 bytes)
min_size, max_size = min(sizes), max(sizes)
print(f"Date range: {dates[0].date()} to {dates[-1].date()}")
print(f"Size range: {min_size} to {max_size} bytes ({min_size/1024:.1f} to {max_size/1024:.1f} KB)")
print(f"Line count range: {min(line_counts)} to {max(line_counts)} lines")
print(f"Average size: {np.mean(sizes)/1024:.1f} KB, Median: {np.median(sizes)/1024:.1f} KB")
print(f"Average lines: {np.mean(line_counts):.0f}, Median: {np.median(line_counts):.0f}")
# Create visualization: a 2x2 grid of panels on one figure
fig, ((timeline_ax, monthly_ax), (trend_ax, hist_ax)) = plt.subplots(2, 2, figsize=(14, 10))

# Panel 1: timeline, with marker area scaled by file size into roughly 20..320
size_floor = min(sizes)
size_span = max(sizes) - size_floor + 1  # +1 keeps the divisor non-zero when all sizes are equal
marker_areas = [(s - size_floor) / size_span * 300 + 20 for s in sizes]
timeline_ax.scatter(dates, [1] * len(dates), alpha=0.6, s=marker_areas, c=line_counts, cmap='viridis')
timeline_ax.set_yticks([])  # the y position carries no information on this panel
timeline_ax.set(
    xlabel='Date',
    title='Timeline of Journal Entries (marker size = file size, color = line count)',
)
timeline_ax.grid(True, alpha=0.3)
plt.setp(timeline_ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

# Panel 2: number of entries per calendar month, in chronological order
per_month = sorted(Counter(d.strftime('%Y-%m') for d in dates).items())
month_labels = [label for label, _ in per_month]
month_totals = [total for _, total in per_month]
bar_positions = range(len(month_labels))
monthly_ax.bar(bar_positions, month_totals, alpha=0.7)
monthly_ax.set_xticks(bar_positions)
monthly_ax.set_xticklabels(month_labels, rotation=45, ha='right')
monthly_ax.set(
    xlabel='Month',
    ylabel='Number of Entries',
    title='Journal Entries per Month',
)
monthly_ax.grid(True, alpha=0.3, axis='y')

# Panel 3: entry size (KB) against date, coloured by line count
kilobytes = [s / 1024 for s in sizes]
trend_ax.scatter(dates, kilobytes, alpha=0.6, s=50, c=line_counts, cmap='viridis')
trend_ax.set(
    xlabel='Date',
    ylabel='Size (KB)',
    title='Journal Entry Size Over Time (color = line count)',
)
trend_ax.grid(True, alpha=0.3)
plt.setp(trend_ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

# Panel 4: histogram of entry sizes with mean and median marked
mean_kb = np.mean(kilobytes)
median_kb = np.median(kilobytes)
hist_ax.hist(kilobytes, bins=20, alpha=0.7, edgecolor='black')
hist_ax.axvline(mean_kb, color='red', linestyle='--', label=f'Mean: {mean_kb:.1f} KB')
hist_ax.axvline(median_kb, color='green', linestyle='--', label=f'Median: {median_kb:.1f} KB')
hist_ax.set(
    xlabel='Size (KB)',
    ylabel='Frequency',
    title='Distribution of Journal Entry Sizes',
)
hist_ax.legend()
hist_ax.grid(True, alpha=0.3, axis='y')

# Write the figure to disk first, then open the interactive window
plt.tight_layout()
plt.savefig('date_visualization.png', dpi=150, bbox_inches='tight')
print(f"\nVisualization saved to: date_visualization.png")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment