Skip to content

Instantly share code, notes, and snippets.

@lcarva
Created November 25, 2025 13:46
Show Gist options
  • Select an option

  • Save lcarva/659d9536c2b9895d649645ac49fe660a to your computer and use it in GitHub Desktop.

Select an option

Save lcarva/659d9536c2b9895d649645ac49fe660a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import re
import glob
import os
from datetime import datetime
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np
# Journal files are named YYYY_MM_DD.md; the groups capture year, month, day.
DATED_NAME_RE = re.compile(r'(\d{4})_(\d{2})_(\d{2})\.md')


def read_entry(path):
    """Return ``(date, size_in_bytes, line_count, path)`` for one dated file.

    Returns ``None`` when the file cannot be used:

    * the base name is not of the form ``YYYY_MM_DD.md`` (silently ignored,
      since the glob pattern also matches non-digit names),
    * the digits do not form a real calendar date,
    * the file cannot be stat'ed, opened, or decoded as UTF-8.

    The last two cases print a "Skipping ..." line so that dropped files
    are visible instead of vanishing silently.
    """
    match = DATED_NAME_RE.fullmatch(os.path.basename(path))
    if match is None:
        return None

    year, month, day = (int(part) for part in match.groups())
    try:
        date = datetime(year, month, day)
    except ValueError:
        print(f"Skipping {path}: not a valid calendar date")
        return None

    try:
        size = os.path.getsize(path)  # Size in bytes
        with open(path, 'r', encoding='utf-8') as f:
            # Count lines while streaming rather than loading the whole file.
            lines = sum(1 for _ in f)
    except (OSError, ValueError) as exc:
        # UnicodeDecodeError is a ValueError subclass, so undecodable files
        # land here as well as missing/unreadable ones.
        print(f"Skipping {path}: {exc}")
        return None

    return (date, size, lines, path)


# Find all files matching the date pattern
files = glob.glob('????_??_??.md')
# Extract dates and file sizes
entries = [entry for entry in map(read_entry, files) if entry is not None]
# Sort by date (first tuple field) so index 0 is the oldest entry
entries.sort(key=lambda entry: entry[0])
print(f"Found {len(entries)} dated files")
if not entries:
    # Everything below indexes dates[0] or calls min()/max(), which raise on
    # an empty list; stop here with a readable message instead.
    raise SystemExit("No files matching YYYY_MM_DD.md were found; nothing to summarize.")

# Split the entry tuples into parallel lists used by the report and the plots
dates = [entry[0] for entry in entries]
sizes = [entry[1] for entry in entries]
line_counts = [entry[2] for entry in entries]

min_size, max_size = min(sizes), max(sizes)
print(f"Date range: {dates[0].date()} to {dates[-1].date()}")
print(f"Size range: {min_size} to {max_size} bytes ({min_size/1024:.1f} to {max_size/1024:.1f} KB)")
print(f"Line count range: {min(line_counts)} to {max(line_counts)} lines")
print(f"Average size: {np.mean(sizes)/1024:.1f} KB, Median: {np.median(sizes)/1024:.1f} KB")
print(f"Average lines: {np.mean(line_counts):.0f}, Median: {np.median(line_counts):.0f}")
# Create visualization: one figure holding a 2x2 grid of panels
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax1, ax2), (ax3, ax4) = axes

# Panel 1: timeline, marker area scaled from 20 up to just under 320 by file size
smallest_size = min(sizes)
size_span = max(sizes) - smallest_size + 1  # +1 keeps the divisor non-zero when all sizes match
sizes_normalized = [(s - smallest_size) / size_span * 300 + 20 for s in sizes]
ax1.scatter(dates, [1] * len(dates), s=sizes_normalized, c=line_counts, cmap='viridis', alpha=0.6)
ax1.set_yticks([])  # the y position is constant, so ticks would carry no information
ax1.set(
    xlabel='Date',
    title='Timeline of Journal Entries (marker size = file size, color = line count)',
)
ax1.grid(True, alpha=0.3)
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45, ha='right')

# Panel 2: number of entries per calendar month, in chronological order
months = [d.strftime('%Y-%m') for d in dates]
month_counts = Counter(months)
sorted_months = sorted(month_counts)
counts = [month_counts[m] for m in sorted_months]
bar_positions = range(len(sorted_months))
ax2.bar(bar_positions, counts, alpha=0.7)
ax2.set_xticks(bar_positions)
ax2.set_xticklabels(sorted_months, rotation=45, ha='right')
ax2.set(
    xlabel='Month',
    ylabel='Number of Entries',
    title='Journal Entries per Month',
)
ax2.grid(True, alpha=0.3, axis='y')

# Panel 3: entry size in KB against date, coloured by line count
sizes_kb = [s / 1024 for s in sizes]
ax3.scatter(dates, sizes_kb, s=50, c=line_counts, cmap='viridis', alpha=0.6)
ax3.set(
    xlabel='Date',
    ylabel='Size (KB)',
    title='Journal Entry Size Over Time (color = line count)',
)
ax3.grid(True, alpha=0.3)
plt.setp(ax3.xaxis.get_majorticklabels(), rotation=45, ha='right')

# Panel 4: histogram of sizes with mean and median marked
mean_kb = np.mean(sizes_kb)
median_kb = np.median(sizes_kb)
ax4.hist(sizes_kb, bins=20, alpha=0.7, edgecolor='black')
ax4.axvline(mean_kb, color='red', linestyle='--', label=f'Mean: {mean_kb:.1f} KB')
ax4.axvline(median_kb, color='green', linestyle='--', label=f'Median: {median_kb:.1f} KB')
ax4.set(
    xlabel='Size (KB)',
    ylabel='Frequency',
    title='Distribution of Journal Entry Sizes',
)
ax4.legend()
ax4.grid(True, alpha=0.3, axis='y')

# Write the figure to disk first, then open the interactive window
plt.tight_layout()
plt.savefig('date_visualization.png', dpi=150, bbox_inches='tight')
print(f"\nVisualization saved to: date_visualization.png")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment