Skip to content

Instantly share code, notes, and snippets.

@cohnt
Created July 17, 2025 22:20
Show Gist options
  • Select an option

  • Save cohnt/295932f31c56d629bada030ab97a4da0 to your computer and use it in GitHub Desktop.

Select an option

Save cohnt/295932f31c56d629bada030ab97a4da0 to your computer and use it in GitHub Desktop.
Plot Your Lines of Code Added to a Repo
import subprocess
import datetime
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import sys
import mplcursors
import re
def get_lines_and_commits_by_author(repo_path, author_name):
    """Collect per-day added-line totals and commit subjects for one author.

    Runs ``git log --numstat`` inside ``repo_path``, filtered with
    ``--author=author_name``, and parses the output.

    Args:
        repo_path: Path to the local git repository to inspect.
        author_name: Author pattern passed to ``git log --author``.

    Returns:
        A ``(lines_by_day, commits_by_day)`` tuple of ``defaultdict`` objects
        keyed by ``YYYY-MM-DD`` date strings. ``lines_by_day`` maps a day to
        the number of lines added that day; ``commits_by_day`` maps a day to
        the list of commit subjects made that day.

    Raises:
        SystemExit: With status 1 when git fails, is not installed, or
            ``repo_path`` cannot be used as a working directory.
    """
    cmd = [
        "git", "log",
        f"--author={author_name}",
        "--pretty=format:%ad||%s",
        "--date=short",
        "--numstat",
    ]
    try:
        # Run git in the repository via ``cwd`` instead of os.chdir() so the
        # caller's working directory is left untouched. Decode explicitly so
        # an unusual locale or a stray byte in a subject cannot crash parsing.
        raw_output = subprocess.check_output(
            cmd, cwd=repo_path, encoding="utf-8", errors="replace"
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing git executable and an unusable repo_path.
        print("Git command failed:", e, file=sys.stderr)
        sys.exit(1)

    lines_by_day = defaultdict(int)
    commits_by_day = defaultdict(list)
    current_date = None

    for line in raw_output.splitlines():
        # --numstat rows are "<added>\t<deleted>\t<path>". Splitting on tabs
        # (not arbitrary whitespace) keeps paths that contain spaces, such as
        # renames written as "old => new".
        fields = line.split("\t", 2)
        is_numstat = len(fields) == 3 and all(
            field == "-" or field.isdecimal() for field in fields[:2]
        )
        if is_numstat:
            # Checked before the header test so a path containing "||" is not
            # mistaken for a commit header. A "-" count (git's marker for
            # binary files) adds nothing.
            if current_date and fields[0].isdecimal():
                lines_by_day[current_date] += int(fields[0])
        elif "||" in line:
            # Header row produced by --pretty=format:%ad||%s.
            current_date, subject = line.split("||", maxsplit=1)
            commits_by_day[current_date].append(subject)

    return lines_by_day, commits_by_day
def group_by_day_and_cumulate(lines_by_day, commits_by_day):
    """Turn per-day added-line counts into a chronological running total.

    Args:
        lines_by_day: Mapping of ``YYYY-MM-DD`` date strings to the number of
            lines added on that day.
        commits_by_day: Mapping of the same date strings to lists of commit
            subjects. A day missing from this mapping gets an empty list.

    Returns:
        A list of ``(date, cumulative_total, messages)`` tuples sorted by
        date, where ``date`` is a ``datetime.datetime`` and
        ``cumulative_total`` is the sum of all counts up to and including
        that day.
    """
    # .get() avoids a KeyError on plain dicts and, unlike indexing, never
    # inserts new keys into a caller-owned defaultdict.
    daily = [
        (
            datetime.datetime.strptime(date_str, "%Y-%m-%d"),
            count,
            commits_by_day.get(date_str, []),
        )
        for date_str, count in lines_by_day.items()
    ]
    # Order by date only, so ties never fall through to comparing the
    # counts or the message lists.
    daily.sort(key=lambda row: row[0])

    cumulative_data = []
    total = 0
    for date, count, messages in daily:
        total += count
        cumulative_data.append((date, total, messages))
    return cumulative_data
def plot_cumulative_lines(
    data,
    author_name,
    pr_url_base="https://github.com/RobotLocomotion/drake/pull/",
):
    """Plot cumulative added lines over time with hover tooltips.

    Args:
        data: Sequence of ``(date, cumulative_total, messages)`` rows, where
            ``messages`` is the list of commit subjects for that date.
        author_name: Author label shown in the plot title.
        pr_url_base: URL prefix to which a pull-request number found in a
            commit subject (``#<digits>``) is appended for the tooltip.
            Defaults to the previously hard-coded repository URL.

    Opens an interactive matplotlib window and blocks until it is closed.
    """
    dates = [row[0] for row in data]
    counts = [row[1] for row in data]
    messages = [row[2] for row in data]

    _, ax = plt.subplots(figsize=(10, 5))
    # Draw actual data points for interaction
    scatter = ax.scatter(dates, counts, label='Commits')
    # Draw the cumulative line
    ax.plot(dates, counts, linewidth=2, label='Cumulative lines')
    ax.set_title(f"Cumulative Lines Added Per Day: {author_name}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Cumulative Lines Added")
    ax.grid(True)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Compiled once here rather than on every hover event.
    pr_pattern = re.compile(r"#(\d+)")

    # Add interactive tooltips to data points
    cursor = mplcursors.cursor(scatter, hover=True)

    @cursor.connect("add")
    def on_add(sel):
        i = int(sel.index)
        tooltip_lines = []
        for msg in messages[i]:
            tooltip_lines.append(f"• {msg}")
            # Extract PR number from message (e.g., "#23111" or "pull request #23111")
            match = pr_pattern.search(msg)
            if match:
                pr_url = f"{pr_url_base}{match.group(1)}"
                tooltip_lines.append(f" {pr_url}")
        # Set the annotation text; without this the lines built above would
        # never be displayed.
        sel.annotation.set(
            text=f"{dates[i].strftime('%Y-%m-%d')}\n" + "\n".join(tooltip_lines),
            fontsize=9,
        )

    plt.legend()
    plt.show()
import datetime
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import sys
import mplcursors
import re
def get_lines_and_commits_by_author(repo_path, author_name):
    """Collect per-day added-line totals and commit subjects for one author.

    Runs ``git log --numstat`` inside ``repo_path``, filtered with
    ``--author=author_name``, and parses the output.

    Args:
        repo_path: Path to the local git repository to inspect.
        author_name: Author pattern passed to ``git log --author``.

    Returns:
        A ``(lines_by_day, commits_by_day)`` tuple of ``defaultdict`` objects
        keyed by ``YYYY-MM-DD`` date strings. ``lines_by_day`` maps a day to
        the number of lines added that day; ``commits_by_day`` maps a day to
        the list of commit subjects made that day.

    Raises:
        SystemExit: With status 1 when git fails, is not installed, or
            ``repo_path`` cannot be used as a working directory.
    """
    cmd = [
        "git", "log",
        f"--author={author_name}",
        "--pretty=format:%ad||%s",
        "--date=short",
        "--numstat",
    ]
    try:
        # Run git in the repository via ``cwd`` instead of os.chdir() so the
        # caller's working directory is left untouched. Decode explicitly so
        # an unusual locale or a stray byte in a subject cannot crash parsing.
        raw_output = subprocess.check_output(
            cmd, cwd=repo_path, encoding="utf-8", errors="replace"
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing git executable and an unusable repo_path.
        print("Git command failed:", e, file=sys.stderr)
        sys.exit(1)

    lines_by_day = defaultdict(int)
    commits_by_day = defaultdict(list)
    current_date = None

    for line in raw_output.splitlines():
        # --numstat rows are "<added>\t<deleted>\t<path>". Splitting on tabs
        # (not arbitrary whitespace) keeps paths that contain spaces, such as
        # renames written as "old => new".
        fields = line.split("\t", 2)
        is_numstat = len(fields) == 3 and all(
            field == "-" or field.isdecimal() for field in fields[:2]
        )
        if is_numstat:
            # Checked before the header test so a path containing "||" is not
            # mistaken for a commit header. A "-" count (git's marker for
            # binary files) adds nothing.
            if current_date and fields[0].isdecimal():
                lines_by_day[current_date] += int(fields[0])
        elif "||" in line:
            # Header row produced by --pretty=format:%ad||%s.
            current_date, subject = line.split("||", maxsplit=1)
            commits_by_day[current_date].append(subject)

    return lines_by_day, commits_by_day
def group_by_day_and_cumulate(lines_by_day, commits_by_day):
    """Turn per-day added-line counts into a chronological running total.

    Args:
        lines_by_day: Mapping of ``YYYY-MM-DD`` date strings to the number of
            lines added on that day.
        commits_by_day: Mapping of the same date strings to lists of commit
            subjects. A day missing from this mapping gets an empty list.

    Returns:
        A list of ``(date, cumulative_total, messages)`` tuples sorted by
        date, where ``date`` is a ``datetime.datetime`` and
        ``cumulative_total`` is the sum of all counts up to and including
        that day.
    """
    # .get() avoids a KeyError on plain dicts and, unlike indexing, never
    # inserts new keys into a caller-owned defaultdict.
    daily = [
        (
            datetime.datetime.strptime(date_str, "%Y-%m-%d"),
            count,
            commits_by_day.get(date_str, []),
        )
        for date_str, count in lines_by_day.items()
    ]
    # Order by date only, so ties never fall through to comparing the
    # counts or the message lists.
    daily.sort(key=lambda row: row[0])

    cumulative_data = []
    total = 0
    for date, count, messages in daily:
        total += count
        cumulative_data.append((date, total, messages))
    return cumulative_data
def plot_cumulative_lines(data, author_name):
    """Show an interactive chart of an author's cumulative added lines.

    Args:
        data: Sequence of rows whose first three items are the date, the
            cumulative line total, and the list of commit subjects.
        author_name: Author label used in the chart title.

    Draws the totals as both a scatter series and a line, attaches hover
    tooltips listing each date's commit subjects, then calls ``plt.show()``.
    """
    dates, counts, messages = [], [], []
    for row in data:
        dates.append(row[0])
        counts.append(row[1])
        messages.append(row[2])

    _, axes = plt.subplots(figsize=(10, 5))

    # The scatter series is the hover target; the line shows the trend.
    points = axes.scatter(dates, counts, label='Commits')
    axes.plot(dates, counts, linewidth=2, label='Cumulative lines')

    axes.set_title(f"Cumulative Lines Added Per Day: {author_name}")
    axes.set_xlabel("Date")
    axes.set_ylabel("Cumulative Lines Added")
    axes.grid(True)

    # Date axis: ISO-formatted ticks at automatically chosen positions.
    axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    axes.xaxis.set_major_locator(mdates.AutoDateLocator())
    plt.xticks(rotation=45)
    plt.tight_layout()

    def on_add(sel):
        # Tooltip: the date on the first line, then one bullet per commit.
        idx = int(sel.index)
        heading = f"{dates[idx].strftime('%Y-%m-%d')}\n"
        bullets = "\n".join(f"• {msg}" for msg in messages[idx])
        sel.annotation.set(text=heading + bullets, fontsize=9)

    hover = mplcursors.cursor(points, hover=True)
    hover.connect("add", on_add)

    plt.legend()
    plt.show()
def main(argv=None):
    """Parse command-line arguments and plot the author's cumulative lines.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse reads them from ``sys.argv``.

    Exits with argparse's usage error when ``repo_path`` is not an existing
    directory. Prints a notice and returns without plotting when no added
    lines are found for the author.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("repo_path", help="Path to the local git repository")
    parser.add_argument("author", help="Author name or email (as recognized by git)")
    args = parser.parse_args(argv)

    # Fail early with a usage message instead of a traceback further down.
    if not os.path.isdir(args.repo_path):
        parser.error(f"repo_path is not a directory: {args.repo_path}")

    print(f"Analyzing repo: {args.repo_path}")
    lines_by_day, commits_by_day = get_lines_and_commits_by_author(args.repo_path, args.author)
    cumulative_data = group_by_day_and_cumulate(lines_by_day, commits_by_day)

    # An empty result would otherwise open a blank, uninformative plot.
    if not cumulative_data:
        print(f"No added lines found for author {args.author!r}; nothing to plot.")
        return

    plot_cumulative_lines(cumulative_data, args.author)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment