chalg · September 20, 2025 23:21
diff --git a/u_cameco_wna_event_curve.py b/u_cameco_wna_event_curve.py
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import yfinance as yf

 # ---------- Reload & prep uranium data ----------
 uranium_path = "data/Uranium Futures Historical Data.csv"
 u = pd.read_csv(uranium_path)

 u['Date']  = pd.to_datetime(u['Date'], errors='coerce')
 u['Price'] = pd.to_numeric(u['Price'], errors='coerce')
 u = u.dropna(subset=['Date', 'Price']).sort_values('Date').reset_index(drop=True)
 u = u.set_index('Date').asfreq('B').ffill()  # business days
 u['ret'] = u['Price'].pct_change()
 u = u.dropna(subset=['ret'])

 # ---------- Download and prep CCO.TO stock data ----------
 print("Downloading CCO.TO data...")
 cco = yf.download('CCO.TO', start='2000-01-01', end='2025-09-21', progress=False)
 cco = cco[['Close']].copy()
 cco.columns = ['Price']
 cco = cco.dropna()
 cco = cco.asfreq('B').ffill()  # business days, forward fill
 cco['ret'] = cco['Price'].pct_change()
 cco = cco.dropna(subset=['ret'])

 print(f"Uranium data: {u.index.min()} to {u.index.max()}")
 print(f"CCO.TO data: {cco.index.min()} to {cco.index.max()}")

 # ---------- Helper: approximate WNA date (first Thu on/after Sep 5) ----------
 import datetime as dt
 def approximate_wna_middate(year: int) -> pd.Timestamp:
    d = dt.date(year, 9, 5)  # Sep 5
    # weekday(): Mon=0 ... Sun=6 ; Thursday=3
    days_to_thu = (3 - d.weekday()) % 7
    d = d + dt.timedelta(days=days_to_thu)
    return pd.Timestamp(d)

 # ---------- Manual overrides for years you know exactly ----------
 manual_event_dates = {
    2015: "2015-09-10",
    2016: "2016-09-15",
    2017: "2017-09-14",
    2018: "2018-09-06",
    2019: "2019-09-05",
    # 2020 omitted (pandemic uncertainty)
    2021: "2021-09-09",
    2022: "2022-09-08",
    2023: "2023-09-07",
    2024: "2024-09-05",
    2025: "2025-09-04"  # Added 2025 date
 }

 # Years to exclude explicitly (Fukushima) + anything else you want to skip
 exclude_years = {2011, 2020}

 # Build per-year event index using manual if available, else approximation
 # Use overlapping period for both datasets
 use_years = range(max(u.index.min().year, cco.index.min().year, 2000), 
                 min(u.index.max().year, cco.index.max().year, 2025) + 1)

 event_idx = {}
 event_idx_2025 = {}  # Separate index for 2025

 for y in use_years:
    if y in exclude_years:
        continue  # <-- skip 2011 (and 2020)
    d = pd.Timestamp(manual_event_dates[y]) if y in manual_event_dates else approximate_wna_middate(y)
    
    # Check if date exists in both datasets
    if (d < u.index.min() or d > u.index.max() or 
        d < cco.index.min() or d > cco.index.max()):
        continue
    
    d_nearest = u.index[u.index.get_indexer([d], method='nearest')][0]
    
    if y == 2025:
        event_idx_2025[y] = d_nearest  # Store 2025 separately
    else:
        event_idx[y] = d_nearest  # Historical events for averaging

 print(f"Historical events used: {len(event_idx)} years → {sorted(event_idx.keys())}")
 print(f"2025 event: {sorted(event_idx_2025.keys())}")

 # ---------- Function to calculate event-window cumulative returns ----------
 def calculate_event_returns(data, event_idx, pre=30, post=90):
    window = np.arange(-pre, post + 1)
    paths = []
    
    for y, d in event_idx.items():
        # pull the window of returns
        start_date = d + pd.tseries.offsets.BDay(window[0])
        end_date = d + pd.tseries.offsets.BDay(window[-1])
        
        rets = data.loc[start_date:end_date]['ret']
        if len(rets) != len(window):
            # incomplete window at dataset edges
            continue

        # cumulative path and normalize so t=0 == 1.0
        cum = (1 + rets.values).cumprod()
        base_idx = np.where(window == 0)[0][0]
        cum_norm = cum / cum[base_idx]
        paths.append(cum_norm)
    
    paths = np.array(paths)
    avg_path = paths.mean(axis=0) if len(paths) else np.array([])
    std_path = paths.std(axis=0) if len(paths) else np.array([])
    n_events = len(paths)
    se_path = (std_path / np.sqrt(n_events)) if n_events else std_path
    
    return window, avg_path, se_path, n_events

 # ---------- Function to get single event trajectory (for 2025) ----------
 def get_single_event_trajectory(data, event_date, pre=30, post=90):
    start_date = event_date + pd.tseries.offsets.BDay(-pre)
    end_date = event_date + pd.tseries.offsets.BDay(post)
    
    # Get available data within the window
    try:
        available_data = data.loc[start_date:end_date]
        if len(available_data) == 0:
            return np.array([]), np.array([])
        
        # Calculate actual window positions based on available data
        actual_window = []
        
        # Find the event date position in available data
        event_pos = None
        for i, date in enumerate(available_data.index):
            days_diff = (date - event_date).days
            actual_window.append(days_diff)
            if event_pos is None and date >= event_date:
                event_pos = i
        
        if event_pos is None:
            return np.array([]), np.array([])
        
        # Calculate cumulative returns and normalize to event date
        rets = available_data['ret'].values
        cum = (1 + rets).cumprod()
        cum_norm = cum / cum[event_pos]
        
        return np.array(actual_window), cum_norm
        
    except KeyError:
        return np.array([]), np.array([])

 # ---------- Calculate event returns for historical data ----------
 pre, post = 30, 90

 window_u, avg_path_u, se_path_u, n_events_u = calculate_event_returns(u, event_idx, pre, post)
 window_cco, avg_path_cco, se_path_cco, n_events_cco = calculate_event_returns(cco, event_idx, pre, post)

 # ---------- Calculate 2025 trajectories ----------
 trajectory_2025_u = np.array([])
 trajectory_2025_cco = np.array([])
 window_2025_u = np.array([])
 window_2025_cco = np.array([])

 if 2025 in event_idx_2025:
    event_2025 = event_idx_2025[2025]
    window_2025_u, trajectory_2025_u = get_single_event_trajectory(u, event_2025, pre, post)
    window_2025_cco, trajectory_2025_cco = get_single_event_trajectory(cco, event_2025, pre, post)

 print(f"Historical Uranium futures: {n_events_u} complete event windows")
 print(f"Historical CCO.TO stock: {n_events_cco} complete event windows")
 print(f"2025 Uranium trajectory points: {len(trajectory_2025_u)}")
 print(f"2025 CCO.TO trajectory points: {len(trajectory_2025_cco)}")

 # ---------- Plot ----------
 out_path = "images/uranium_event_curve_with_cco_2025.png"
 fig, ax = plt.subplots(figsize=(8, 6)) #was (10, 6)

 # Plot historical averages
 if len(avg_path_u):
    ax.plot(window_u, (avg_path_u - 1.0) * 100, 
            label=f'Uranium Futures Average (n={n_events_u})', 
            linewidth=2, color='orange', alpha=0.8)
    ax.fill_between(window_u,
                    (avg_path_u - 1.0 - se_path_u) * 100,
                    (avg_path_u - 1.0 + se_path_u) * 100,
                    alpha=0.15, color='orange')

 if len(avg_path_cco):
    ax.plot(window_cco, (avg_path_cco - 1.0) * 100, 
            label=f'CCO.TO Average (n={n_events_cco})', 
            linewidth=2, color='blue', alpha=0.8)
    ax.fill_between(window_cco,
                    (avg_path_cco - 1.0 - se_path_cco) * 100,
                    (avg_path_cco - 1.0 + se_path_cco) * 100,
                    alpha=0.15, color='blue')

 # Plot 2025 trajectories
 if len(trajectory_2025_u):
    ax.plot(window_2025_u, (trajectory_2025_u - 1.0) * 100, 
            label='Uranium 2025 (Current)', 
            linewidth=3, color='red', linestyle='-', alpha=0.9)

 if len(trajectory_2025_cco):
    ax.plot(window_2025_cco, (trajectory_2025_cco - 1.0) * 100, 
            label='CCO.TO 2025 (Current)', 
            linewidth=3, color='darkgreen', linestyle='-', alpha=0.9)

 ax.axvline(0, linestyle='--', linewidth=1, color='black', alpha=0.7)
 ax.axhline(0, linestyle='-', linewidth=0.5, color='black', alpha=0.3)
 ax.set_xlabel('Business days relative to event (0 = WNA Symposium mid-date)')
 ax.set_ylabel('Cumulative return (%)')
 ax.set_title('Uranium Futures vs Cameco: Historical Average vs 2025 Current Trajectory\nAround WNA Symposium')
 ax.legend(loc='best')
 ax.grid(True, linestyle=':', alpha=0.7)

 # Add caption
 caption = "Sources: Uranium - investing.com; CCO.TO - Yahoo Finance"
 ax.text(
    0.98, 0.02,
    caption,
    transform=ax.transAxes,
    ha='right', va='bottom',
    fontsize=8, fontstyle='italic', color='grey',
    bbox=dict(facecolor='white', alpha=0.6, edgecolor='none', pad=1.5)
 )

 fig.tight_layout()
 fig.savefig(out_path, bbox_inches='tight', dpi=300)
 plt.show()
 plt.close(fig)

 # ---------- Print summary statistics ----------
 print("\n" + "="*60)
 print("SUMMARY STATISTICS")
 print("="*60)

 if len(avg_path_u):
    final_ret_u = (avg_path_u[-1] - 1.0) * 100
    print(f"Historical Uranium Futures Average - Final cumulative return (+{post} days): {final_ret_u:.2f}%")

 if len(avg_path_cco):
    final_ret_cco = (avg_path_cco[-1] - 1.0) * 100
    print(f"Historical CCO.TO Average - Final cumulative return (+{post} days): {final_ret_cco:.2f}%")

 if len(trajectory_2025_u):
    current_ret_u = (trajectory_2025_u[-1] - 1.0) * 100
    days_from_event = window_2025_u[-1] if len(window_2025_u) else 0
    print(f"2025 Uranium Current - Return at +{days_from_event:.0f} days: {current_ret_u:.2f}%")

 if len(trajectory_2025_cco):
    current_ret_cco = (trajectory_2025_cco[-1] - 1.0) * 100
    days_from_event = window_2025_cco[-1] if len(window_2025_cco) else 0
    print(f"2025 CCO.TO Current - Return at +{days_from_event:.0f} days: {current_ret_cco:.2f}%")

 print(f"\nHistorical event years: {sorted(event_idx.keys())}")
 if event_idx_2025:
    print(f"Current trajectory year: {sorted(event_idx_2025.keys())}")
 print(f"Output saved to: {out_path}")
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import yfinance as yf

	# ---------- Reload & prep uranium data ----------
	uranium_path = "data/Uranium Futures Historical Data.csv"
	u = pd.read_csv(uranium_path)

	u['Date'] = pd.to_datetime(u['Date'], errors='coerce')
	u['Price'] = pd.to_numeric(u['Price'], errors='coerce')
	u = u.dropna(subset=['Date', 'Price']).sort_values('Date').reset_index(drop=True)
	u = u.set_index('Date').asfreq('B').ffill() # business days
	u['ret'] = u['Price'].pct_change()
	u = u.dropna(subset=['ret'])

	# ---------- Download and prep CCO.TO stock data ----------
	print("Downloading CCO.TO data...")
	cco = yf.download('CCO.TO', start='2000-01-01', end='2025-09-21', progress=False)
	cco = cco[['Close']].copy()
	cco.columns = ['Price']
	cco = cco.dropna()
	cco = cco.asfreq('B').ffill() # business days, forward fill
	cco['ret'] = cco['Price'].pct_change()
	cco = cco.dropna(subset=['ret'])

	print(f"Uranium data: {u.index.min()} to {u.index.max()}")
	print(f"CCO.TO data: {cco.index.min()} to {cco.index.max()}")

	# ---------- Helper: approximate WNA date (first Thu on/after Sep 5) ----------
	import datetime as dt
	def approximate_wna_middate(year: int) -> pd.Timestamp:
	d = dt.date(year, 9, 5) # Sep 5
	# weekday(): Mon=0 ... Sun=6 ; Thursday=3
	days_to_thu = (3 - d.weekday()) % 7
	d = d + dt.timedelta(days=days_to_thu)
	return pd.Timestamp(d)

	# ---------- Manual overrides for years you know exactly ----------
	manual_event_dates = {
	2015: "2015-09-10",
	2016: "2016-09-15",
	2017: "2017-09-14",
	2018: "2018-09-06",
	2019: "2019-09-05",
	# 2020 omitted (pandemic uncertainty)
	2021: "2021-09-09",
	2022: "2022-09-08",
	2023: "2023-09-07",
	2024: "2024-09-05",
	2025: "2025-09-04" # Added 2025 date
	}

	# Years to exclude explicitly (Fukushima) + anything else you want to skip
	exclude_years = {2011, 2020}

	# Build per-year event index using manual if available, else approximation
	# Use overlapping period for both datasets
	use_years = range(max(u.index.min().year, cco.index.min().year, 2000),
	min(u.index.max().year, cco.index.max().year, 2025) + 1)

	event_idx = {}
	event_idx_2025 = {} # Separate index for 2025

	for y in use_years:
	if y in exclude_years:
	continue # <-- skip 2011 (and 2020)
	d = pd.Timestamp(manual_event_dates[y]) if y in manual_event_dates else approximate_wna_middate(y)

	# Check if date exists in both datasets
	if (d < u.index.min() or d > u.index.max() or
	d < cco.index.min() or d > cco.index.max()):
	continue

	d_nearest = u.index[u.index.get_indexer([d], method='nearest')][0]

	if y == 2025:
	event_idx_2025[y] = d_nearest # Store 2025 separately
	else:
	event_idx[y] = d_nearest # Historical events for averaging

	print(f"Historical events used: {len(event_idx)} years → {sorted(event_idx.keys())}")
	print(f"2025 event: {sorted(event_idx_2025.keys())}")

	# ---------- Function to calculate event-window cumulative returns ----------
	def calculate_event_returns(data, event_idx, pre=30, post=90):
	window = np.arange(-pre, post + 1)
	paths = []

	for y, d in event_idx.items():
	# pull the window of returns
	start_date = d + pd.tseries.offsets.BDay(window[0])
	end_date = d + pd.tseries.offsets.BDay(window[-1])

	rets = data.loc[start_date:end_date]['ret']
	if len(rets) != len(window):
	# incomplete window at dataset edges
	continue

	# cumulative path and normalize so t=0 == 1.0
	cum = (1 + rets.values).cumprod()
	base_idx = np.where(window == 0)[0][0]
	cum_norm = cum / cum[base_idx]
	paths.append(cum_norm)

	paths = np.array(paths)
	avg_path = paths.mean(axis=0) if len(paths) else np.array([])
	std_path = paths.std(axis=0) if len(paths) else np.array([])
	n_events = len(paths)
	se_path = (std_path / np.sqrt(n_events)) if n_events else std_path

	return window, avg_path, se_path, n_events

	# ---------- Function to get single event trajectory (for 2025) ----------
	def get_single_event_trajectory(data, event_date, pre=30, post=90):
	start_date = event_date + pd.tseries.offsets.BDay(-pre)
	end_date = event_date + pd.tseries.offsets.BDay(post)

	# Get available data within the window
	try:
	available_data = data.loc[start_date:end_date]
	if len(available_data) == 0:
	return np.array([]), np.array([])

	# Calculate actual window positions based on available data
	actual_window = []

	# Find the event date position in available data
	event_pos = None
	for i, date in enumerate(available_data.index):
	days_diff = (date - event_date).days
	actual_window.append(days_diff)
	if event_pos is None and date >= event_date:
	event_pos = i

	if event_pos is None:
	return np.array([]), np.array([])

	# Calculate cumulative returns and normalize to event date
	rets = available_data['ret'].values
	cum = (1 + rets).cumprod()
	cum_norm = cum / cum[event_pos]

	return np.array(actual_window), cum_norm

	except KeyError:
	return np.array([]), np.array([])

	# ---------- Calculate event returns for historical data ----------
	pre, post = 30, 90

	window_u, avg_path_u, se_path_u, n_events_u = calculate_event_returns(u, event_idx, pre, post)
	window_cco, avg_path_cco, se_path_cco, n_events_cco = calculate_event_returns(cco, event_idx, pre, post)

	# ---------- Calculate 2025 trajectories ----------
	trajectory_2025_u = np.array([])
	trajectory_2025_cco = np.array([])
	window_2025_u = np.array([])
	window_2025_cco = np.array([])

	if 2025 in event_idx_2025:
	event_2025 = event_idx_2025[2025]
	window_2025_u, trajectory_2025_u = get_single_event_trajectory(u, event_2025, pre, post)
	window_2025_cco, trajectory_2025_cco = get_single_event_trajectory(cco, event_2025, pre, post)

	print(f"Historical Uranium futures: {n_events_u} complete event windows")
	print(f"Historical CCO.TO stock: {n_events_cco} complete event windows")
	print(f"2025 Uranium trajectory points: {len(trajectory_2025_u)}")
	print(f"2025 CCO.TO trajectory points: {len(trajectory_2025_cco)}")

	# ---------- Plot ----------
	out_path = "images/uranium_event_curve_with_cco_2025.png"
	fig, ax = plt.subplots(figsize=(8, 6)) #was (10, 6)

	# Plot historical averages
	if len(avg_path_u):
	ax.plot(window_u, (avg_path_u - 1.0) * 100,
	label=f'Uranium Futures Average (n={n_events_u})',
	linewidth=2, color='orange', alpha=0.8)
	ax.fill_between(window_u,
	(avg_path_u - 1.0 - se_path_u) * 100,
	(avg_path_u - 1.0 + se_path_u) * 100,
	alpha=0.15, color='orange')

	if len(avg_path_cco):
	ax.plot(window_cco, (avg_path_cco - 1.0) * 100,
	label=f'CCO.TO Average (n={n_events_cco})',
	linewidth=2, color='blue', alpha=0.8)
	ax.fill_between(window_cco,
	(avg_path_cco - 1.0 - se_path_cco) * 100,
	(avg_path_cco - 1.0 + se_path_cco) * 100,
	alpha=0.15, color='blue')

	# Plot 2025 trajectories
	if len(trajectory_2025_u):
	ax.plot(window_2025_u, (trajectory_2025_u - 1.0) * 100,
	label='Uranium 2025 (Current)',
	linewidth=3, color='red', linestyle='-', alpha=0.9)

	if len(trajectory_2025_cco):
	ax.plot(window_2025_cco, (trajectory_2025_cco - 1.0) * 100,
	label='CCO.TO 2025 (Current)',
	linewidth=3, color='darkgreen', linestyle='-', alpha=0.9)

	ax.axvline(0, linestyle='--', linewidth=1, color='black', alpha=0.7)
	ax.axhline(0, linestyle='-', linewidth=0.5, color='black', alpha=0.3)
	ax.set_xlabel('Business days relative to event (0 = WNA Symposium mid-date)')
	ax.set_ylabel('Cumulative return (%)')
	ax.set_title('Uranium Futures vs Cameco: Historical Average vs 2025 Current Trajectory\nAround WNA Symposium')
	ax.legend(loc='best')
	ax.grid(True, linestyle=':', alpha=0.7)

	# Add caption
	caption = "Sources: Uranium - investing.com; CCO.TO - Yahoo Finance"
	ax.text(
	0.98, 0.02,
	caption,
	transform=ax.transAxes,
	ha='right', va='bottom',
	fontsize=8, fontstyle='italic', color='grey',
	bbox=dict(facecolor='white', alpha=0.6, edgecolor='none', pad=1.5)
	)

	fig.tight_layout()
	fig.savefig(out_path, bbox_inches='tight', dpi=300)
	plt.show()
	plt.close(fig)

	# ---------- Print summary statistics ----------
	print("\n" + "="*60)
	print("SUMMARY STATISTICS")
	print("="*60)

	if len(avg_path_u):
	final_ret_u = (avg_path_u[-1] - 1.0) * 100
	print(f"Historical Uranium Futures Average - Final cumulative return (+{post} days): {final_ret_u:.2f}%")

	if len(avg_path_cco):
	final_ret_cco = (avg_path_cco[-1] - 1.0) * 100
	print(f"Historical CCO.TO Average - Final cumulative return (+{post} days): {final_ret_cco:.2f}%")

	if len(trajectory_2025_u):
	current_ret_u = (trajectory_2025_u[-1] - 1.0) * 100
	days_from_event = window_2025_u[-1] if len(window_2025_u) else 0
	print(f"2025 Uranium Current - Return at +{days_from_event:.0f} days: {current_ret_u:.2f}%")

	if len(trajectory_2025_cco):
	current_ret_cco = (trajectory_2025_cco[-1] - 1.0) * 100
	days_from_event = window_2025_cco[-1] if len(window_2025_cco) else 0
	print(f"2025 CCO.TO Current - Return at +{days_from_event:.0f} days: {current_ret_cco:.2f}%")

	print(f"\nHistorical event years: {sorted(event_idx.keys())}")
	if event_idx_2025:
	print(f"Current trajectory year: {sorted(event_idx_2025.keys())}")
	print(f"Output saved to: {out_path}")
No results found