tapyu · September 14, 2025 00:00
diff --git a/.take_a_look.md b/.take_a_look.md
diff --git a/repro_issue.png b/repro_issue.png
diff --git a/repro_issue.py b/repro_issue.py
 """
 https://stackoverflow.com/questions/79759086/n-jobs-2-breaks-reproducibility
 Simple CNNClassifier MWE: n_jobs reproducibility issue

 Focused demonstration based on:
 https://stackoverflow.com/questions/79759086/n-jobs-2-breaks-reproducibility

 This MWE focuses on:
 1. CNNClassifier only (as requested)
 2. Learning curves (score vs epochs) 
 3. Small dataset for speed
 4. Clear visualization of reproducibility differences
 """

 import os
 import random
 import warnings

 # TensorFlow's native logger picks up TF_CPP_MIN_LOG_LEVEL while the package
 # is being imported, so the variable has to be exported *before*
 # `import tensorflow`; setting it afterwards leaves import-time logging on.
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Reduce TensorFlow logging

 import matplotlib.pyplot as plt
 import numpy as np
 import tensorflow as tf
 from sklearn.model_selection import StratifiedKFold
 from sktime.classification.deep_learning import CNNClassifier
 from sktime.classification.model_selection import TSCGridSearchCV

 # Silence Python-level warnings raised from here on.
 warnings.filterwarnings('ignore')

 def set_seed(seed=42):
    """Seed every random number source this script relies on.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator with ``seed``, writes ``seed`` into the
    ``PYTHONHASHSEED`` environment variable, and asks TensorFlow to use
    deterministic op implementations.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Function-scope import of TensorFlow (shadows any module-level `tf`).
    import tensorflow as tf

    # NOTE(review): the running interpreter has presumably already fixed its
    # hash seed, so this most likely only reaches processes started later --
    # confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)
    tf.config.experimental.enable_op_determinism()

 def allow_memory_growth():
    """Enable memory growth on every GPU that TensorFlow lists.

    Does nothing when no GPU is listed. A ``RuntimeError`` raised while
    configuring a device is printed instead of propagated.

    Raises:
        AssertionError: If TensorFlow reports that memory growth is still
            disabled for a device right after it was enabled.
    """
    detected_gpus = tf.config.list_physical_devices("GPU")
    try:
        # An empty device list makes this loop (and the whole call) a no-op.
        for gpu in detected_gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            assert tf.config.experimental.get_memory_growth(gpu), f"Failed to enable memory growth for {gpu.name}."
    except RuntimeError as e:
        print(e)

 def create_time_series_data(seed=42):
    """Build a tiny two-class synthetic time series dataset.

    Re-seeds all generators through ``set_seed`` and then draws 60 series of
    30 points each. Even-indexed samples (class 0) are one sine period plus
    noise; odd-indexed samples (class 1) are a linear ramp from -1 to 1 plus
    noise. The noise is standard normal scaled by 100.

    Args:
        seed: Seed handed to ``set_seed`` before any sample is drawn.

    Returns:
        Tuple ``(X, y)``: ``X`` is a ``(60, 30)`` array of series and ``y``
        a length-60 array of labels alternating 0, 1, 0, 1, ...
    """
    set_seed(seed)

    n_samples = 60  # Very small for speed
    n_timepoints = 30  # Short series
    n_classes = 2  # Binary classification

    # Noise-free templates; they are deterministic, so build them once.
    sine_wave = np.sin(np.linspace(0, 2*np.pi, n_timepoints))
    linear_trend = np.linspace(-1, 1, n_timepoints)

    series = []
    labels = []
    for sample_idx in range(n_samples):
        label = sample_idx % n_classes
        template = sine_wave if label == 0 else linear_trend
        # One noise draw per sample, in sample order.
        series.append(template + 100 * np.random.randn(n_timepoints))
        labels.append(label)

    return np.array(series), np.array(labels)

 def run_single_cnn_experiment(n_jobs, seed=42, run_id=1):
    """Fit one seeded grid search over ``CNNClassifier`` and summarise it.

    Args:
        n_jobs: Forwarded to ``TSCGridSearchCV``; the setting under test.
        seed: Used for ``set_seed``, data generation, the classifier's
            ``random_state`` and the shuffled cross-validation splitter.
        run_id: Label echoed in the progress line and in the result.

    Returns:
        Dict with keys ``run_id``, ``n_jobs``, ``best_params``,
        ``best_score``, ``all_scores`` (``cv_results_['mean_test_score']``)
        and ``learning_curve`` (``best_estimator_.summary()['accuracy']`` --
        presumably the per-epoch accuracy history of the refit model;
        confirm against sktime's ``summary``).
    """
    print(f"    Run {run_id} with n_jobs={n_jobs}...")

    # Reset every RNG before the run, then configure the GPU and build data.
    set_seed(seed)
    allow_memory_growth()
    X, y = create_time_series_data(seed)

    base_model = CNNClassifier(
        n_epochs=10,
        batch_size=4,
        verbose=False,
        random_state=seed,
        n_conv_layers=1,
        kernel_size=3,
    )
    splitter = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

    search = TSCGridSearchCV(
        estimator=base_model,
        param_grid={'batch_size': [4, 8], 'kernel_size': [3, 5]},
        cv=splitter,
        n_jobs=n_jobs,
        scoring='accuracy',
    )
    search.fit(X, y)

    outcome = {'run_id': run_id, 'n_jobs': n_jobs}
    outcome['best_params'] = search.best_params_
    outcome['best_score'] = search.best_score_
    outcome['all_scores'] = search.cv_results_['mean_test_score']
    outcome['learning_curve'] = search.best_estimator_.summary()['accuracy']
    return outcome

 def _run_repeated_experiments(n_jobs, n_runs=3, seed=42):
    """Run the experiment ``n_runs`` times with one seed and collect results."""
    results = []
    for run_id in range(1, n_runs + 1):
        result = run_single_cnn_experiment(n_jobs=n_jobs, seed=seed, run_id=run_id)
        results.append(result)
        print(f"       Best: {result['best_params']}, Score: {result['best_score']:.6f}")
    return results


 def _is_reproducible(results):
    """Return True when every run matches the first run's curve and best params."""
    reference = results[0]
    return all(
        r['learning_curve'] == reference['learning_curve'] and
        r['best_params'] == reference['best_params']
        for r in results[1:]
    )


 def test_cnn_reproducibility():
    """Compare run-to-run reproducibility of the grid search for n_jobs=1 and 2.

    Runs the experiment three times per setting with seed 42. A setting
    counts as reproducible when every run has the same learning curve and
    the same best hyper-parameters as its first run. Progress, the verdicts
    and a final summary are printed, and the comparison figure is written by
    ``create_learning_curves_plot``.
    """
    print("=" * 60)
    print("CNNClassifier n_jobs Reproducibility Test")
    print("=" * 60)
    print("Based on: https://stackoverflow.com/questions/79759086/")
    print("Testing with small dataset for maximum speed...")

    # Test n_jobs=1 (should be reproducible)
    print("\n1. Testing n_jobs=1 (should be reproducible)")
    print("-" * 50)
    results_n1 = _run_repeated_experiments(n_jobs=1)
    reproducible_n1 = _is_reproducible(results_n1)
    # The YES/NO marker follows the measured flag; it used to be a fixed "YES".
    print(f"\n   {'YES' if reproducible_n1 else 'NO'} n_jobs=1 reproducible: {reproducible_n1}")

    # Test n_jobs=2 (may break reproducibility)
    print("\n2. Testing n_jobs=2 (may break reproducibility)")
    print("-" * 50)
    results_n2 = _run_repeated_experiments(n_jobs=2)
    reproducible_n2 = _is_reproducible(results_n2)
    print(f"\n   {'YES' if reproducible_n2 else 'NO'} n_jobs=2 reproducible: {reproducible_n2}")

    # Create learning curve visualization
    create_learning_curves_plot(results_n1, results_n2, reproducible_n1, reproducible_n2)

    # Final summary
    print("\n" + "=" * 60)
    print("FINAL RESULTS")
    print("=" * 60)
    print(f"n_jobs=1:  Reproducible = {reproducible_n1}")
    print(f"n_jobs=2:  Reproducible = {reproducible_n2}")

    if reproducible_n1 and not reproducible_n2:
        print("\n🎯 SUCCESS: Demonstrated the n_jobs>=2 reproducibility issue!")
        print("   n_jobs=1 is reproducible, but n_jobs=2 is not.")
    elif not reproducible_n2:
        print("\n⚠️  PARTIAL: n_jobs=2 shows non-reproducible behavior.")
    elif not reproducible_n1:
        # Only the serial setting drifted; do not report this as "no issue".
        print("\n⚠️  PARTIAL: n_jobs=1 shows non-reproducible behavior.")
    else:
        print("\nYES No reproducibility issue detected in this run.")
        print("   Note: The issue may be intermittent or system-dependent.")
        print("   Try running multiple times or with different parameters.")

 def _plot_best_scores(ax, results, n_jobs, reproducible, face_color, edge_color):
    """Draw one bar per run showing that run's best cross-validation score."""
    runs = [r['run_id'] for r in results]
    scores = [r['best_score'] for r in results]

    bars = ax.bar(runs, scores, color=face_color, alpha=0.7, edgecolor=edge_color)
    ax.set_title(
        f'n_jobs={n_jobs}: Best Scores per Run\n'
        f'(Reproducible: {"YES" if reproducible else "NO"})'
    )
    ax.set_xlabel('Run Number')
    ax.set_ylabel('Best CV Score')
    # Zoom the y-axis onto the observed range so small gaps stay visible.
    ax.set_ylim([min(scores) - 0.01, max(scores) + 0.01])
    ax.grid(True, alpha=0.3)

    # Add value labels on bars
    for bar, score in zip(bars, scores):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001,
                f'{score:.4f}', ha='center', va='bottom', fontsize=9)


 def _plot_learning_curves(ax, results, n_jobs, marker, colors):
    """Overlay every run's ``learning_curve`` sequence on ``ax``."""
    for i, result in enumerate(results):
        curve = result['learning_curve']
        # Cycle the palette so more runs than colours cannot raise IndexError.
        ax.plot(range(len(curve)), curve,
                marker=marker, color=colors[i % len(colors)], alpha=0.8,
                label=f'Run {result["run_id"]}', linewidth=2)

    # The plotted series is the best estimator's accuracy history, so the
    # axes are labelled per epoch rather than per parameter set.
    # NOTE(review): assumes one history entry per training epoch -- confirm.
    ax.set_title(f'n_jobs={n_jobs}: Learning Curves (Best Estimator)')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy')
    ax.legend()
    ax.grid(True, alpha=0.3)


 def create_learning_curves_plot(results_n1, results_n2, repro_n1, repro_n2):
    """Save a 2x2 figure comparing the n_jobs=1 and n_jobs=2 runs.

    Top row: best CV score per run for each setting. Bottom row: the
    ``learning_curve`` of every run for each setting. The figure is written
    to ``./cnn_learning_curves.png`` and then closed; a short reading guide
    is printed.

    Args:
        results_n1: Result dicts of the n_jobs=1 runs; each needs the keys
            ``run_id``, ``best_score`` and ``learning_curve``.
        results_n2: Result dicts of the n_jobs=2 runs, same keys.
        repro_n1: Whether the n_jobs=1 runs were reproducible (title only).
        repro_n2: Whether the n_jobs=2 runs were reproducible (title only).
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

    # Colors for different runs
    colors = ['blue', 'red', 'green']

    _plot_best_scores(ax1, results_n1, 1, repro_n1, 'lightblue', 'blue')
    _plot_best_scores(ax2, results_n2, 2, repro_n2, 'lightcoral', 'red')
    _plot_learning_curves(ax3, results_n1, 1, 'o', colors)
    _plot_learning_curves(ax4, results_n2, 2, 's', colors)

    plt.tight_layout()
    plt.savefig('./cnn_learning_curves.png', dpi=150, bbox_inches='tight')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print("\n📊 Learning curves plot saved: cnn_learning_curves.png")

    # Interpretation guide
    print("\n🔍 PLOT INTERPRETATION:")
    print("   • Overlapping lines in plots 3&4 = Reproducible results")
    print("   • Separated lines = Non-reproducible results")
    print("   • Compare n_jobs=1 vs n_jobs=2 patterns")

 # Script entry point: run the reproducibility comparison only when this file
 # is executed directly.
 if __name__ == "__main__":
    test_cnn_reproducibility()
	"""
	https://stackoverflow.com/questions/79759086/n-jobs-2-breaks-reproducibility
	Simple CNNClassifier MWE: n_jobs reproducibility issue

	Focused demonstration based on:
	https://stackoverflow.com/questions/79759086/n-jobs-2-breaks-reproducibility

	This MWE focuses on:
	1. CNNClassifier only (as requested)
	2. Learning curves (score vs epochs)
	3. Small dataset for speed
	4. Clear visualization of reproducibility differences
	"""

	import os
	import random
	import warnings

	# TensorFlow's native logger picks up TF_CPP_MIN_LOG_LEVEL while the package
	# is being imported, so the variable has to be exported *before*
	# `import tensorflow`; setting it afterwards leaves import-time logging on.
	os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Reduce TensorFlow logging

	import matplotlib.pyplot as plt
	import numpy as np
	import tensorflow as tf
	from sklearn.model_selection import StratifiedKFold
	from sktime.classification.deep_learning import CNNClassifier
	from sktime.classification.model_selection import TSCGridSearchCV

	# Silence Python-level warnings raised from here on.
	warnings.filterwarnings('ignore')

	def set_seed(seed=42):
	    """Seed every random number source this script relies on.

	    Seeds Python's ``random`` module, NumPy's global generator and
	    TensorFlow's global generator with ``seed``, writes ``seed`` into the
	    ``PYTHONHASHSEED`` environment variable, and asks TensorFlow to use
	    deterministic op implementations.

	    Args:
	        seed: Integer seed shared by all generators.
	    """
	    # Function-scope import of TensorFlow (shadows any module-level `tf`).
	    import tensorflow as tf

	    # NOTE(review): the running interpreter has presumably already fixed its
	    # hash seed, so this most likely only reaches processes started later --
	    # confirm.
	    os.environ['PYTHONHASHSEED'] = str(seed)

	    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
	        seed_fn(seed)
	    tf.config.experimental.enable_op_determinism()

	def allow_memory_growth():
	    """Enable memory growth on every GPU that TensorFlow lists.

	    Does nothing when no GPU is listed. A ``RuntimeError`` raised while
	    configuring a device is printed instead of propagated.

	    Raises:
	        AssertionError: If TensorFlow reports that memory growth is still
	            disabled for a device right after it was enabled.
	    """
	    detected_gpus = tf.config.list_physical_devices("GPU")
	    try:
	        # An empty device list makes this loop (and the whole call) a no-op.
	        for gpu in detected_gpus:
	            tf.config.experimental.set_memory_growth(gpu, True)
	            assert tf.config.experimental.get_memory_growth(gpu), f"Failed to enable memory growth for {gpu.name}."
	    except RuntimeError as e:
	        print(e)

	def create_time_series_data(seed=42):
	    """Build a tiny two-class synthetic time series dataset.

	    Re-seeds all generators through ``set_seed`` and then draws 60 series of
	    30 points each. Even-indexed samples (class 0) are one sine period plus
	    noise; odd-indexed samples (class 1) are a linear ramp from -1 to 1 plus
	    noise. The noise is standard normal scaled by 100.

	    Args:
	        seed: Seed handed to ``set_seed`` before any sample is drawn.

	    Returns:
	        Tuple ``(X, y)``: ``X`` is a ``(60, 30)`` array of series and ``y``
	        a length-60 array of labels alternating 0, 1, 0, 1, ...
	    """
	    set_seed(seed)

	    n_samples = 60  # Very small for speed
	    n_timepoints = 30  # Short series
	    n_classes = 2  # Binary classification

	    # Noise-free templates; they are deterministic, so build them once.
	    sine_wave = np.sin(np.linspace(0, 2*np.pi, n_timepoints))
	    linear_trend = np.linspace(-1, 1, n_timepoints)

	    series = []
	    labels = []
	    for sample_idx in range(n_samples):
	        label = sample_idx % n_classes
	        template = sine_wave if label == 0 else linear_trend
	        # One noise draw per sample, in sample order.
	        series.append(template + 100 * np.random.randn(n_timepoints))
	        labels.append(label)

	    return np.array(series), np.array(labels)

	def run_single_cnn_experiment(n_jobs, seed=42, run_id=1):
	    """Fit one seeded grid search over ``CNNClassifier`` and summarise it.

	    Args:
	        n_jobs: Forwarded to ``TSCGridSearchCV``; the setting under test.
	        seed: Used for ``set_seed``, data generation, the classifier's
	            ``random_state`` and the shuffled cross-validation splitter.
	        run_id: Label echoed in the progress line and in the result.

	    Returns:
	        Dict with keys ``run_id``, ``n_jobs``, ``best_params``,
	        ``best_score``, ``all_scores`` (``cv_results_['mean_test_score']``)
	        and ``learning_curve`` (``best_estimator_.summary()['accuracy']`` --
	        presumably the per-epoch accuracy history of the refit model;
	        confirm against sktime's ``summary``).
	    """
	    print(f" Run {run_id} with n_jobs={n_jobs}...")

	    # Reset every RNG before the run, then configure the GPU and build data.
	    set_seed(seed)
	    allow_memory_growth()
	    X, y = create_time_series_data(seed)

	    base_model = CNNClassifier(
	        n_epochs=10,
	        batch_size=4,
	        verbose=False,
	        random_state=seed,
	        n_conv_layers=1,
	        kernel_size=3,
	    )
	    splitter = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

	    search = TSCGridSearchCV(
	        estimator=base_model,
	        param_grid={'batch_size': [4, 8], 'kernel_size': [3, 5]},
	        cv=splitter,
	        n_jobs=n_jobs,
	        scoring='accuracy',
	    )
	    search.fit(X, y)

	    outcome = {'run_id': run_id, 'n_jobs': n_jobs}
	    outcome['best_params'] = search.best_params_
	    outcome['best_score'] = search.best_score_
	    outcome['all_scores'] = search.cv_results_['mean_test_score']
	    outcome['learning_curve'] = search.best_estimator_.summary()['accuracy']
	    return outcome

	def _run_repeated_experiments(n_jobs, n_runs=3, seed=42):
	    """Run the experiment ``n_runs`` times with one seed and collect results."""
	    results = []
	    for run_id in range(1, n_runs + 1):
	        result = run_single_cnn_experiment(n_jobs=n_jobs, seed=seed, run_id=run_id)
	        results.append(result)
	        print(f" Best: {result['best_params']}, Score: {result['best_score']:.6f}")
	    return results


	def _is_reproducible(results):
	    """Return True when every run matches the first run's curve and best params."""
	    reference = results[0]
	    return all(
	        r['learning_curve'] == reference['learning_curve'] and
	        r['best_params'] == reference['best_params']
	        for r in results[1:]
	    )


	def test_cnn_reproducibility():
	    """Compare run-to-run reproducibility of the grid search for n_jobs=1 and 2.

	    Runs the experiment three times per setting with seed 42. A setting
	    counts as reproducible when every run has the same learning curve and
	    the same best hyper-parameters as its first run. Progress, the verdicts
	    and a final summary are printed, and the comparison figure is written by
	    ``create_learning_curves_plot``.
	    """
	    print("=" * 60)
	    print("CNNClassifier n_jobs Reproducibility Test")
	    print("=" * 60)
	    print("Based on: https://stackoverflow.com/questions/79759086/")
	    print("Testing with small dataset for maximum speed...")

	    # Test n_jobs=1 (should be reproducible)
	    print("\n1. Testing n_jobs=1 (should be reproducible)")
	    print("-" * 50)
	    results_n1 = _run_repeated_experiments(n_jobs=1)
	    reproducible_n1 = _is_reproducible(results_n1)
	    # The YES/NO marker follows the measured flag; it used to be a fixed "YES".
	    print(f"\n {'YES' if reproducible_n1 else 'NO'} n_jobs=1 reproducible: {reproducible_n1}")

	    # Test n_jobs=2 (may break reproducibility)
	    print("\n2. Testing n_jobs=2 (may break reproducibility)")
	    print("-" * 50)
	    results_n2 = _run_repeated_experiments(n_jobs=2)
	    reproducible_n2 = _is_reproducible(results_n2)
	    print(f"\n {'YES' if reproducible_n2 else 'NO'} n_jobs=2 reproducible: {reproducible_n2}")

	    # Create learning curve visualization
	    create_learning_curves_plot(results_n1, results_n2, reproducible_n1, reproducible_n2)

	    # Final summary
	    print("\n" + "=" * 60)
	    print("FINAL RESULTS")
	    print("=" * 60)
	    print(f"n_jobs=1: Reproducible = {reproducible_n1}")
	    print(f"n_jobs=2: Reproducible = {reproducible_n2}")

	    if reproducible_n1 and not reproducible_n2:
	        print("\n🎯 SUCCESS: Demonstrated the n_jobs>=2 reproducibility issue!")
	        print(" n_jobs=1 is reproducible, but n_jobs=2 is not.")
	    elif not reproducible_n2:
	        print("\n⚠️ PARTIAL: n_jobs=2 shows non-reproducible behavior.")
	    elif not reproducible_n1:
	        # Only the serial setting drifted; do not report this as "no issue".
	        print("\n⚠️ PARTIAL: n_jobs=1 shows non-reproducible behavior.")
	    else:
	        print("\nYES No reproducibility issue detected in this run.")
	        print(" Note: The issue may be intermittent or system-dependent.")
	        print(" Try running multiple times or with different parameters.")

	def _plot_best_scores(ax, results, n_jobs, reproducible, face_color, edge_color):
	    """Draw one bar per run showing that run's best cross-validation score."""
	    runs = [r['run_id'] for r in results]
	    scores = [r['best_score'] for r in results]

	    bars = ax.bar(runs, scores, color=face_color, alpha=0.7, edgecolor=edge_color)
	    ax.set_title(
	        f'n_jobs={n_jobs}: Best Scores per Run\n'
	        f'(Reproducible: {"YES" if reproducible else "NO"})'
	    )
	    ax.set_xlabel('Run Number')
	    ax.set_ylabel('Best CV Score')
	    # Zoom the y-axis onto the observed range so small gaps stay visible.
	    ax.set_ylim([min(scores) - 0.01, max(scores) + 0.01])
	    ax.grid(True, alpha=0.3)

	    # Add value labels on bars
	    for bar, score in zip(bars, scores):
	        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001,
	                f'{score:.4f}', ha='center', va='bottom', fontsize=9)


	def _plot_learning_curves(ax, results, n_jobs, marker, colors):
	    """Overlay every run's ``learning_curve`` sequence on ``ax``."""
	    for i, result in enumerate(results):
	        curve = result['learning_curve']
	        # Cycle the palette so more runs than colours cannot raise IndexError.
	        ax.plot(range(len(curve)), curve,
	                marker=marker, color=colors[i % len(colors)], alpha=0.8,
	                label=f'Run {result["run_id"]}', linewidth=2)

	    # The plotted series is the best estimator's accuracy history, so the
	    # axes are labelled per epoch rather than per parameter set.
	    # NOTE(review): assumes one history entry per training epoch -- confirm.
	    ax.set_title(f'n_jobs={n_jobs}: Learning Curves (Best Estimator)')
	    ax.set_xlabel('Epoch')
	    ax.set_ylabel('Accuracy')
	    ax.legend()
	    ax.grid(True, alpha=0.3)


	def create_learning_curves_plot(results_n1, results_n2, repro_n1, repro_n2):
	    """Save a 2x2 figure comparing the n_jobs=1 and n_jobs=2 runs.

	    Top row: best CV score per run for each setting. Bottom row: the
	    ``learning_curve`` of every run for each setting. The figure is written
	    to ``./cnn_learning_curves.png`` and then closed; a short reading guide
	    is printed.

	    Args:
	        results_n1: Result dicts of the n_jobs=1 runs; each needs the keys
	            ``run_id``, ``best_score`` and ``learning_curve``.
	        results_n2: Result dicts of the n_jobs=2 runs, same keys.
	        repro_n1: Whether the n_jobs=1 runs were reproducible (title only).
	        repro_n2: Whether the n_jobs=2 runs were reproducible (title only).
	    """
	    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

	    # Colors for different runs
	    colors = ['blue', 'red', 'green']

	    _plot_best_scores(ax1, results_n1, 1, repro_n1, 'lightblue', 'blue')
	    _plot_best_scores(ax2, results_n2, 2, repro_n2, 'lightcoral', 'red')
	    _plot_learning_curves(ax3, results_n1, 1, 'o', colors)
	    _plot_learning_curves(ax4, results_n2, 2, 's', colors)

	    plt.tight_layout()
	    plt.savefig('./cnn_learning_curves.png', dpi=150, bbox_inches='tight')
	    # Release the figure so repeated calls do not accumulate open figures.
	    plt.close(fig)
	    print("\n📊 Learning curves plot saved: cnn_learning_curves.png")

	    # Interpretation guide
	    print("\n🔍 PLOT INTERPRETATION:")
	    print(" • Overlapping lines in plots 3&4 = Reproducible results")
	    print(" • Separated lines = Non-reproducible results")
	    print(" • Compare n_jobs=1 vs n_jobs=2 patterns")

	# Script entry point: run the reproducibility comparison only when this file
	# is executed directly.
	if __name__ == "__main__":
	    test_cnn_reproducibility()
No results found