Last active
September 14, 2025 00:00
-
-
Save tapyu/05ca5852ff2edc9a9b201a47ba1d368c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| https://stackoverflow.com/questions/79759086/n-jobs-2-breaks-reproducibility | |
| Simple CNNClassifier MWE: n_jobs reproducibility issue | |
| Focused demonstration based on: | |
| https://stackoverflow.com/questions/79759086/n-jobs-2-breaks-reproducibility | |
| This MWE focuses on: | |
| 1. CNNClassifier only (as requested) | |
| 2. Learning curves (score vs epochs) | |
| 3. Small dataset for speed | |
| 4. Clear visualization of reproducibility differences | |
| """ | |
import os
import random
import warnings

# TensorFlow's native (C++) logging reads TF_CPP_MIN_LOG_LEVEL from the
# environment, so the variable has to be set *before* TensorFlow is imported;
# assigning it afterwards is too late to quiet the messages emitted on import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Reduce TensorFlow logging

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sktime.classification.deep_learning import CNNClassifier
from sktime.classification.model_selection import TSCGridSearchCV

# Silence Python-level warnings for the remainder of the script.
warnings.filterwarnings('ignore')
def set_seed(seed=42):
    """Seed every random number source used by this script.

    Python's ``random`` module, NumPy's global generator and TensorFlow are
    all seeded with the same value, ``PYTHONHASHSEED`` is exported, and
    TensorFlow op determinism is switched on.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    # TensorFlow is imported locally, so this helper does not rely on a
    # module-level ``tf`` name being present.
    import tensorflow as tf

    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so this presumably only affects processes spawned later -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # TensorFlow global seed plus deterministic op implementations.
    tf.random.set_seed(seed)
    tf.config.experimental.enable_op_determinism()
def allow_memory_growth():
    """Turn on memory growth for every GPU that TensorFlow can see.

    Returns immediately when no GPU device is listed. A ``RuntimeError``
    raised by TensorFlow while the setting is changed is printed instead of
    being propagated; a failed verification raises ``AssertionError``.
    """
    detected_gpus = tf.config.list_physical_devices("GPU")
    if not detected_gpus:
        # Nothing to configure on a CPU-only machine.
        return

    try:
        for gpu in detected_gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            # Read the flag back to confirm the setting was accepted.
            assert tf.config.experimental.get_memory_growth(gpu), f"Failed to enable memory growth for {gpu.name}."
    except RuntimeError as err:
        print(err)
def create_time_series_data(seed=42):
    """Build a tiny two-class time series dataset.

    The global generators are re-seeded via ``set_seed`` first, so repeated
    calls with the same seed draw the same noise. Samples alternate between
    the two classes: label 0 is one sine period plus noise, label 1 is a
    linear ramp from -1 to 1 plus noise. The noise is ``100 * randn``, far
    larger than either underlying pattern.

    Args:
        seed: Seed forwarded to ``set_seed``. Defaults to 42.

    Returns:
        Tuple ``(X, y)`` where ``X`` is an array of 60 series with 30 time
        points each and ``y`` holds the matching 0/1 labels.
    """
    set_seed(seed)

    n_samples = 60      # Very small for speed
    n_timepoints = 30   # Short series
    n_classes = 2       # Binary classification

    # Noise-free templates; they do not depend on the sample index.
    sine_template = np.sin(np.linspace(0, 2*np.pi, n_timepoints))
    trend_template = np.linspace(-1, 1, n_timepoints)

    series = []
    labels = []
    for sample_idx in range(n_samples):
        label = sample_idx % n_classes
        template = sine_template if label == 0 else trend_template
        # Exactly one randn draw per sample, in sample order.
        series.append(template + 100 * np.random.randn(n_timepoints))
        labels.append(label)

    return np.array(series), np.array(labels)
def run_single_cnn_experiment(n_jobs, seed=42, run_id=1):
    """Run one seeded grid search over a CNNClassifier and collect its results.

    Args:
        n_jobs: Value forwarded to ``TSCGridSearchCV(n_jobs=...)``; the
            setting whose effect on reproducibility is being studied.
        seed: Seed used for ``set_seed``, the dataset, the classifier and the
            cross-validation splitter. Defaults to 42.
        run_id: Label used in the progress message and stored in the result.

    Returns:
        Dict with the keys ``run_id``, ``n_jobs``, ``best_params``,
        ``best_score``, ``all_scores`` (the ``mean_test_score`` entry of
        ``cv_results_``) and ``learning_curve`` (the ``'accuracy'`` entry of
        ``best_estimator_.summary()``; presumably the per-epoch accuracy
        history -- confirm against sktime).
    """
    print(f" Run {run_id} with n_jobs={n_jobs}...")

    # Re-seed before every run; this is the crux of the reproducibility test.
    set_seed(seed)
    allow_memory_growth()
    X, y = create_time_series_data(seed)

    # Deliberately tiny network and epoch count to keep the demo fast.
    base_model = CNNClassifier(
        n_epochs=10,
        batch_size=4,
        verbose=False,
        random_state=seed,
        n_conv_layers=1,
        kernel_size=3,
    )
    splitter = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
    search_space = {'batch_size': [4, 8], 'kernel_size': [3, 5]}

    search = TSCGridSearchCV(
        estimator=base_model,
        param_grid=search_space,
        cv=splitter,
        n_jobs=n_jobs,  # This is where the issue occurs
        scoring='accuracy',
    )
    search.fit(X, y)

    outcome = {'run_id': run_id, 'n_jobs': n_jobs}
    outcome['best_params'] = search.best_params_
    outcome['best_score'] = search.best_score_
    outcome['all_scores'] = search.cv_results_['mean_test_score']
    outcome['learning_curve'] = search.best_estimator_.summary()['accuracy']
    return outcome
def _run_repeated_experiments(n_jobs, n_runs=3, seed=42):
    """Run the experiment ``n_runs`` times with one seed, printing each best result."""
    results = []
    for run_id in range(1, n_runs + 1):
        result = run_single_cnn_experiment(n_jobs=n_jobs, seed=seed, run_id=run_id)
        results.append(result)
        print(f" Best: {result['best_params']}, Score: {result['best_score']:.6f}")
    return results


def _is_reproducible(results):
    """Return True when every run matches the first run's learning curve and best params."""
    if not results:
        return True
    reference = results[0]
    return all(
        r['learning_curve'] == reference['learning_curve'] and
        r['best_params'] == reference['best_params']
        for r in results[1:]
    )


def test_cnn_reproducibility():
    """Compare CNNClassifier grid-search reproducibility for n_jobs=1 and n_jobs=2.

    Each setting is run three times with the same seed. A setting counts as
    reproducible when all runs share the same learning curve and the same
    best hyperparameters. The outcome is printed and handed to
    ``create_learning_curves_plot`` for visualisation.
    """
    print("=" * 60)
    print("CNNClassifier n_jobs Reproducibility Test")
    print("=" * 60)
    print("Based on: https://stackoverflow.com/questions/79759086/")
    print("Testing with small dataset for maximum speed...")

    # Test n_jobs=1 (should be reproducible)
    print("\n1. Testing n_jobs=1 (should be reproducible)")
    print("-" * 50)
    results_n1 = _run_repeated_experiments(n_jobs=1)
    reproducible_n1 = _is_reproducible(results_n1)
    # The YES/NO marker follows the actual outcome, exactly as for n_jobs=2.
    print(f"\n {'YES' if reproducible_n1 else 'NO'} n_jobs=1 reproducible: {reproducible_n1}")

    # Test n_jobs=2 (may break reproducibility)
    print("\n2. Testing n_jobs=2 (may break reproducibility)")
    print("-" * 50)
    results_n2 = _run_repeated_experiments(n_jobs=2)
    reproducible_n2 = _is_reproducible(results_n2)
    print(f"\n {'YES' if reproducible_n2 else 'NO'} n_jobs=2 reproducible: {reproducible_n2}")

    # Create learning curve visualization
    create_learning_curves_plot(results_n1, results_n2, reproducible_n1, reproducible_n2)

    # Final summary
    print("\n" + "=" * 60)
    print("FINAL RESULTS")
    print("=" * 60)
    print(f"n_jobs=1: Reproducible = {reproducible_n1}")
    print(f"n_jobs=2: Reproducible = {reproducible_n2}")
    if reproducible_n1 and not reproducible_n2:
        print("\n🎯 SUCCESS: Demonstrated the n_jobs>=2 reproducibility issue!")
        print(" n_jobs=1 is reproducible, but n_jobs=2 is not.")
    elif not reproducible_n2:
        print("\n⚠️ PARTIAL: n_jobs=2 shows non-reproducible behavior.")
    elif not reproducible_n1:
        # n_jobs=2 was stable but the serial baseline was not: this must not
        # be reported as "no issue detected".
        print("\n⚠️ UNEXPECTED: n_jobs=1 is not reproducible, although n_jobs=2 is.")
    else:
        print("\nYES No reproducibility issue detected in this run.")
        print(" Note: The issue may be intermittent or system-dependent.")
        print(" Try running multiple times or with different parameters.")
def _draw_best_score_bars(ax, results, n_jobs, reproducible, face_color, edge_color):
    """Draw one bar per run showing that run's best CV score."""
    runs = [r['run_id'] for r in results]
    scores = [r['best_score'] for r in results]
    bars = ax.bar(runs, scores, color=face_color, alpha=0.7, edgecolor=edge_color)
    ax.set_title(f'n_jobs={n_jobs}: Best Scores per Run\n(Reproducible: {"YES" if reproducible else "NO"})')
    ax.set_xlabel('Run Number')
    ax.set_ylabel('Best CV Score')
    if scores:
        # Zoom in tightly so small differences between runs stay visible;
        # skipped for empty input, where min()/max() would raise.
        ax.set_ylim([min(scores) - 0.01, max(scores) + 0.01])
    ax.grid(True, alpha=0.3)
    # Add value labels on bars
    for bar, score in zip(bars, scores):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001,
                f'{score:.4f}', ha='center', va='bottom', fontsize=9)


def _draw_learning_curves(ax, results, n_jobs, marker, colors):
    """Draw each run's ``learning_curve`` sequence as one line."""
    for i, result in enumerate(results):
        curve = result['learning_curve']
        epochs = range(1, len(curve) + 1)
        # Cycle through the palette so more runs than colors cannot fail.
        ax.plot(epochs, curve,
                marker=marker, color=colors[i % len(colors)], alpha=0.8,
                label=f'Run {result["run_id"]}', linewidth=2)
    ax.set_title(f'n_jobs={n_jobs}: Learning Curves of Best Estimator')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy')
    ax.legend()
    ax.grid(True, alpha=0.3)


def create_learning_curves_plot(results_n1, results_n2, repro_n1, repro_n2):
    """Create a 2x2 figure comparing the n_jobs=1 and n_jobs=2 runs.

    The top row shows the best CV score of every run as bars; the bottom row
    shows every run's ``learning_curve`` sequence (in this file it is taken
    from ``best_estimator_.summary()['accuracy']``). The figure is written
    to ``./cnn_learning_curves.png`` and then closed.

    Args:
        results_n1: Result dicts of the n_jobs=1 runs; each needs the keys
            ``run_id``, ``best_score`` and ``learning_curve``.
        results_n2: Result dicts of the n_jobs=2 runs, same layout.
        repro_n1: Whether the n_jobs=1 runs were judged reproducible.
        repro_n2: Whether the n_jobs=2 runs were judged reproducible.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

    # Colors for different runs
    colors = ['blue', 'red', 'green']

    # Plots 1 and 2: best score per run
    _draw_best_score_bars(ax1, results_n1, 1, repro_n1, 'lightblue', 'blue')
    _draw_best_score_bars(ax2, results_n2, 2, repro_n2, 'lightcoral', 'red')

    # Plots 3 and 4: per-epoch learning curves of the best estimator
    _draw_learning_curves(ax3, results_n1, 1, 'o', colors)
    _draw_learning_curves(ax4, results_n2, 2, 's', colors)

    plt.tight_layout()
    plt.savefig('./cnn_learning_curves.png', dpi=150, bbox_inches='tight')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print("\n📊 Learning curves plot saved: cnn_learning_curves.png")

    # Interpretation guide
    print("\n🔍 PLOT INTERPRETATION:")
    print(" • Overlapping lines in plots 3&4 = Reproducible results")
    print(" • Separated lines = Non-reproducible results")
    print(" • Compare n_jobs=1 vs n_jobs=2 patterns")
if __name__ == "__main__":
    # Run the full n_jobs comparison only when executed as a script, so that
    # importing this module does not start any training.
    test_cnn_reproducibility()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
