Simple neural network that fits a sine curve and plots graphs for easier understanding
import numpy as np
import matplotlib.pyplot as plt


class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        # Initialize weights and biases for a 2-layer network (He-style scaling)
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
        self.b1 = np.zeros((1, hidden_size))  # Hidden layer bias
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
        self.b2 = np.zeros((1, output_size))  # Output layer bias
        self.learning_rate = learning_rate

    def sigmoid(self, x):
        # Sigmoid activation function; clipping keeps exp() numerically stable
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_derivative(self, x):
        # Derivative of the sigmoid in terms of its output: if s = sigmoid(z),
        # then ds/dz = s * (1 - s). Callers pass activations, not pre-activations.
        return x * (1 - x)
    def forward(self, X):
        # Forward propagation with biases
        self.z1 = np.dot(X, self.W1) + self.b1  # Hidden layer pre-activation
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2  # Output layer pre-activation
        self.a2 = self.sigmoid(self.z2)
        return self.a2
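    # Gradient notes for backward(): with loss L = mean((y - a2)^2) and sigmoid
    # activations, the chain rule gives
    #   dL/dz2 ∝ -(y - a2) * a2 * (1 - a2)
    #   dL/dz1 ∝ (dL/dz2 @ W2.T) * a1 * (1 - a1)
    # backward() computes these deltas with the sign flipped (error = y - output),
    # which is why its "+=" updates still descend the loss.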
    def backward(self, X, y, output):
        # Backward propagation
        m = X.shape[0]  # Number of samples

        # Calculate output layer error
        output_error = y - output
        output_delta = output_error * self.sigmoid_derivative(output)

        # Calculate hidden layer error
        hidden_error = output_delta.dot(self.W2.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.a1)

        # Update weights and biases
        self.W2 += self.a1.T.dot(output_delta) * self.learning_rate / m
        self.b2 += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate / m  # Update output bias
        self.W1 += X.T.dot(hidden_delta) * self.learning_rate / m
        self.b1 += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate / m  # Update hidden bias
    def train(self, X, y, epochs):
        # Train the neural network
        losses = []
        for i in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)
            loss = np.mean(np.square(y - output))
            losses.append(loss)
            if i % 1000 == 0:
                print(f"Epoch {i}, Loss: {loss:.6f}")
        return losses
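    # Note: train() returns the per-epoch losses, which the demo below never
    # plots; a quick way to inspect convergence (illustrative, not in the gist):
    #   losses = nn.train(X, y, epochs=20000)
    #   plt.plot(losses); plt.xlabel('Epoch'); plt.ylabel('MSE loss'); plt.show()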
    def plot_function_approximation(self, X, y):
        # Plot the function approximation with debugging info
        plt.figure(figsize=(12, 8))

        # Create a fine mesh for a smooth curve
        x_fine = np.linspace(X.min(), X.max(), 1000).reshape(-1, 1)

        # Get both the hidden layer and the final output
        self.forward(x_fine)  # This updates self.z1, self.a1, self.z2, self.a2
        y_pred_fine = self.a2  # Final output
        hidden_activations = self.a1  # Hidden layer activations

        # Plot the original data points
        plt.scatter(X, y, color='red', s=50, label='Training Data', zorder=5)

        # Plot the learned function
        plt.plot(x_fine, y_pred_fine, 'b-', linewidth=2, label='Neural Network Approximation')

        # Plot each hidden neuron's sigmoid on the same axes
        for i in range(hidden_activations.shape[1]):
            plt.plot(x_fine, hidden_activations[:, i], '--', linewidth=2,
                     label=f'Hidden Neuron {i+1} (a1[{i}])', alpha=0.7)

        # Plot the true function for comparison
        y_true = 0.5 * np.sin(2 * np.pi * x_fine) + 0.5
        plt.plot(x_fine, y_true, 'g--', linewidth=2, label='True Function')

        # Add debugging information to the plot, ordered by network flow
        w1 = self.W1  # Input-to-hidden weights (1x2 in this example)
        b1 = self.b1  # Hidden layer biases (1x2 in this example)
        w2 = self.W2  # Hidden-to-output weights (2x1 in this example)
        b2 = self.b2  # Output layer bias (1x1 in this example)

        # Refresh z1/a1/z2/a2 with a forward pass on the training data so the
        # reported ranges describe the actual training inputs
        self.forward(X)

        # Order: Input → Layer 1 → Layer 2 → Output
        input_range = f"Input range: {X.min():.3f} to {X.max():.3f}"
        w1_info = f"W1 (input→hidden): {w1.flatten()}"
        b1_info = f"b1 (hidden bias): {b1.flatten()}"
        z1_range = f"z1 range: {self.z1.min():.6f} to {self.z1.max():.6f}"
        a1_range = f"a1 range (hidden): {self.a1.min():.6f} to {self.a1.max():.6f}"
        w2_info = f"W2 (hidden→output): {w2.flatten()}"
        b2_info = f"b2 (output bias): {b2.flatten()}"
        z2_range = f"z2 range: {self.z2.min():.6f} to {self.z2.max():.6f}"
        output_range = f"Output range: {y_pred_fine.min():.6f} to {y_pred_fine.max():.6f}"

        # Place the debug text in the top right corner, in a visible box
        plt.text(0.98, 0.98,
                 f"{input_range}\n{w1_info}\n{b1_info}\n{z1_range}\n{a1_range}\n"
                 f"{w2_info}\n{b2_info}\n{z2_range}\n{output_range}",
                 transform=plt.gca().transAxes, verticalalignment='top', horizontalalignment='right',
                 bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.9, edgecolor='black'),
                 fontsize=9, fontweight='bold')

        plt.xlabel('Input x')
        plt.ylabel('Output y')
        plt.title('Function Approximation with 2 Hidden Neurons (With Biases)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.savefig('function_approximation.png', dpi=300, bbox_inches='tight')
        plt.show()
    def plot_network_structure(self):
        # Plot the network structure with connection strengths
        plt.figure(figsize=(12, 8))

        # Define positions for neurons
        input_pos = [0.1, 0.5]  # Input neuron position
        hidden_positions = [[0.4, 0.7], [0.4, 0.3]]  # Hidden neuron positions
        output_pos = [0.7, 0.5]  # Output neuron position

        # Draw neurons
        # Input neuron
        plt.scatter(input_pos[0], input_pos[1], s=1000, c='lightblue', edgecolors='black', linewidth=2, zorder=5)
        plt.text(input_pos[0], input_pos[1], 'Input', ha='center', va='center', fontsize=12, fontweight='bold')

        # Hidden neurons
        for i, pos in enumerate(hidden_positions):
            plt.scatter(pos[0], pos[1], s=1000, c='lightgreen', edgecolors='black', linewidth=2, zorder=5)
            plt.text(pos[0], pos[1], f'H{i+1}', ha='center', va='center', fontsize=12, fontweight='bold')

        # Output neuron
        plt.scatter(output_pos[0], output_pos[1], s=1000, c='lightcoral', edgecolors='black', linewidth=2, zorder=5)
        plt.text(output_pos[0], output_pos[1], 'Output', ha='center', va='center', fontsize=12, fontweight='bold')

        # Draw connections with weights
        # Input-to-hidden connections
        for i in range(len(hidden_positions)):
            weight = self.W1[0, i]  # W1 is 1x2: weight from the input to hidden neuron i
            color = 'red' if weight < 0 else 'blue'
            alpha = min(abs(weight) * 2, 1.0)  # Line opacity based on weight magnitude
            linewidth = max(abs(weight) * 3, 0.5)  # Line thickness based on weight magnitude
            plt.plot([input_pos[0], hidden_positions[i][0]],
                     [input_pos[1], hidden_positions[i][1]],
                     color=color, linewidth=linewidth, alpha=alpha, zorder=1)
            # Add weight label
            mid_x = (input_pos[0] + hidden_positions[i][0]) / 2
            mid_y = (input_pos[1] + hidden_positions[i][1]) / 2
            plt.text(mid_x, mid_y + 0.05, f'{weight:.3f}', ha='center', va='center',
                     fontsize=10, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))

        # Hidden-to-output connections
        for i in range(len(hidden_positions)):
            weight = self.W2[i, 0]  # W2 is 2x1: weight from hidden neuron i to the output
            color = 'red' if weight < 0 else 'blue'
            alpha = min(abs(weight) * 2, 1.0)
            linewidth = max(abs(weight) * 3, 0.5)
            plt.plot([hidden_positions[i][0], output_pos[0]],
                     [hidden_positions[i][1], output_pos[1]],
                     color=color, linewidth=linewidth, alpha=alpha, zorder=1)
            # Add weight label
            mid_x = (hidden_positions[i][0] + output_pos[0]) / 2
            mid_y = (hidden_positions[i][1] + output_pos[1]) / 2
            plt.text(mid_x, mid_y - 0.05, f'{weight:.3f}', ha='center', va='center',
                     fontsize=10, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))

        # Add bias connections
        bias_y_offset = 0.1

        # Hidden layer biases
        for i, pos in enumerate(hidden_positions):
            bias = self.b1[0, i]
            color = 'red' if bias < 0 else 'blue'
            alpha = min(abs(bias) * 2, 1.0)
            linewidth = max(abs(bias) * 3, 0.5)
            # Draw bias connection from the left side
            plt.plot([pos[0] - 0.05, pos[0]], [pos[1] + bias_y_offset, pos[1]],
                     color=color, linewidth=linewidth, alpha=alpha, linestyle='--', zorder=1)
            # Add bias label
            plt.text(pos[0] - 0.1, pos[1] + bias_y_offset + 0.05, f'b={bias:.3f}',
                     ha='center', va='center', fontsize=9, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.1', facecolor='white', alpha=0.8))

        # Output layer bias
        bias = self.b2[0, 0]
        color = 'red' if bias < 0 else 'blue'
        alpha = min(abs(bias) * 2, 1.0)
        linewidth = max(abs(bias) * 3, 0.5)
        plt.plot([output_pos[0] - 0.05, output_pos[0]], [output_pos[1] + bias_y_offset, output_pos[1]],
                 color=color, linewidth=linewidth, alpha=alpha, linestyle='--', zorder=1)
        plt.text(output_pos[0] - 0.1, output_pos[1] + bias_y_offset + 0.05, f'b={bias:.3f}',
                 ha='center', va='center', fontsize=9, fontweight='bold',
                 bbox=dict(boxstyle='round,pad=0.1', facecolor='white', alpha=0.8))

        # Add legend (empty proxy plots)
        plt.plot([], [], 'b-', linewidth=2, label='Positive Weight')
        plt.plot([], [], 'r-', linewidth=2, label='Negative Weight')
        plt.plot([], [], 'k--', linewidth=2, label='Bias Connection')

        plt.xlim(0, 0.8)
        plt.ylim(0, 1)
        plt.axis('off')
        plt.title('Neural Network Structure with Connection Strengths\n(Red=Negative, Blue=Positive, Thickness=Magnitude)',
                  fontsize=14, fontweight='bold', pad=20)
        plt.legend(loc='upper right', bbox_to_anchor=(1, 1))
        plt.tight_layout()
        plt.savefig('network_structure.png', dpi=300, bbox_inches='tight')
        plt.show()
# Example usage: simple function approximation
if __name__ == "__main__":
    # Create a simple 1D function approximation dataset:
    # approximate y = 0.5 * sin(2πx) + 0.5
    np.random.seed(42)  # For reproducibility

    # Generate training data
    X = np.random.uniform(0, 1, 100).reshape(-1, 1)  # 100 random points between 0 and 1
    y = 0.5 * np.sin(2 * np.pi * X) + 0.5  # Target function

    # Create and train a neural network with 2 hidden neurons and biases
    nn = SimpleNeuralNetwork(input_size=1, hidden_size=2, output_size=1, learning_rate=0.1)

    print("Training Neural Network on Function Approximation")
    print("=" * 50)
    print("Target function: y = 0.5 * sin(2πx) + 0.5")
    print("Network architecture: 1 input → 2 hidden → 1 output (with biases)")
    print(f"Training data: {len(X)} points")
    print("=" * 50)

    # Train the network
    losses = nn.train(X, y, epochs=20000)

    # Test the network
    print("\nFinal Results:")
    print("=" * 50)
    predictions = nn.forward(X)
    mse = np.mean(np.square(y - predictions))
    print(f"Final Mean Squared Error: {mse:.6f}")
    print(f"Final W1: {nn.W1}")
    print(f"Final b1: {nn.b1}")
    print(f"Final W2: {nn.W2}")
    print(f"Final b2: {nn.b2}")

    # Show some example predictions
    print("\nSample Predictions:")
    for i in range(min(10, len(X))):
        print(f"Input: {X[i][0]:.3f}, Target: {y[i][0]:.3f}, Prediction: {predictions[i][0]:.3f}")

    # Visualize the function approximation
    nn.plot_function_approximation(X, y)

    # Visualize the network structure with connection strengths
    nn.plot_network_structure()
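Because the output neuron is a sigmoid, the network can only produce values in (0, 1), so any target you fit with this class should be scaled into that range first. As a minimal reuse sketch (the linear target, hidden_size=4, and epoch count below are illustrative choices, not part of the gist), the same class also fits a straight line:

# Illustrative reuse sketch: fit a line with the same class.
# The target 0.5*x + 0.25 stays inside (0, 1), as the sigmoid output requires.
X2 = np.random.uniform(0, 1, 100).reshape(-1, 1)
y2 = 0.5 * X2 + 0.25
nn2 = SimpleNeuralNetwork(input_size=1, hidden_size=4, output_size=1, learning_rate=0.1)
nn2.train(X2, y2, epochs=5000)
print(nn2.forward(np.array([[0.5]])))  # should be close to 0.5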