@vukrosic
Created August 4, 2025 17:19
Simple neural network that fits a 1D function (y = 0.5·sin(2πx) + 0.5) and plots graphs for easier understanding
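The script below builds a tiny 1 → 2 → 1 network with sigmoid activations, trains it with plain gradient descent on mean squared error, and saves two plots (function_approximation.png and network_structure.png). As a rough orientation, here is a minimal sketch of the output-layer update that the backward() method performs; the array values and variable names are made up purely for illustration, and delta is the usual error-times-sigmoid-derivative term.

# Minimal sketch (not part of the gist) of the output-layer delta-rule update
import numpy as np

a1 = np.array([[0.2, 0.9]])        # illustrative hidden activations (1 sample, 2 neurons)
W2 = np.array([[0.5], [-0.3]])     # illustrative hidden->output weights (2x1)
b2 = np.zeros((1, 1))              # output bias
y = np.array([[1.0]])              # illustrative target
lr = 0.1

z2 = a1 @ W2 + b2
a2 = 1 / (1 + np.exp(-z2))         # sigmoid output
delta = (y - a2) * a2 * (1 - a2)   # error * sigmoid derivative, as in backward() below
W2 += a1.T @ delta * lr / a1.shape[0]
b2 += delta.sum(axis=0, keepdims=True) * lr / a1.shape[0]

The complete script: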
import numpy as np
import matplotlib.pyplot as plt


class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        # Initialize weights and biases for 2-layer network
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
        self.b1 = np.zeros((1, hidden_size))  # Hidden layer bias
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
        self.b2 = np.zeros((1, output_size))  # Output layer bias
        self.learning_rate = learning_rate

    def sigmoid(self, x):
        # Sigmoid activation function with numerical stability
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_derivative(self, x):
        # Derivative of the sigmoid, expressed in terms of the sigmoid's output
        # (expects x to already be sigmoid(z), not the raw pre-activation)
        return x * (1 - x)

    def forward(self, X):
        # Forward propagation with biases
        self.z1 = np.dot(X, self.W1) + self.b1  # Add bias
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2  # Add bias
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output):
        # Backward propagation
        m = X.shape[0]  # Number of samples

        # Calculate output layer error
        output_error = y - output
        output_delta = output_error * self.sigmoid_derivative(output)

        # Calculate hidden layer error
        hidden_error = output_delta.dot(self.W2.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.a1)

        # Update weights and biases
        self.W2 += self.a1.T.dot(output_delta) * self.learning_rate / m
        self.b2 += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate / m  # Update output bias
        self.W1 += X.T.dot(hidden_delta) * self.learning_rate / m
        self.b1 += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate / m  # Update hidden bias

    def train(self, X, y, epochs):
        # Train the neural network
        losses = []
        for i in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)
            loss = np.mean(np.square(y - output))
            losses.append(loss)
            if i % 1000 == 0:
                print(f"Epoch {i}, Loss: {loss:.6f}")
        return losses
    def plot_function_approximation(self, X, y):
        # Plot function approximation with debugging info
        plt.figure(figsize=(12, 8))

        # Create a fine mesh for smooth curve
        x_fine = np.linspace(X.min(), X.max(), 1000).reshape(-1, 1)

        # Get both hidden layer and final output
        self.forward(x_fine)  # This updates self.z1, self.a1, self.z2, self.a2
        y_pred_fine = self.a2  # Final output
        hidden_activations = self.a1  # Hidden layer activations

        # Plot the original data points
        plt.scatter(X, y, color='red', s=50, label='Training Data', zorder=5)

        # Plot the learned function
        plt.plot(x_fine, y_pred_fine, 'b-', linewidth=2, label='Neural Network Approximation')

        # Plot each hidden neuron's sigmoid on the same plot
        for i in range(hidden_activations.shape[1]):
            plt.plot(x_fine, hidden_activations[:, i], '--', linewidth=2,
                     label=f'Hidden Neuron {i+1} (a1[{i}])', alpha=0.7)

        # Plot the true function for comparison
        y_true = 0.5 * np.sin(2 * np.pi * x_fine) + 0.5
        plt.plot(x_fine, y_true, 'g--', linewidth=2, label='True Function')

        # Add debugging information to the plot - ordered by network flow
        w1 = self.W1  # Input to hidden weights (1x2)
        b1 = self.b1  # Hidden layer biases (1x2)
        w2 = self.W2  # Hidden to output weights (2x1)
        b2 = self.b2  # Output layer bias (1x1)

        # Get the actual z1 and z2 values from a forward pass on the training data
        self.forward(X)  # This updates self.z1, self.a1, self.z2, self.a2

        # Order: Input → Layer 1 → Layer 2 → Output
        input_range = f"Input range: {X.min():.3f} to {X.max():.3f}"
        w1_info = f"W1 (input→hidden): {w1.flatten()}"
        b1_info = f"b1 (hidden bias): {b1.flatten()}"
        z1_range = f"z1 range: {self.z1.min():.6f} to {self.z1.max():.6f}"
        a1_range = f"a1 range (hidden): {self.a1.min():.6f} to {self.a1.max():.6f}"
        w2_info = f"W2 (hidden→output): {w2.flatten()}"
        b2_info = f"b2 (output bias): {b2.flatten()}"
        z2_range = f"z2 range: {self.z2.min():.6f} to {self.z2.max():.6f}"
        output_range = f"Output range: {y_pred_fine.min():.6f} to {y_pred_fine.max():.6f}"

        # Place the text in the top right corner and make it more visible
        plt.text(0.98, 0.98,
                 f"{input_range}\n{w1_info}\n{b1_info}\n{z1_range}\n{a1_range}\n"
                 f"{w2_info}\n{b2_info}\n{z2_range}\n{output_range}",
                 transform=plt.gca().transAxes, verticalalignment='top', horizontalalignment='right',
                 bbox=dict(boxstyle='round', facecolor='yellow', alpha=0.9, edgecolor='black'),
                 fontsize=9, fontweight='bold')

        plt.xlabel('Input x')
        plt.ylabel('Output y')
        plt.title('Function Approximation with 2 Hidden Neurons (With Biases)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.savefig('function_approximation.png', dpi=300, bbox_inches='tight')
        plt.show()
    def plot_network_structure(self):
        # Plot neural network structure with connection strengths
        plt.figure(figsize=(12, 8))

        # Define positions for neurons
        input_pos = [0.1, 0.5]  # Input neuron position
        hidden_positions = [[0.4, 0.7], [0.4, 0.3]]  # Hidden neuron positions
        output_pos = [0.7, 0.5]  # Output neuron position

        # Draw neurons
        # Input neuron
        plt.scatter(input_pos[0], input_pos[1], s=1000, c='lightblue', edgecolors='black', linewidth=2, zorder=5)
        plt.text(input_pos[0], input_pos[1], 'Input', ha='center', va='center', fontsize=12, fontweight='bold')

        # Hidden neurons
        for i, pos in enumerate(hidden_positions):
            plt.scatter(pos[0], pos[1], s=1000, c='lightgreen', edgecolors='black', linewidth=2, zorder=5)
            plt.text(pos[0], pos[1], f'H{i+1}', ha='center', va='center', fontsize=12, fontweight='bold')

        # Output neuron
        plt.scatter(output_pos[0], output_pos[1], s=1000, c='lightcoral', edgecolors='black', linewidth=2, zorder=5)
        plt.text(output_pos[0], output_pos[1], 'Output', ha='center', va='center', fontsize=12, fontweight='bold')

        # Draw connections with weights
        # Input to hidden connections
        for i in range(len(hidden_positions)):
            weight = self.W1[0, i]  # W1 is 1x2, so we get the weight from the input to hidden neuron i
            color = 'red' if weight < 0 else 'blue'
            alpha = min(abs(weight) * 2, 1.0)  # Line opacity based on weight magnitude
            linewidth = max(abs(weight) * 3, 0.5)  # Line thickness based on weight magnitude
            plt.plot([input_pos[0], hidden_positions[i][0]],
                     [input_pos[1], hidden_positions[i][1]],
                     color=color, linewidth=linewidth, alpha=alpha, zorder=1)
            # Add weight label
            mid_x = (input_pos[0] + hidden_positions[i][0]) / 2
            mid_y = (input_pos[1] + hidden_positions[i][1]) / 2
            plt.text(mid_x, mid_y + 0.05, f'{weight:.3f}', ha='center', va='center',
                     fontsize=10, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))

        # Hidden to output connections
        for i in range(len(hidden_positions)):
            weight = self.W2[i, 0]  # W2 is 2x1, so we get the weight from hidden neuron i to the output
            color = 'red' if weight < 0 else 'blue'
            alpha = min(abs(weight) * 2, 1.0)
            linewidth = max(abs(weight) * 3, 0.5)
            plt.plot([hidden_positions[i][0], output_pos[0]],
                     [hidden_positions[i][1], output_pos[1]],
                     color=color, linewidth=linewidth, alpha=alpha, zorder=1)
            # Add weight label
            mid_x = (hidden_positions[i][0] + output_pos[0]) / 2
            mid_y = (hidden_positions[i][1] + output_pos[1]) / 2
            plt.text(mid_x, mid_y - 0.05, f'{weight:.3f}', ha='center', va='center',
                     fontsize=10, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.8))

        # Add bias connections
        bias_y_offset = 0.1

        # Hidden layer biases
        for i, pos in enumerate(hidden_positions):
            bias = self.b1[0, i]
            color = 'red' if bias < 0 else 'blue'
            alpha = min(abs(bias) * 2, 1.0)
            linewidth = max(abs(bias) * 3, 0.5)
            # Draw bias connection from the left side
            plt.plot([pos[0] - 0.05, pos[0]], [pos[1] + bias_y_offset, pos[1]],
                     color=color, linewidth=linewidth, alpha=alpha, linestyle='--', zorder=1)
            # Add bias label
            plt.text(pos[0] - 0.1, pos[1] + bias_y_offset + 0.05, f'b={bias:.3f}',
                     ha='center', va='center', fontsize=9, fontweight='bold',
                     bbox=dict(boxstyle='round,pad=0.1', facecolor='white', alpha=0.8))

        # Output layer bias
        bias = self.b2[0, 0]
        color = 'red' if bias < 0 else 'blue'
        alpha = min(abs(bias) * 2, 1.0)
        linewidth = max(abs(bias) * 3, 0.5)
        plt.plot([output_pos[0] - 0.05, output_pos[0]], [output_pos[1] + bias_y_offset, output_pos[1]],
                 color=color, linewidth=linewidth, alpha=alpha, linestyle='--', zorder=1)
        plt.text(output_pos[0] - 0.1, output_pos[1] + bias_y_offset + 0.05, f'b={bias:.3f}',
                 ha='center', va='center', fontsize=9, fontweight='bold',
                 bbox=dict(boxstyle='round,pad=0.1', facecolor='white', alpha=0.8))

        # Add legend entries via empty proxy plots
        plt.plot([], [], 'b-', linewidth=2, label='Positive Weight')
        plt.plot([], [], 'r-', linewidth=2, label='Negative Weight')
        plt.plot([], [], 'k--', linewidth=2, label='Bias Connection')

        plt.xlim(0, 0.8)
        plt.ylim(0, 1)
        plt.axis('off')
        plt.title('Neural Network Structure with Connection Strengths\n(Red=Negative, Blue=Positive, Thickness=Magnitude)',
                  fontsize=14, fontweight='bold', pad=20)
        plt.legend(loc='upper right', bbox_to_anchor=(1, 1))
        plt.tight_layout()
        plt.savefig('network_structure.png', dpi=300, bbox_inches='tight')
        plt.show()

# Example usage - simple function approximation
if __name__ == "__main__":
    # Create a simple 1D function approximation dataset
    # We'll approximate y = 0.5 * sin(2πx) + 0.5
    np.random.seed(42)  # For reproducibility

    # Generate training data
    X = np.random.uniform(0, 1, 100).reshape(-1, 1)  # 100 random points between 0 and 1
    y = 0.5 * np.sin(2 * np.pi * X) + 0.5  # Target function

    # Create and train neural network with 2 hidden neurons and biases
    nn = SimpleNeuralNetwork(input_size=1, hidden_size=2, output_size=1, learning_rate=0.1)

    print("Training Neural Network on Function Approximation")
    print("=" * 50)
    print("Target function: y = 0.5 * sin(2πx) + 0.5")
    print("Network architecture: 1 input → 2 hidden → 1 output (with biases)")
    print(f"Training data: {len(X)} points")
    print("=" * 50)

    # Train the network
    losses = nn.train(X, y, epochs=20000)

    # Test the network
    print("\nFinal Results:")
    print("=" * 50)
    predictions = nn.forward(X)
    mse = np.mean(np.square(y - predictions))
    print(f"Final Mean Squared Error: {mse:.6f}")
    print(f"Final W1: {nn.W1}")
    print(f"Final b1: {nn.b1}")
    print(f"Final W2: {nn.W2}")
    print(f"Final b2: {nn.b2}")

    # Show some example predictions
    print("\nSample Predictions:")
    for i in range(min(10, len(X))):
        print(f"Input: {X[i][0]:.3f}, Target: {y[i][0]:.3f}, Prediction: {predictions[i][0]:.3f}")

    # Visualize the function approximation
    nn.plot_function_approximation(X, y)

    # Visualize the network structure with connection strengths
    nn.plot_network_structure()
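
To query the trained network at new points, something like the following could be appended to the __main__ block (a minimal sketch; x_new is an illustrative name, and any (n, 1) array of inputs in [0, 1] works):

    # Predict at a couple of unseen inputs (assumes nn has been trained above)
    x_new = np.array([[0.25], [0.75]])
    print(nn.forward(x_new))  # predictions for x = 0.25 and x = 0.75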