import numpy as np
import pandas as pd
data = np.loadtxt("data/logisticRegressionCircular.txt",delimiter = ",")
x = data[:,:2]
y = data[:,2]
# 2D PLOT
import matplotlib.pyplot as plt
%matplotlib inline
plt.clf() # clear the figure
positive = y == 1
negative = y == 0
plt.plot(x[positive, 0], x[positive, 1], '.', label="Positive")
plt.plot(x[negative, 0], x[negative, 1], '.', label="Negative")
plt.ylabel('Test 2')
plt.xlabel('Test 1')
plt.legend(loc="upper right")
plt.show()
'''
# alternate: scikit-learn's PolynomialFeatures
# By default the transformed array has one extra column, the bias (a column of ones,
# placed first). Either drop that column manually or pass include_bias=False:
# poly = PolynomialFeatures(degree=6, include_bias=False)
-------
from sklearn.preprocessing import PolynomialFeatures
#transforming the features using polynomial features
poly = PolynomialFeatures(degree=6)
x_values_transformed = poly.fit_transform(x)
#get output feature names for transformation
print(poly.get_feature_names_out(input_features=None))
--
for example, for degree = 4 (with include_bias=False, so there is no bias column),
x0 -> feature 1, x1 -> feature 2:
['x0' 'x1' 'x0^2' 'x0 x1' 'x1^2' 'x0^3' 'x0^2 x1' 'x0 x1^2' 'x1^3' 'x0^4'
'x0^3 x1' 'x0^2 x1^2' 'x0 x1^3' 'x1^4']
--
print(x_values_transformed.shape)
'''
def map_feature(x1, x2, degree=6):
    x1 = np.atleast_1d(x1)
    x2 = np.atleast_1d(x2)
    output = []
    for i in range(1, degree + 1):
        for j in range(i + 1):
            output.append(x1**(i - j) * x2**j)
    return np.stack(output, axis=1)
mapped_x = map_feature(x[:, 0], x[:, 1],6)
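'''
# sanity check (a sketch, assuming scikit-learn is available; not part of the original flow):
# map_feature should reproduce PolynomialFeatures(degree=6, include_bias=False), whose
# column order matches the feature-name listing shown earlier.
from sklearn.preprocessing import PolynomialFeatures
poly_check = PolynomialFeatures(degree=6, include_bias=False)
sk_mapped = poly_check.fit_transform(x)
print(mapped_x.shape, sk_mapped.shape)   # (118, 27) (118, 27)
print(np.allclose(mapped_x, sk_mapped))  # expected True if the column orders line up
'''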
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras import Sequential
from tensorflow.keras.losses import MeanSquaredError, BinaryCrossentropy
from tensorflow.keras.activations import sigmoid
print("Before Normalization: (max, min)")
print(f"x1: ({np.max(mapped_x[:,0]):0.2f}, {np.min(mapped_x[:,0]):0.2f})")
print(f"x2: ({np.max(mapped_x[:,1]):0.2f}, {np.min(mapped_x[:,1]):0.2f})\n")
#axis = 1 -> normalize feature-wise
norm_layer = tf.keras.layers.Normalization(axis=1)
norm_layer.adapt(mapped_x) # learns mean, variance
x_normalized = norm_layer(mapped_x) # passing the mapped data through the normalization layer
print("After Normalization: (max, min)")
print(f"x1: ({np.max(x_normalized[:,0]):0.2f}, {np.min(x_normalized[:,0]):0.2f})")
print(f"x2: ({np.max(x_normalized[:,1]):0.2f}, {np.min(x_normalized[:,1]):0.2f})")
Before Normalization: (max, min)
x1: (1.07, -0.83)
x2: (1.11, -0.77)

After Normalization: (max, min)
x1: (2.05, -1.79)
x2: (1.79, -1.84)
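'''
# what the Normalization layer does (a sketch, not in the original notebook): adapt()
# learns per-feature mean and variance, and the layer then applies
# (x - mean) / sqrt(variance), i.e. a z-score per column.
manual = (mapped_x - mapped_x.mean(axis=0)) / mapped_x.std(axis=0)
print(np.allclose(manual, x_normalized.numpy(), atol=1e-3))  # expected True (up to float32 precision)
'''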
# np.tile repeats an array along the specified dimensions.
"""
Tiling duplicates the dataset, increasing the number of rows the model sees in each epoch.
Exact copies add no new information, so this mostly acts like running more gradient updates
(similar to training for more epochs); more epochs help learning only up to a point,
beyond which overfitting can occur.
"""
print(f"Before tiling:\nx_normalized: {x_normalized.shape}, y:{y.shape}\n")
Xt = np.tile(x_normalized,(1000,1))
Yt= np.tile(y.reshape(-1,1),(1000,1))
print(f"After tiling:\nXt:{Xt.shape}, Yt:{Yt.shape}")
Before tiling:
x_normalized: (118, 27), y:(118,)

After tiling:
Xt:(118000, 27), Yt:(118000, 1)
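'''
# how np.tile works (a tiny illustration, not in the original notebook):
demo = np.array([[1, 2], [3, 4]])
print(np.tile(demo, (3, 1)))        # three copies stacked row-wise
print(np.tile(demo, (3, 1)).shape)  # (6, 2)
'''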
# Set seed for reproducibility, ensuring "random" numbers are predictable
tf.random.set_seed(15)
model = Sequential(
    [
        # Input layer specifying the expected shape of the input data, (27,) here
        # (each example is a vector of 27 mapped features)
        tf.keras.Input(shape=(27,)),
        Dense(3, activation='sigmoid', name='layer1'),
        Dense(1, activation='sigmoid', name='layer2')
    ]
)
'''
Each parameter: 32-bit single-precision float -> 4 bytes
Size: 4 bytes * 88 = 352 bytes
Input Shape: (27,)
layer1:
3 neurons
27 * 3 = 81 weights
Each Neuron has a bias : 3 * 1 = 3 biases
81 + 3 = 84 parameters
layer2:
activation vector from layer1: (3,)
1 neuron
3 * 1 = 3 weights
Each Neuron has a bias : 1 * 1 = 1 bias
3 + 1 = 4 parameters
TOTAL: 84 parameters + 4 parameters = 88 parameters
'''
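'''
# programmatic cross-check of the arithmetic above (a sketch; count_params() is standard Keras):
for layer in model.layers:
    w, b = layer.get_weights()
    print(layer.name, w.shape, b.shape, layer.count_params())
print("total:", model.count_params())  # expected 88
'''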
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= layer1 (Dense) (None, 3) 84 layer2 (Dense) (None, 1) 4 ================================================================= Total params: 88 (352.00 Byte) Trainable params: 88 (352.00 Byte) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
'''
Weights: (no. of input features to the layer, no. of neurons in the layer)
Bias: one per neuron in the layer
layer1: 3 neurons, 27 inputs -> Weights: (27, 3), bias: (3,)
layer2: 1 neuron,  3 inputs -> Weights: (3, 1),  bias: (1,)
'''
W1, b1 = model.get_layer("layer1").get_weights()
W2, b2 = model.get_layer("layer2").get_weights()
print(f"W1 {W1.shape}:\n", W1, f"\nb1{b1.shape}:", b1,"\n")
print(f"W2 {W2.shape}:\n", W2, f"\nb2{b2.shape}:", b2)
W1 (27, 3): [[ 0.41747773 -0.29348433 -0.00217053] [-0.10377398 -0.04383686 -0.19907388] [ 0.40744656 0.28326827 -0.27052522] [ 0.1958254 0.07550794 0.18662721] [-0.17590362 0.2239291 0.19624811] [-0.25595728 -0.09940261 0.0811345 ] [-0.28222048 -0.20608516 0.41728646] [-0.381807 0.37945116 0.24747723] [-0.28109047 0.33268392 -0.24450244] [ 0.29655534 0.25481224 0.09819913] [-0.18075502 0.06103289 0.34142506] [ 0.07974988 0.18387204 -0.30261147] [-0.0074124 0.20929837 0.37018436] [-0.36259004 -0.42179698 0.16162175] [ 0.24684155 -0.09132114 -0.04898393] [ 0.14155704 -0.27164018 -0.35065252] [ 0.43898958 -0.1588648 0.37693393] [ 0.04230395 -0.25392213 0.15354937] [ 0.26376003 -0.22942868 0.3585071 ] [ 0.31141937 0.41472197 0.4270959 ] [ 0.10709476 0.40090126 -0.14387739] [ 0.40508664 0.31219876 0.35890478] [ 0.11861646 0.01079676 0.09644186] [ 0.41760898 -0.15878642 0.282867 ] [ 0.12619841 -0.3383884 -0.20805845] [-0.27858534 0.03080881 0.40079558] [ 0.0346655 0.03375065 0.03876883]] b1(3,): [0. 0. 0.] W2 (3, 1): [[ 0.16493034] [-1.023937 ] [ 0.6455544 ]] b2(1,): [0.]
#batch-size
import math
# default batch size in Keras is 32 rows -> steps per epoch = ceil(118000 / 32)
math.ceil(Xt.shape[0]/32)
3688
#Apple Silicon: optimizers.legacy.Adam
#Base: optimizers.Adam
#BinaryCrossentropy() -> BCE -> Logistic Loss Func -> -ylog(y') - (1-y)log(1-y')
model.compile(
    loss = tf.keras.losses.BinaryCrossentropy(),
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.3),
)
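'''
# numeric check of the BCE formula in the comment above (a sketch, not part of the original flow):
y_true = np.array([1.0, 0.0, 1.0])
y_prob = np.array([0.9, 0.2, 0.6])
manual_bce = np.mean(-y_true*np.log(y_prob) - (1 - y_true)*np.log(1 - y_prob))
keras_bce = tf.keras.losses.BinaryCrossentropy()(y_true, y_prob).numpy()
print(manual_bce, keras_bce)  # should agree up to Keras's internal epsilon clipping
'''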
model.fit(
    Xt, Yt,
    epochs=10,
)
Epoch 1/10
3688/3688 [==============================] - 2s 405us/step - loss: 0.2544
Epoch 2/10
3688/3688 [==============================] - 1s 392us/step - loss: 0.2781
Epoch 3/10
3688/3688 [==============================] - 2s 407us/step - loss: 0.2589
Epoch 4/10
3688/3688 [==============================] - 2s 420us/step - loss: 0.3021
Epoch 5/10
3688/3688 [==============================] - 2s 424us/step - loss: 0.2724
Epoch 6/10
3688/3688 [==============================] - 1s 399us/step - loss: 0.2767
Epoch 7/10
3688/3688 [==============================] - 2s 417us/step - loss: 0.2764
Epoch 8/10
3688/3688 [==============================] - 2s 411us/step - loss: 0.2808
Epoch 9/10
3688/3688 [==============================] - 1s 395us/step - loss: 0.2768
Epoch 10/10
3688/3688 [==============================] - 1s 397us/step - loss: 0.2630
<keras.src.callbacks.History at 0x1197bd280>
W1, b1 = model.get_layer("layer1").get_weights()
W2, b2 = model.get_layer("layer2").get_weights()
print(f"W1 {W1.shape}:\n", W1, f"\nb1{b1.shape}:", b1,"\n")
print(f"W2 {W2.shape}:\n", W2, f"\nb2{b2.shape}:", b2)
W1 (27, 3): [[ 15.328138 -71.234825 69.27692 ] [ 29.92365 -42.74193 43.84616 ] [ -34.141407 -4.707564 -2.0779054] [ -20.260534 63.126026 -116.036514 ] [ -19.381226 62.671906 -18.136982 ] [ 16.604284 -28.91536 -5.725134 ] [ 9.761409 -50.757298 106.47544 ] [ 14.553898 6.5405817 56.862183 ] [ 13.211567 -27.9615 -1.7369088] [ -30.68112 23.88575 -13.655448 ] [ -23.493431 52.256927 -36.96087 ] [ -28.838383 1.6247978 -23.153069 ] [ 2.6439533 -10.462127 -65.59232 ] [ -22.37037 55.297802 -27.925352 ] [ 3.7950525 29.557556 -26.813478 ] [ 20.135395 9.016884 62.975525 ] [ 34.019814 -60.34401 18.324793 ] [ -31.301668 -11.44731 -6.566687 ] [ -4.703855 61.010284 -32.839474 ] [ 4.4943604 -19.30526 -13.230817 ] [ -25.752823 51.724663 -30.334648 ] [ -22.549353 67.37171 -17.016308 ] [ -18.415895 22.12822 -12.431512 ] [ 20.966661 -56.426502 11.210673 ] [ -26.595062 74.7458 -34.644035 ] [ -11.136519 -9.808624 -55.05441 ] [ -30.443558 30.013107 -32.70444 ]] b1(3,): [-3.6942687 2.395449 10.05173 ] W2 (3, 1): [[ 3.4375792] [-2.4956472] [ 2.3688898]] b2(1,): [-2.4697497]
x_test = np.array([[0.051267,0.69956]])
# mapping the features
mapped_x_test = map_feature(x_test[:, 0], x_test[:, 1],6)
# normalizing the features
x_test_normalized = norm_layer(mapped_x_test)
print(x_test_normalized)
# reshaping using tf.reshape if necessary (the model expects input to have a certain shape)
x_test_normalized = tf.reshape(x_test_normalized, (1, -1))
# getting the prediction
y_pred = model.predict(x_test_normalized)
print(f"The model prediction for {x_test} is {y_pred[0][0]}")
tf.Tensor(
[[-0.0071013   0.99791807 -0.98977673  0.274898    0.6635931  -0.21829255
  -0.21502961  0.06425668  0.6158709  -0.5880131   0.05549109 -0.7235762
   0.24604157  0.2439574  -0.24294192 -0.16406117 -0.17240581 -0.29825962
   0.08229139  0.17403954 -0.40598637  0.01211625 -0.5542344   0.04696623
  -0.50738925  0.16502519 -0.02886213]], shape=(1, 27), dtype=float32)
1/1 [==============================] - 0s 35ms/step
The model prediction for [[0.051267 0.69956 ]] is 0.9656660556793213
'''
# alternate (not recommended: predicts one row at a time in a Python loop)
array = np.array([])
for m in range(x_normalized.shape[0]):
    y_pred = model.predict(np.reshape(x_normalized[m], (1, -1)))
    array = np.append(array, y_pred)
# reshape array to match the shape of y
array = array.reshape(-1, 1)
'''
array = model.predict(x_normalized)
#thresholding the probabilities
array_predictions = (array >= 0.5).astype(int)
#calculating accuracy
accuracy = np.mean(array_predictions == y.reshape(-1, 1)) * 100
print('Accuracy: %f' % accuracy)
4/4 [==============================] - 0s 552us/step
Accuracy: 88.983051
from tensorflow.keras.models import Model
# creating a new model that's identical to the original model up to the first layer
intermediate_model = Model(inputs=model.input, outputs=model.get_layer("layer1").output)
# for a given input x_normalized, activations of the first layer
first_layer_activations = intermediate_model.predict(x_normalized)
4/4 [==============================] - 0s 600us/step
import matplotlib.pyplot as plt
# plotting the activations of each neuron
for i in range(first_layer_activations.shape[1]):
    plt.plot(first_layer_activations[:, i])
plt.title('Activations of neurons in the first layer')
plt.xlabel('Sample')
plt.ylabel('Activation')
#plt.legend(['Neuron 1', 'Neuron 2', 'Neuron 3'], loc='upper right')
plt.show()
from tensorflow.keras.models import Model
# intermediate model that outputs the activations of the second layer
intermediate_model = Model(inputs=model.input, outputs=model.get_layer("layer2").output)
intermediate_output = intermediate_model.predict(x_normalized)
plt.plot(intermediate_output)
plt.show()
4/4 [==============================] - 0s 507us/step
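'''
# note (a sketch): layer2 is the final layer, so this second intermediate model is
# effectively the whole network and should reproduce model.predict exactly.
print(np.allclose(intermediate_output, model.predict(x_normalized), atol=1e-6))  # expected True
'''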
# implementing the same network as a simple NumPy forward pass
'''
Multiplying sigmoid(x) by exp(x)/exp(x) gives an equivalent form:
sigmoid(x) = 1 / (1 + np.exp(-x)) = np.exp(x) / (np.exp(x) + 1)
The second form avoids overflow of exp(-x) for very negative x;
clipping the input (as done below) is the more general fix.
'''
def sigmoid(x):
    # clip the values to [-50, 50] to avoid overflow in np.exp
    x = np.clip(x, -50, 50)
    return 1 / (1 + np.exp(-x))
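'''
# alternative to clipping (a sketch, not the notebook's implementation): branch on the
# sign of x and use exp(x)/(exp(x)+1) for negative inputs, so np.exp never receives a
# large positive argument.
def sigmoid_stable(x):
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    ex = np.exp(x[~pos])
    out[~pos] = ex / (1.0 + ex)
    return out
print(sigmoid_stable(np.array([-1000.0, 0.0, 1000.0])))  # [0.  0.5 1. ]
'''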
def dense_layer(activation_input, w_values, b_values):
    """
    activation_input: (n,)   -> reshaped to (1, n)
    w_values:         (n, m) -> n inputs per neuron, m neurons
    b_values:         (m,)   -> bias vector of m units -> reshaped to (1, m)
    activation_output: (1, m)
    activation_input @ w_values -> (1, n) x (n, m) = (1, m)
    """
    activation_output = sigmoid(activation_input.reshape(1, -1) @ w_values + b_values.reshape(1, -1))
    return activation_output
def sequential_model(X, W1, b1, W2, b2):
    a1 = dense_layer(X, W1, b1)
    a2 = dense_layer(a1, W2, b2)
    return a2
def model_predict(X, W1, b1, W2, b2):
    m = X.shape[0]   # X: (m, 27) -> one 27-feature row per example
    p = np.zeros(m)
    for i in range(m):
        p[i] = sequential_model(X[i], W1, b1, W2, b2)[0, 0]
    return p
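'''
# vectorized equivalent of the loop above (a sketch using the same sigmoid and weights):
def model_predict_vectorized(X, W1, b1, W2, b2):
    a1 = sigmoid(X @ W1 + b1)   # (m, 27) @ (27, 3) + (3,) -> (m, 3)
    a2 = sigmoid(a1 @ W2 + b2)  # (m, 3)  @ (3, 1)  + (1,) -> (m, 1)
    return a2.ravel()
# print(np.allclose(model_predict_vectorized(np.array(x_normalized), W1, b1, W2, b2),
#                   model_predict(np.array(x_normalized), W1, b1, W2, b2)))  # expected True
'''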
x_test = np.array([[0.051267,0.69956]])
# mapping the features
mapped_x_test = map_feature(x_test[:, 0], x_test[:, 1],6)
# normalizing the features
x_test_normalized = norm_layer(mapped_x_test)
print(x_test_normalized)
print(x_test_normalized.shape)
prediction = model_predict(np.array(x_test_normalized),W1,b1,W2,b2)
tf.Tensor(
[[-0.0071013   0.99791807 -0.98977673  0.274898    0.6635931  -0.21829255
  -0.21502961  0.06425668  0.6158709  -0.5880131   0.05549109 -0.7235762
   0.24604157  0.2439574  -0.24294192 -0.16406117 -0.17240581 -0.29825962
   0.08229139  0.17403954 -0.40598637  0.01211625 -0.5542344   0.04696623
  -0.50738925  0.16502519 -0.02886213]], shape=(1, 27), dtype=float32)
(1, 27)
dataset_pred = model_predict(np.array(x_normalized),W1,b1,W2,b2)
print(x_normalized.shape)
print(W1.shape)
print(dataset_pred.shape)
(118, 27)
(27, 3)
(118,)
dataset_pred_labels = (dataset_pred >= 0.5).astype(int)    # threshold the probabilities
dataset_accuracy = np.mean(dataset_pred_labels == y) * 100
print('Accuracy: %f' % dataset_accuracy)
Accuracy: 88.983051
x1_min, x1_max = np.min(mapped_x[:,0]) - 0.5, np.max(mapped_x[:,0]) + 0.5
x2_min, x2_max = np.min(mapped_x[:,1]) - 0.5, np.max(mapped_x[:,1]) + 0.5
x1_values = np.linspace(x1_min, x1_max, 200)
x2_values = np.linspace(x2_min, x2_max, 200)
#meshgrid
x1, x2 = np.meshgrid(x1_values, x2_values)
# flattening x1 and x2 to create X_test array
X_test = np.column_stack((x1.ravel(), x2.ravel()))
# transforming X_test using map_feature function and the normalization layer
mapped_X_test = map_feature(X_test[:, 0], X_test[:, 1], 6)
normalized_X_test = norm_layer(mapped_X_test)
# using the model to predict the class of each point in the grid
Y_pred = model.predict(normalized_X_test)
# reshaping Y_pred to match the shape of the x1 and x2
Y_pred = Y_pred.reshape(x1.shape)
# Create a contour plot to show the decision boundary
plt.figure(figsize=(10, 8))
plt.contour(x1, x2, Y_pred, levels=[0.5], colors='b') # decision boundary at p = 0.5
plt.scatter(x[y == 0, 0], x[y == 0, 1], marker='o', c='r', label="0")
plt.scatter(x[y == 1, 0], x[y == 1, 1], marker='*', c='black', label="1")
plt.xlabel('X1')
plt.ylabel('X2')
plt.legend()
plt.title('Decision Boundary')
plt.show()
1250/1250 [==============================] - 0s 309us/step
'''
exploring meshgrid and ravel
'''
va1s = np.linspace(1, 10, 4)
va2s = np.linspace(1, 10, 4)
v1, v2 = np.meshgrid(va1s, va2s)
print("v1:",v1,"v2:",v2,sep='\n')
print("\nv1 ravel:",v1.ravel())
print("v2 ravel:",v2.ravel())
lmno = np.column_stack((v1.ravel(),v2.ravel()))
print("\ncolumn stack:\n",lmno)
v1:
[[ 1.  4.  7. 10.]
 [ 1.  4.  7. 10.]
 [ 1.  4.  7. 10.]
 [ 1.  4.  7. 10.]]
v2:
[[ 1.  1.  1.  1.]
 [ 4.  4.  4.  4.]
 [ 7.  7.  7.  7.]
 [10. 10. 10. 10.]]

v1 ravel: [ 1.  4.  7. 10.  1.  4.  7. 10.  1.  4.  7. 10.  1.  4.  7. 10.]
v2 ravel: [ 1.  1.  1.  1.  4.  4.  4.  4.  7.  7.  7.  7. 10. 10. 10. 10.]

column stack:
 [[ 1.  1.]
 [ 4.  1.]
 [ 7.  1.]
 [10.  1.]
 [ 1.  4.]
 [ 4.  4.]
 [ 7.  4.]
 [10.  4.]
 [ 1.  7.]
 [ 4.  7.]
 [ 7.  7.]
 [10.  7.]
 [ 1. 10.]
 [ 4. 10.]
 [ 7. 10.]
 [10. 10.]]
%matplotlib widget
'''
!pip install ipywidgets
!pip install ipympl
'''
#3D Plot
from mpl_toolkits.mplot3d import Axes3D
from ipywidgets import interact
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(x1, x2, Y_pred, color='b', alpha=0.6)
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('Probability')
def update(elev=30, azim=120):
    ax.view_init(elev, azim)
    plt.draw()
interact(update, elev=(0, 90, 10), azim=(0, 360, 10))
interactive(children=(IntSlider(value=30, description='elev', max=90, step=10), IntSlider(value=120, descripti…
<function __main__.update(elev=30, azim=120)>
# interactive 3D surface plot with Plotly (offline mode)
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
fig = go.Figure(data=[go.Surface(x=x1, y=x2, z=Y_pred, colorscale='Viridis')])
fig.update_layout(
    title='3D Decision Probability',
    autosize=False,
    width=700,
    height=700,
    scene=dict(
        xaxis_title='x1',
        yaxis_title='x2',
        zaxis_title='Probability',
    ),
    margin=dict(l=65, r=50, b=65, t=90)
)
iplot(fig)