import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.model_selection import train_test_split
import cv2
from PIL import Image
import random
from albumentations import *
from tqdm import tqdm
# Albumentations transform classes used to enlarge the dataset offline.
# NOTE(review): ShiftScaleRotate appears twice, so each image gets two
# independently-sampled variants of it — confirm this is intentional.
AUG = [ HorizontalFlip, ShiftScaleRotate, RandomRotate90, RandomBrightnessContrast,
Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, ]
print(len(AUG))
# Visual sanity check: apply the first transform (p=1 forces application)
# to one sample image and display it.
path0 = "./Animals/Leopard/pexels-photo-88234.jpeg"
image0c=cv2.imread(path0)  # OpenCV loads in BGR channel order
aug=AUG[0](p=1)
image2c=aug(image=image0c)['image']
plt.figure(figsize=(4,4))
# convert BGR -> RGB so matplotlib shows correct colors
plt.imshow(cv2.cvtColor(image2c,cv2.COLOR_BGR2RGB))
plt.show()
data_dir = './Animals/'
# One class per sub-directory of the data root; collect the class names.
Name = list(os.listdir(data_dir))
print(Name)
print(len(Name))
# Integer ids 0..n_classes-1 in the same listing order.
N = list(range(len(Name)))
# Forward map (class name -> id) and inverse map (id -> class name,
# used later to decode predictions).
normal_mapping = dict(zip(Name, N))
reverse_mapping = dict(zip(N, Name))
dataset=[]
datalabel=[]
count=0
# Build the corpus: every image on disk yields len(AUG) augmented variants,
# each resized to 32x32 and scaled into [0, 1].
for file in tqdm(os.listdir(data_dir)):
    path=os.path.join(data_dir,file)
    for im in os.listdir(path):
        image0=load_img(os.path.join(path,im), grayscale=False, color_mode='rgb', target_size=(32,32))
        image1=np.asarray(image0)
        for i in range(len(AUG)):
            aug=AUG[i](p=1)
            image2=aug(image=image1)['image']
            image3=image2/255.0
            # Discard near-black results (total of all normalized
            # pixel values <= 20), e.g. from aggressive distortions.
            if image3.sum()>20:
                dataset+=[image3]
                datalabel+=[count]
    # Label id = index of the class directory in os.listdir order,
    # consistent with normal_mapping/reverse_mapping built above.
    count=count+1
m = len(dataset)
print(m)
# Reproducibly shuffle the sample indices, then take the first 75%
# for training and the remaining 25% for testing.
M = list(range(m))
random.seed(2021)
random.shuffle(M)
cut = (m // 4) * 3
images_arr = np.array(dataset)
labels_arr = np.array(datalabel)
trainX = images_arr[M[:cut]]
testX = images_arr[M[cut:]]
trainY = labels_arr[M[:cut]]
testY = labels_arr[M[cut:]]
# Preview the first 36 shuffled samples in a 6x6 grid.
fig,axs = plt.subplots(6,6,figsize=(15,15))
for i in range(36):
    image=dataset[M[i]]
    r=i//6  # grid row
    c=i%6   # grid column
    # hide tick marks for a cleaner image mosaic
    axs[r][c].set_xticks([])
    axs[r][c].set_yticks([])
    ax=axs[r][c].imshow(image)
plt.show()
# One-hot encode the training labels for categorical cross-entropy.
trainlabels1 = to_categorical(trainY)
trainlabels = np.array(trainlabels1)
# Second split: carve a 20% validation set out of the training portion
# (the 25% test split from above is kept separate for final evaluation).
trainx, testx, trainy, testy = train_test_split(trainX, trainlabels, test_size=0.2, random_state=44)
print(trainx.shape)
print(testx.shape)
print(trainy.shape)
print(testy.shape)
# On-the-fly augmentation applied to training batches during fit().
datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=20, zoom_range=0.2,
                             width_shift_range=0.2, height_shift_range=0.2, shear_range=0.1, fill_mode="nearest")
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
# Initialising the CNN classifier: three Conv/MaxPool stages followed by
# two fully connected layers and a 6-way softmax output.
classifier = Sequential()
# Convolution layer with 32 kernels of 3X3 shape with activation function ReLU
classifier.add(Conv2D(32, (3, 3), input_shape = (32, 32, 3), activation = 'relu', padding = 'same'))
# Max Pooling layer of size 2X2
classifier.add(MaxPooling2D(pool_size = (2, 2)))
# Second convolution layer
classifier.add(Conv2D(32, (3, 3), activation = 'relu', padding = 'same'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))
# Third convolution layer
classifier.add(Conv2D(32, (3, 3), activation = 'relu', padding = 'same'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))
# Flattening before the fully connected layers
classifier.add(Flatten())
# Fully connected layer with 512 neurons
classifier.add(Dense(units = 512, activation = 'relu'))
# Dropout with probability 0.5 to reduce overfitting
classifier.add(Dropout(0.5))
# Fully connected layer with 128 neurons
classifier.add(Dense(units = 128, activation = 'relu'))
# Final output layer with 6 neurons for the 6-class categorical prediction
# (the original comment said "5 neuron", which did not match the layer)
classifier.add(Dense(units = 6, activation = 'softmax'))
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
# FIX: Model.fit_generator is deprecated and removed in TensorFlow >= 2.6;
# Model.fit accepts generators/Sequence objects directly.
classifier.fit(datagen.flow(trainx, trainy, batch_size=32), epochs = 40)
# Evaluate the CNN on the held-out validation split.
# BUG FIX: the original predicted on datagen.flow(testx), which both
# augments the images AND shuffles the batches by default, so the
# predictions no longer lined up with the labels in testy and the
# confusion matrix was meaningless. Predict on the raw images instead.
y_pred = np.argmax(classifier.predict(testx), axis=1)
# Utilities for confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
# Confusion matrix: actual classes (rows) vs predicted classes (columns).
print(confusion_matrix(np.argmax(testy, axis=1), y_pred))
# Per-class precision/recall/F1 report.
print(classification_report(np.argmax(testy, axis=1), y_pred))
# Transfer learning: DenseNet201 backbone with ImageNet weights used as a
# frozen feature extractor; global average pooling produces a flat vector.
# NOTE(review): 32x32 is the minimum input DenseNet accepts, but the
# ImageNet weights were trained at much higher resolution — confirm the
# extracted features are useful at this size.
pretrained_model3 = tf.keras.applications.DenseNet201(input_shape=(32,32,3),
                                                      include_top=False,
                                                      weights='imagenet', pooling='avg')
pretrained_model3.trainable = False  # freeze all backbone weights
inputs3 = pretrained_model3.input
# Small classification head: 128-unit ReLU layer, then 6-way softmax.
x3 = tf.keras.layers.Dense(128, activation='relu')(pretrained_model3.output)
outputs3 = tf.keras.layers.Dense(6, activation='softmax')(x3)
model = tf.keras.Model(inputs=inputs3, outputs=outputs3)
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
# Train the head on augmented batches; validate on the raw validation split.
his=model.fit(datagen.flow(trainx, trainy, batch_size=32),
              validation_data=(testx,testy), epochs=40)
# Score the transfer-learning model on the untouched 25% test split.
testlabels = np.array(to_categorical(testY))
probabilities = model.predict(testX)
pred = np.argmax(probabilities, axis=1)
ground = np.argmax(testlabels, axis=1)
print(classification_report(ground, pred))
print('Test Accuracy:', accuracy_score(testY, pred))
# Pull the per-epoch curves recorded by model.fit().
history = his.history
train_acc = history['accuracy']
val_acc = history['val_accuracy']
train_loss = history['loss']
val_loss = history['val_loss']
# Accuracy: training vs validation.
steps = range(len(train_acc))
plt.plot(steps, train_acc, 'r', label='Accuracy of Training data')
plt.plot(steps, val_acc, 'b', label='Accuracy of Validation data')
plt.title('Training vs validation accuracy')
plt.legend(loc=0)
plt.figure()
plt.show()
# Loss: training vs validation.
steps = range(len(train_loss))
plt.plot(steps, train_loss, 'r', label='Loss of Training data')
plt.plot(steps, val_loss, 'b', label='Loss of Validation data')
plt.title('Training vs validation loss')
plt.legend(loc=0)
plt.figure()
plt.show()
# Predict the class of a single image from disk.
path0 = "./Animals/Leopard/pexels-photo-88234.jpeg"
# NOTE(review): this bare load_img call only displays inline in a notebook;
# in a plain script its return value is discarded.
load_img(path0,target_size=(200,200))
image=load_img(path0,target_size=(32,32))
image=img_to_array(image)
image=image/255.0  # same [0, 1] scaling used for training data
prediction_image=np.expand_dims(image, axis=0)  # add batch dim -> (1, 32, 32, 3)
print(prediction_image.shape)
prediction=model.predict(prediction_image)
value=np.argmax(prediction)
move_name=reverse_mapping[value]  # integer id -> class name
print("Prediction is {}.".format(move_name))
# Re-split and flatten each 32x32x3 image into a 3072-long feature vector
# for the classical models (SVM, k-NN) below.
# FIX: the original hard-coded the row counts (1485 / 495), which breaks as
# soon as the dataset size changes; derive the split point and let reshape
# infer the row count with -1.
split = (m // 4) * 3
trainX = np.array(dataset)[M[:split]].reshape(-1, 32 * 32 * 3)
testX = np.array(dataset)[M[split:]].reshape(-1, 32 * 32 * 3)
trainY = np.array(datalabel)[M[:split]]
testY = np.array(datalabel)[M[split:]]
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_random_state
from sklearn.preprocessing import LabelEncoder
def projection_simplex(v, z=1):
    """Project vector v onto the simplex {w : w >= 0, sum(w) = z}.

    Uses the sort-based algorithm of Duchi et al. (2008); O(n log n).
    """
    sorted_desc = np.sort(v)[::-1]
    cum = np.cumsum(sorted_desc) - z
    ranks = np.arange(1, v.shape[0] + 1)
    # Support of the projection: entries that remain positive.
    support = sorted_desc - cum / ranks > 0
    rho = ranks[support][-1]
    tau = cum[support][-1] / float(rho)
    # Shift by the threshold tau and clip negatives to zero.
    return np.clip(v - tau, 0, None)
class MulticlassSVM(BaseEstimator, ClassifierMixin):
    """Multiclass SVM (Crammer-Singer formulation) trained by dual
    block-coordinate ascent.

    Parameters
    ----------
    C : float
        Regularization strength (upper bound on the dual coefficients).
    max_iter : int
        Maximum number of passes over the training set.
    tol : float
        Stop when the summed optimality violation falls below
        ``tol`` times the violation measured on the first pass.
    random_state : int, RandomState or None
        Seed for the sample-order shuffle.
    verbose : int
        ``>= 1`` prints per-iteration progress.
    """

    def __init__(self, C=1, max_iter=50, tol=0.05,
                 random_state=None, verbose=0):
        self.C = C
        self.max_iter = max_iter
        # BUG FIX: the original read "self.tol = tol," — the trailing comma
        # stored a 1-tuple, so the "vratio < self.tol" comparison in fit()
        # raised TypeError (float < tuple) on the first iteration.
        self.tol = tol
        self.random_state = random_state
        self.verbose = verbose

    def _partial_gradient(self, X, y, i):
        # Partial gradient of the dual objective for the ith sample.
        g = np.dot(X[i], self.coef_.T) + 1
        g[y[i]] -= 1
        return g

    def _violation(self, g, y, i):
        # KKT optimality violation for the ith sample.
        smallest = np.inf
        for k in range(g.shape[0]):
            if k == y[i] and self.dual_coef_[k, i] >= self.C:
                continue
            elif k != y[i] and self.dual_coef_[k, i] >= 0:
                continue
            smallest = min(smallest, g[k])
        return g.max() - smallest

    def _solve_subproblem(self, g, y, norms, i):
        # Exact coordinate update for one sample via simplex projection.
        Ci = np.zeros(g.shape[0])
        Ci[y[i]] = self.C
        beta_hat = norms[i] * (Ci - self.dual_coef_[:, i]) + g / norms[i]
        z = self.C * norms[i]
        beta = projection_simplex(beta_hat, z)
        return Ci - self.dual_coef_[:, i] - beta / norms[i]

    def fit(self, X, y):
        """Fit on X of shape (n_samples, n_features) and labels y."""
        n_samples, n_features = X.shape
        # Normalize labels to 0..n_classes-1.
        self._label_encoder = LabelEncoder()
        y = self._label_encoder.fit_transform(y)
        # Initialize primal and dual coefficients.
        n_classes = len(self._label_encoder.classes_)
        self.dual_coef_ = np.zeros((n_classes, n_samples), dtype=np.float64)
        self.coef_ = np.zeros((n_classes, n_features))
        # Pre-compute sample norms used by the subproblem scaling.
        norms = np.sqrt(np.sum(X ** 2, axis=1))
        # Fixed shuffled visiting order for the coordinate updates.
        rs = check_random_state(self.random_state)
        ind = np.arange(n_samples)
        rs.shuffle(ind)
        violation_init = None
        for it in range(self.max_iter):
            violation_sum = 0
            for ii in range(n_samples):
                i = ind[ii]
                # All-zero samples can be safely ignored.
                if norms[i] == 0:
                    continue
                g = self._partial_gradient(X, y, i)
                v = self._violation(g, y, i)
                violation_sum += v
                if v < 1e-12:
                    continue
                # Solve the subproblem and update primal/dual coefficients.
                delta = self._solve_subproblem(g, y, norms, i)
                self.coef_ += (delta * X[i][:, np.newaxis]).T
                self.dual_coef_[:, i] += delta
            if it == 0:
                violation_init = violation_sum
            # FIX: guard against division by zero when the very first pass
            # already has no violation (problem solved at initialization).
            if violation_init == 0:
                break
            vratio = violation_sum / violation_init
            if self.verbose >= 1:
                # Restored: the original left this print commented out,
                # making verbose a no-op except for "Converged".
                print("iter", it + 1, "violation", vratio)
            if vratio < self.tol:
                if self.verbose >= 1:
                    print("Converged")
                break
        return self

    def predict(self, X):
        """Return predicted labels (in the original label space) for X."""
        decision = np.dot(X, self.coef_.T)
        pred = decision.argmax(axis=1)
        return self._label_encoder.inverse_transform(pred)
if __name__ == '__main__':
    # Train the Crammer-Singer SVM on the flattened 3072-d image vectors.
    clf = MulticlassSVM(C=0.1, tol=0.01, max_iter=100, random_state=0, verbose=1)
    clf.fit(trainX, trainY)
    # ClassifierMixin.score reports mean accuracy on the held-out split.
    print(clf.score(testX, testY))
    predictions = clf.predict(testX)
def accuracy(y_true, y_pred):
    """Return the fraction of positions where y_true equals y_pred.

    Expects array-like inputs whose ``==`` comparison is elementwise
    (e.g. numpy arrays).
    """
    matches = np.sum(y_true == y_pred)
    return matches / len(y_true)
# Report SVM performance on the test split.
print("SVM Accuracy: ", accuracy(testY, predictions))
# FIX: sklearn metrics take (y_true, y_pred) in that order; the original
# passed predictions first, which swaps per-class precision and recall in
# the report and transposes the confusion matrix.
print(classification_report(testY, predictions))
print(f"The model is {accuracy_score(testY, predictions)*100}% accurate")
# FIX: a bare expression is discarded when run as a script — print it.
print(confusion_matrix(testY, predictions))
# Calculate distance between two points
def minkowski_distance(a, b, p=1):
    """Minkowski distance of order ``p`` between vectors ``a`` and ``b``.

    ``p=1`` gives Manhattan distance, ``p=2`` Euclidean.
    """
    dims = len(a)
    # Sum |a_d - b_d|^p over every dimension, then take the p-th root.
    total = sum(abs(a[d] - b[d]) ** p for d in range(dims))
    return total ** (1 / p)
# Sanity-check the distance function on two training samples.
# NOTE: the return value is intentionally discarded (notebook leftover).
minkowski_distance(a=trainX[0], b=trainX[1], p=1)
# Wrap the splits in pandas containers so k-NN can use the index for voting.
X_train = pd.DataFrame(trainX)
y_train = pd.Series(trainY)
X_test = pd.DataFrame(testX)
y_test = pd.Series(testY)
# FIX: the original file contained the IPython cell magic "%%time" here,
# which is a SyntaxError in a plain .py module; it has been removed.
def knn_predict(X_train, X_test, y_train, y_test, k, p):
    """Brute-force k-nearest-neighbours prediction.

    For every row of X_test, computes the Minkowski-``p`` distance to all
    rows of X_train, keeps the ``k`` closest, and returns the majority
    label per test row as a list. ``y_test`` is accepted for signature
    compatibility but not used.
    """
    # Counter performs the label voting among the k neighbours.
    from collections import Counter
    predictions = []
    for query in X_test.values:
        # Distance from this test point to every training point.
        dists = [minkowski_distance(query, ref, p=p) for ref in X_train.values]
        # Index by the training labels' index so neighbours map to labels.
        dist_frame = pd.DataFrame(data=dists, columns=['dist'],
                                  index=y_train.index)
        # The k rows with the smallest distances.
        nearest = dist_frame.sort_values(by=['dist'], axis=0)[:k]
        # Majority vote over the neighbours' labels.
        votes = Counter(y_train[nearest.index])
        predictions.append(votes.most_common()[0][0])
    return predictions
# Make predictions on the test dataset with k=5 neighbours, Manhattan distance.
y_hat_test = knn_predict(X_train, X_test, y_train, y_test, k=5, p=1)
print(y_hat_test)
# FIX: sklearn metrics take (y_true, y_pred) in that order; the original
# passed the predictions first, swapping per-class precision and recall
# and transposing the confusion matrix.
print(classification_report(y_test, y_hat_test))
print(f"The model is {accuracy_score(y_test, y_hat_test)*100}% accurate")
# FIX: a bare expression is discarded when run as a script — print it.
print(confusion_matrix(y_test, y_hat_test))