# --- Colab / Kaggle environment setup (IPython notebook magics, not plain Python) ---
# Mount Google Drive so the dataset and outputs persist between sessions.
from google.colab import drive
drive.mount('/content/drive')
# !cp -r /content/drive/MyDrive/classification2/ /content/drive/MyDrive/classification/
#!rm -rf '/content/drive/MyDrive/classification'
# !mkdir '/content/drive/MyDrive/classification'
# Work inside the project folder on Drive.
%cd '/content/drive/MyDrive/classification'
# downloading the dataset from Kaggle
!pip install kaggle
# !mkdir .kaggle
# !touch .kaggle/kaggle.json
# Place the Kaggle API token where the kaggle CLI expects it (~/.kaggle/kaggle.json).
!mv .kaggle /root/
!mkdir ~/.kaggle
# !touch ~/.kaggle/kaggle.json
!mv kaggle.json ~/.kaggle
# downloading the dataset from Kaggle
#!pip install kaggle
#!mkdir .kaggle
#!touch .kaggle/kaggle.json
#!mv .kaggle /root/
#!mkdir ~/.kaggle
#!touch ~/.kaggle/kaggle.json
#!kaggle datasets download -d maricinnamon/caltech101-airplanes-motorbikes-schooners
# downloading the dataset from Kaggle (3-class subset of Caltech101)
!kaggle datasets download -d maricinnamon/caltech101-airplanes-motorbikes-schooners
# # once the zip file is downloaded, extracting the contents inside it
# images_path = '/content/drive/MyDrive/classification/caltech101-airplanes-motorbikes-schooners.zip'
# from zipfile import ZipFile
# with ZipFile(images_path, 'r') as zip:
# zip.extractall()
%cd /content/drive/MyDrive/classification/
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import InverseTimeDecay

# Initialising the CNN classifier: three Conv+MaxPool stages on 64x64 RGB input,
# flattened into two fully-connected layers and a 3-way softmax output.
classifier = Sequential()
# Convolution layer with 32 kernels of 3x3 shape with activation function ReLU
classifier.add(Conv2D(32, (3, 3), input_shape=(64, 64, 3), activation='relu', padding='same'))
# Max Pooling layer of size 2x2 halves the spatial resolution
classifier.add(MaxPooling2D(pool_size=(2, 2)))
# Another convolution layer with 32 kernels of 3x3 shape, ReLU activation
classifier.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
# Third convolution + pooling stage
classifier.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
# Flattening the feature maps before the fully connected layers
classifier.add(Flatten())
# Fully connected layer with 512 neurons
classifier.add(Dense(units=512, activation='relu'))
# Dropout with probability 0.5 to reduce overfitting
classifier.add(Dropout(0.5))
# Fully connected layer with 128 neurons
classifier.add(Dense(units=128, activation='relu'))
# Final output layer with 3 neurons for the categorical classification
classifier.add(Dense(units=3, activation='softmax'))

# The legacy `Adam(lr=..., decay=..., epsilon=None)` keyword arguments were
# removed from tf.keras; `learning_rate` plus an InverseTimeDecay schedule
# reproduces the old per-iteration decay: lr / (1 + decay * iterations).
lr_schedule = InverseTimeDecay(initial_learning_rate=0.001,
                               decay_steps=1,
                               decay_rate=0.001)
opt = Adam(learning_rate=lr_schedule, beta_1=0.9, beta_2=0.999, amsgrad=False)
classifier.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
ImageDataGenerator is a powerful preprocessing utility that generates training and
testing data with common data augmentation techniques. It can also generate
training data from images stored in hierarchical directory structures.
For more options of ImageDataGenerator, see https://keras.io/preprocessing/image/
# import os
# import random
# import shutil
# # splitting the dataset into train, test
# source = '/content/drive/MyDrive/classification/train_test_split/train/schooner/'
# dest = '/content/drive/MyDrive/classification/train_test_split/test/schooner/'
# files = os.listdir(source)
# no_of_files = len(files) // 5
# print(no_of_files)
# for file_name in random.sample(files, no_of_files):
# shutil.move(os.path.join(source, file_name), dest)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Create data generator for training data with data augmentation and normalizing all
# values by 255
train_datagen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
# Test data is only rescaled -- no augmentation at evaluation time
test_datagen = ImageDataGenerator(rescale = 1./255)
# Setting training data generator's source directory
# Setting the target size to resize all the images to (64,64) as the model input layer expects 64X64 images
training_set = train_datagen.flow_from_directory('./train_test_split/train',
target_size = (64, 64),
batch_size = 32,
class_mode = 'categorical')
# Setting testing data generator's source directory
test_set = test_datagen.flow_from_directory('./train_test_split/test',
target_size = (64, 64),
batch_size = 32,
class_mode = 'categorical')
# There are 1338 training images and 333 test images in total
# There are 1338 training images and 333 test images in total.
# `fit_generator` was removed in TF 2.x -- `fit` accepts generators directly.
# Steps are derived from the generators' own sample counts and batch size
# instead of a hard-coded 32.
history = classifier.fit(
    training_set,
    steps_per_epoch=training_set.samples // training_set.batch_size,
    epochs=20,
    validation_data=test_set,
    validation_steps=test_set.samples // test_set.batch_size)
Always save the model and its weights after training
# Persist both the full model (architecture + weights) and the weights alone
# so the classifier can be reloaded without retraining.
classifier.save('./classifier.h5')
classifier.save_weights('./classifier_weights.h5')
Visualize the metrics produced by the model trained
import matplotlib.pyplot as plt

plt.style.use("ggplot")
plt.figure(figsize=(17, 10))
# plot the loss for both the training and validation data
plt.title("Loss & Val Loss")
# the original label "Epoch â„–" was mojibake (mis-encoded "No." sign); use ASCII
plt.xlabel("Epoch No.")
plt.ylabel("Loss")
plt.plot(history.epoch, history.history["loss"], label="loss")
plt.plot(history.epoch, history.history["val_loss"], label="val_loss")
plt.legend()
plt.show()
# create a new figure for the accuracies
import matplotlib.pyplot as plt

plt.style.use("ggplot")
plt.figure(figsize=(17, 10))
# plot training and validation accuracy per epoch
plt.plot(history.epoch, history.history["accuracy"], label="acc")
plt.plot(history.epoch, history.history["val_accuracy"], label="val_acc")
plt.title("Accuracy & Val Accuracy")
# the original label "Epoch â„–" was mojibake; use ASCII
plt.xlabel("Epoch No.")
plt.ylabel("Accuracy")
plt.legend()
# show() added for consistency with the loss plot above
plt.show()
Load the pre-trained saved model
from tensorflow.keras.models import load_model
import numpy as np
from tensorflow.keras.preprocessing import image
# Load the pre trained model from the HDF5 file saved previously.
# load_model already restores the weights; load_weights re-applies the
# separately saved weight file on top.
pretrained_model = load_model('./classifier.h5')
pretrained_model.load_weights('./classifier_weights.h5')
Testing the model on a test image from one of the test folders
import cv2
# Load a sample test image. OpenCV reads images in BGR channel order, but the
# model was trained on RGB images produced by ImageDataGenerator/load_img, so
# convert before predicting (the original fed BGR data straight to the model).
test_image = cv2.imread('./train_test_split/test/airplanes/image_0776.jpg')
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
# Resize the image to 64x64 to match the model's input layer
test_image = cv2.resize(test_image, (64, 64))
# Check the shape of the image array
print(test_image.shape)
# Add a leading batch dimension so the shape matches the Keras input (1, 64, 64, 3)
test_image = np.expand_dims(test_image, axis=0)
# Normalize to [0, 1] exactly as the training generator did (rescale=1/255)
test_image = test_image / 255.0
print('After expand_dims: ' + str(test_image.shape))
# Predict with the model loaded from disk (the original used the in-memory
# `classifier`, making the load above pointless)
result = pretrained_model.predict(test_image)
# Check the indices ImageDataGenerator allotted to each folder
classes_dict = training_set.class_indices
print(classes_dict)
# Folder names in index order, so the argmax index maps back to a class name
prediction_class = list(classes_dict.keys())
print(result[0])
# Index of the class with maximum probability
predicted_index = np.argmax(result[0])
# Print the name of the predicted class
print(prediction_class[predicted_index])
Generating a report on the test data
# Re-initializing the test data generator with shuffle=False so predictions
# line up with test_set.classes for the confusion matrix.
test_set = test_datagen.flow_from_directory('./train_test_split/test/',
                                            target_size=(64, 64),
                                            batch_size=32,
                                            shuffle=False,
                                            class_mode='categorical')
# `predict_generator` was removed in TF 2.x; `predict` accepts generators.
# Steps are computed from the generator (ceiling division) instead of the
# hard-coded 333-image count.
steps = (test_set.samples + test_set.batch_size - 1) // test_set.batch_size
Y_pred = classifier.predict(test_set, steps=steps)
# Predicted class index = position of the maximum probability
y_pred = np.argmax(Y_pred, axis=1)
# Utilities for the confusion matrix and per-class metrics
from sklearn.metrics import classification_report, confusion_matrix
# Confusion matrix of actual vs predicted classes
print(confusion_matrix(test_set.classes, y_pred))
# Per-class precision/recall/F1 report
print(classification_report(test_set.classes, y_pred, target_names=prediction_class))
#
import imutils
import os
import cv2
import datetime
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import LabelBinarizer
import pickle
These variables will store our input data, target labels, and the names of the image files.
# Containers for the whole dataset: image arrays, class labels, and the
# corresponding file paths (filled by the loading loop below).
data = []
labels = []
imagePaths = []
The class names below match the names of the dataset's sub-folders.
# Root folder of the dataset; each class lives in its own sub-folder
images_path = "/content/drive/MyDrive/classification/caltech101_classification"
classes = ["Motorbikes", "airplanes", "schooner"]
# counts number of images in each class
def classes_counter(labels, class_name):
    """Return how many entries of *labels* equal *class_name*."""
    # generator-based sum replaces the manual counter loop
    return sum(1 for label in labels if label == class_name)
# Load every .jpg of every class into `data`/`labels`/`imagePaths`.
# The original also ran cv2.imread per image just to compute an unused
# (h, w) pair -- a full extra decode of every file -- which is removed here.
for cl in classes:
    class_dir = images_path + "/" + cl + "/"
    print(class_dir)
    # keep only the .jpg files in this class folder, in a stable sorted order
    images_list = sorted(name for name in os.listdir(class_dir)
                         if name.endswith(".jpg"))
    # loop over the images of this class
    for img_name in images_list:
        image_path = os.path.sep.join([images_path, cl, img_name])
        # load the image already resized to the VGG16 input size (224x224)
        image = load_img(image_path, target_size=(224, 224))
        image = img_to_array(image)
        data.append(image)
        labels.append(cl)
        imagePaths.append(image_path)
# show the last loaded image as a sanity check
# NOTE(review): the source's indentation was lost in export; confirm whether
# the original displayed every image inside the loop instead.
imgplot = plt.imshow(image.astype('uint8'))
plt.show()
Let's check how many images are in each class.
# Count the collected samples per class in one pass over the class names.
counter_mtb, counter_arp, counter_sch = (
    classes_counter(labels, name)
    for name in ("Motorbikes", "airplanes", "schooner")
)
# bare tuple expression: displays the three counts in the notebook
counter_mtb, counter_arp, counter_sch
Here we get the maximum value of number of images.
# Largest per-class count: every class will be augmented up to this size
max_number = max(counter_mtb, counter_arp, counter_sch)
# bare expression: displays the value in the notebook
max_number
As we can see, we don't have many schooners, so we need to augment them. We'll also create two more pictures of motorbikes so that this class also has 800 images. We'll do scaling and rotating.
def make_scale(img):
    """Return a copy of *img* rescaled by a random factor in [0.8, 1.2]."""
    factor = random.uniform(0.8, 1.2)
    # fx/fy scale both axes by the same factor; output size is derived
    return cv2.resize(img.copy(), None, fx=factor, fy=factor)
def make_rotate(img):
    """Return a copy of *img* rotated about its center by a random angle in [-5, 5] degrees."""
    height, width = img.shape[:2]
    angle = random.uniform(-5, 5)
    # rotation matrix around the image center, no scaling
    rotation = cv2.getRotationMatrix2D((width / 2, height / 2), angle, scale=1)
    return cv2.warpAffine(img.copy(), rotation, (width, height))
Also, we need to check the number of images in each class in order to equalize number of images in each class. That's why we'll do an augmentation.
def augment_data(counter, max_number, class_name):
    """Grow class *class_name* up to *max_number* samples by appending scaled
    and rotated copies of its images to the global data/labels/imagePaths.

    Fixes over the original:
    - The original iterated over ALL images in `data` (any class) and tagged
      every augmented copy with *class_name*, mislabeling samples; here only
      this class's own images are used as augmentation sources.
    - The original appended the stale module-level `image_path` (the last path
      from the loading loop) for every copy; here each copy records the path
      of the image it was derived from.
    - The counter is incremented directly instead of re-scanning all labels
      after every append (O(n) per new sample).
    """
    # snapshot this class's images and paths before the lists start growing
    source = [(img, path) for img, lbl, path in zip(data, labels, imagePaths)
              if lbl == class_name]

    def _append_augmented(aug_img, path):
        # round-trip through a temporary JPEG so the augmented sample passes
        # through the same load_img/img_to_array pipeline as the originals
        cv2.imwrite("imgAug.jpg", aug_img)
        loaded = img_to_array(load_img("imgAug.jpg", target_size=(224, 224)))
        os.remove("imgAug.jpg")
        data.append(loaded)
        labels.append(class_name)
        imagePaths.append(path)

    # keep cycling over the source images until the class is large enough
    while counter < max_number:
        for img, path in source:
            if counter >= max_number:
                break
            # scaled copy
            _append_augmented(make_scale(img.copy()), path)
            counter += 1
            if counter >= max_number:
                break
            # rotated copy
            _append_augmented(make_rotate(img.copy()), path)
            counter += 1
Let's apply the augmentation to "Motorbikes" and "schooner" classes.
# Bring the two under-represented classes up to max_number samples
augment_data(counter_mtb, max_number, "Motorbikes")
augment_data(counter_sch, max_number, "schooner")
Let's check how many images are in each class after augmentation.
# Recount each class after augmentation to verify they are balanced.
counter_mtb, counter_arp, counter_sch = (
    classes_counter(labels, name)
    for name in ("Motorbikes", "airplanes", "schooner")
)
# bare tuple expression: displays the three counts in the notebook
counter_mtb, counter_arp, counter_sch
As you can see, now we have an equal number of images in each class.
Also, we need to normalize data (convert from range [0, 255] to [0, 1]).
# convert from the range [0, 255] to [0, 1]
# float32 halves memory use compared to the default float64
data = np.array(data, dtype="float32") / 255.0
Let's convert everything else to numpy arrays also.
# convert to numpy array so they can be indexed by train_test_split below
labels = np.array(labels)
imagePaths = np.array(imagePaths)
After that we convert our class labels to one-hot encoding.
# one-hot encoding on the labels (each class name becomes a 0/1 column)
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# bare expression: displays the encoded labels in the notebook
labels
And, in general, we need to check: if it is binary classification (two classes) or multiclass classification (three or more classes).
# LabelBinarizer emits a single column for a two-class problem; expand it to
# full one-hot so the softmax head always sees one column per class.
is_binary = len(lb.classes_) == 2
if is_binary:
    print("two classes")
    labels = to_categorical(labels)
Here we divide the data into train and test sets. I decided to split 95% to 5% respectively.
# 95/5 train/test split; random_state fixes the shuffle for reproducibility.
# Paths are split alongside the images so each test image keeps its filename.
split = train_test_split(data,
labels,
imagePaths,
test_size=0.05,
random_state=42)
And unpack split variable to different variables.
# unpack the data split
# train_test_split returns (train, test) pairs in input order: images, labels, paths
(trainImages, testImages) = split[:2]
(trainLabels, testLabels) = split[2:4]
(trainPaths, testPaths) = split[4:]
Also, we save the names of the test images in a .txt file in order to test the neural network later.
# Persist the held-out test image paths, one per line, for later evaluation.
# The context manager guarantees the file handle is closed (the original
# used a bare open/close pair).
with open("testing_multiclass.txt", "w") as f:
    f.write("\n".join(testPaths))
Here we'll use VGG16 neural network.
# VGG16 convolutional base pre-trained on ImageNet, without its classifier head
vgg = VGG16(weights="imagenet",
include_top=False,
input_tensor=Input(shape=(224, 224, 3)))
# freeze all layers of VGG in order not to train them
vgg.trainable = False
# flatten the max-pooling output of VGG for the dense head below
flatten = vgg.output
flatten = Flatten()(flatten)
And for class prediction (classification task) we'll use a softmax activation function.
# construct a fully-connected head to predict the class label:
# two 512-unit ReLU layers with dropout 0.5, then a softmax over the classes
softmaxHead = Dense(512, activation="relu")(flatten)
softmaxHead = Dropout(0.5)(softmaxHead)
softmaxHead = Dense(512, activation="relu")(softmaxHead)
softmaxHead = Dropout(0.5)(softmaxHead)
# output width equals the number of classes seen by the LabelBinarizer;
# the layer name "class_label" keys the loss/target dictionaries below
softmaxHead = Dense(len(lb.classes_),
activation="softmax",
name="class_label")(softmaxHead)
Finally, we need to add this output to our VGG16 model.
# Assemble the full model: VGG16 input through to the softmax head
model = Model(
inputs=vgg.input,
outputs=(softmaxHead))
Also, we need to define some hyperparameters (learning rate, number of epochs, size of batch).
# Training hyperparameters: initial learning rate, epochs, batch size
INIT_LR = 1e-4
NUM_EPOCHS = 40
BATCH_SIZE = 32
Then we define a dictionary to set the loss method: categorical crossentropy for the class label.
# Loss per named output: categorical crossentropy for the "class_label" head
losses = {
"class_label": "categorical_crossentropy",
}
We need to construct a dictionary for our target training output.
# Training targets keyed by output-layer name
trainTargets = {
"class_label": trainLabels,
}
We need to construct a second dictionary, this one for our target testing output.
# Validation targets keyed by output-layer name
testTargets = {
"class_label": testLabels,
}
Also, we would like to save only the best model from all epochs:
# Save only the best epoch (highest validation accuracy) to model.h5
model_path = "model.h5"
model_checkpoint_callback = ModelCheckpoint(
filepath=model_path,
monitor='val_accuracy',
mode='max',
save_best_only=True)
In the end, we initialize the optimizer, compile the model, and show the model summary.
# Initialize Adam with the chosen learning rate, compile with the loss
# dictionary defined above, and print the architecture summary.
opt = Adam(INIT_LR)
model.compile(loss=losses,
optimizer=opt,
metrics=["accuracy"])
print(model.summary())
Here we train our VGG16 network for class label prediction.
# Train the VGG16-based classifier; the checkpoint callback keeps only the
# epoch with the best validation accuracy in model.h5.
H = model.fit(
trainImages, trainTargets,
validation_data=(testImages, testTargets),
batch_size=BATCH_SIZE,
epochs=NUM_EPOCHS,
callbacks=[model_checkpoint_callback],
verbose=1)
# Serialize the fitted label binarizer so predictions can be decoded later.
# A context manager replaces the original unclosed-on-error open/close pair.
with open("lb.pickle", "wb") as f:
    f.write(pickle.dumps(lb))
Here we'll visualize loss and accuracy.
# Visualize training vs validation loss. The unused `lossNames` list was
# removed and the mojibake axis label ("Epoch â„–") replaced with ASCII.
N = np.arange(0, NUM_EPOCHS)
plt.style.use("ggplot")
plt.figure(figsize=(17, 10))
# plot the loss for both the training and validation data
plt.title("Loss & Val Loss")
plt.xlabel("Epoch No.")
plt.ylabel("Loss")
plt.plot(N, H.history["loss"], label="loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.legend()
plt.show()
# create a new figure for the accuracies; the mojibake axis label was fixed
plt.style.use("ggplot")
plt.figure(figsize=(17, 10))
plt.plot(N, H.history["accuracy"], label="acc")
plt.plot(N, H.history["val_accuracy"], label="val_acc")
plt.title("Accuracy & Val Accuracy")
plt.xlabel("Epoch No.")
plt.ylabel("Accuracy")
plt.legend()
# show() added for consistency with the loss plot above
plt.show()
Let's load filenames of test images.
# Read the saved test-image paths (one per line). The original left the file
# handle open and copied the list element-by-element in a loop.
path = "testing_multiclass.txt"
with open(path) as f:
    filenames = f.read().strip().split("\n")
# plain copy keeps `imagePaths` as a list, as the display loop below expects
imagePaths = list(filenames)
Let's load the VGG16 model and label binarizer.
# Reload the best checkpointed model and the label binarizer.
model = load_model("./model.h5")
# context manager closes the pickle file (the original leaked the handle)
with open("./lb.pickle", "rb") as f:
    lb = pickle.loads(f.read())
Let's predict class of test images.
# Preview predictions on the held-out images (first 10 only -- the original
# `cntr > 10` check actually showed 11). The unused (h, w) computation on the
# display image was removed.
cntr = 0
for imagePath in imagePaths:
    # load and preprocess exactly as during training: 224x224, scaled to [0, 1]
    image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image) / 255.0
    image = np.expand_dims(image, axis=0)
    # predict class probabilities
    labelPreds = model.predict(image)
    # class name with the largest predicted probability
    i = np.argmax(labelPreds, axis=1)
    label = lb.classes_[i][0]
    # reload with OpenCV for display; convert BGR -> RGB for matplotlib
    image = cv2.imread(imagePath)
    image = imutils.resize(image, width=600)
    print("class label = ", label)
    imgplot = plt.imshow(cv2.cvtColor(image,
                                      cv2.COLOR_BGR2RGB).astype('uint8'))
    plt.show()
    # stop after the first 10 test images
    cntr += 1
    if cntr >= 10:
        break
As you can see, our pre-trained model makes predictions quite correctly!