In the name of God

Feature extraction with a pre-trained deep network

In [1]:
import keras
import cv2
import os
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input

conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(224, 224, 3))
Using TensorFlow backend.
In [2]:
conv_base.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
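The summary shows that with include_top=False the network ends at block5_pool, so every 224x224 RGB input is mapped to a 7x7x512 feature map (25,088 values once flattened). A minimal sanity check, not part of the original notebook, that pushes a dummy image through conv_base to confirm this shape:

import numpy as np

dummy = np.zeros((1, 224, 224, 3), dtype=np.float32)
features = conv_base.predict(dummy)
print(features.shape)                  # expected: (1, 7, 7, 512)
print(features.reshape(1, -1).shape)   # expected: (1, 25088)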
In [3]:
base_dir = os.path.expanduser("./hand_dataset")
os.makedirs(base_dir, exist_ok=True)
base_dir
Out[3]:
'./hand_dataset'
In [6]:
cls = "2"
c = 0
path = os.path.join(base_dir, cls)
os.makedirs(path, exist_ok=True)

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:  # skip frames the camera failed to deliver
        continue
    frame = cv2.resize(frame, (224, 224))
    cv2.imshow('Webcam', frame)
    key = cv2.waitKey(1)
    if key == 13:    # 13 is the Enter key: stop capturing
        break
    elif key == 32:  # 32 is the Space key: save the current frame
        c += 1
        image_path = os.path.join(path, str(c) + ".png")
        print("save to: ", image_path)
        cv2.imwrite(image_path, frame)

cap.release()
cv2.destroyAllWindows()
save to:  ./hand_dataset\2\1.png
save to:  ./hand_dataset\2\2.png
save to:  ./hand_dataset\2\3.png
save to:  ./hand_dataset\2\4.png
save to:  ./hand_dataset\2\5.png
save to:  ./hand_dataset\2\6.png
save to:  ./hand_dataset\2\7.png
save to:  ./hand_dataset\2\8.png
save to:  ./hand_dataset\2\9.png
save to:  ./hand_dataset\2\10.png
save to:  ./hand_dataset\2\11.png
save to:  ./hand_dataset\2\12.png
save to:  ./hand_dataset\2\13.png
save to:  ./hand_dataset\2\14.png
save to:  ./hand_dataset\2\15.png
save to:  ./hand_dataset\2\16.png
save to:  ./hand_dataset\2\17.png
save to:  ./hand_dataset\2\18.png
save to:  ./hand_dataset\2\19.png
save to:  ./hand_dataset\2\20.png
save to:  ./hand_dataset\2\21.png
save to:  ./hand_dataset\2\22.png
save to:  ./hand_dataset\2\23.png
save to:  ./hand_dataset\2\24.png
save to:  ./hand_dataset\2\25.png
save to:  ./hand_dataset\2\26.png
save to:  ./hand_dataset\2\27.png
save to:  ./hand_dataset\2\28.png
save to:  ./hand_dataset\2\29.png
save to:  ./hand_dataset\2\30.png
In [9]:
total = 0
for root, dirs, files in os.walk("./hand_dataset"):
    total += len(files)
print("total files in a directory: ", total)
total files in a directory:  32
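flow_from_directory (used in the next cell) expects one subdirectory per class under base_dir, so a per-class image count is a useful check before extracting features. A small sketch, not in the original notebook:

for cls_name in sorted(os.listdir(base_dir)):
    cls_path = os.path.join(base_dir, cls_name)
    if os.path.isdir(cls_path):
        print(cls_name, "->", len(os.listdir(cls_path)), "images")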
In [11]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator


datagen = ImageDataGenerator(rescale=1./255)
batch_size = 25

def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 7, 7, 512))
    labels = np.zeros(shape=(sample_count))
    generator = datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary')
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            # Note that since generators yield data indefinitely in a loop,
            # we must `break` after every image has been seen once.
            break
    return features, labels, generator.class_indices

train_features, train_labels, dictionary = extract_features(base_dir, total)
Found 32 images belonging to 2 classes.
In [12]:
train_features = np.reshape(train_features, (-1, 7 * 7 * 512))
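After flattening, each image is represented by a single 7 * 7 * 512 = 25,088-dimensional feature vector. A quick shape check (assuming total is 32, as counted above):

print(train_features.shape)  # expected: (32, 25088)
print(train_labels.shape)    # expected: (32,)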
In [13]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(train_features, train_labels) 
Out[13]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')
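The classifier is fit on all extracted features with no held-out set, so the notebook gives no accuracy estimate. A rough sketch using scikit-learn's cross_val_score for a quick 3-fold estimate on the same features (not part of the original notebook, and only indicative with 32 samples):

from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(KNeighborsClassifier(n_neighbors=3),
                            train_features, train_labels, cv=3)
print(cv_scores, cv_scores.mean())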
In [14]:
dictionary
Out[14]:
{'5': 0, 'botri': 1}
In [15]:
def get_class_name(l):
    for name, label in dictionary.items():
        if label == l:
            return name
In [16]:
get_class_name(0)
Out[16]:
'5'
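get_class_name scans class_indices on every call; an equivalent alternative (a minor variant, not part of the original notebook) is to precompute the inverse mapping once:

label_to_name = {label: name for name, label in dictionary.items()}
label_to_name[0]  # '5'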
In [17]:
import cv2
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:  # skip frames the camera failed to deliver
        continue
    frame = cv2.resize(frame, (224, 224))
    # Match the training preprocessing: flow_from_directory yields RGB images
    # rescaled by 1/255, so convert the BGR webcam frame and rescale the same way
    # (VGG16's preprocess_input was not applied to the stored training features).
    x = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype('float32') / 255.0
    x = np.expand_dims(x, axis=0)
    feature = conv_base.predict(x)
    feature = np.reshape(feature, (-1, 7 * 7 * 512))
    predicted = neigh.predict(feature)
    text = get_class_name(predicted[0])
    cv2.putText(frame, text, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), lineType=cv2.LINE_AA)
    cv2.imshow('Webcam', frame)
    if cv2.waitKey(1) == 13:  # 13 is the Enter key: quit
        break
cap.release()
cv2.destroyAllWindows()
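A small refactoring sketch (an assumption, not the author's code): wrapping the per-frame pipeline in a helper keeps the capture loop short and keeps the inference preprocessing (resize, BGR-to-RGB, rescale by 1/255) in one place, in sync with the training-time ImageDataGenerator:

def classify_frame(frame_bgr):
    # resize to the VGG16 input size and match the training preprocessing
    small = cv2.resize(frame_bgr, (224, 224))
    x = cv2.cvtColor(small, cv2.COLOR_BGR2RGB).astype('float32') / 255.0
    x = np.expand_dims(x, axis=0)
    feature = conv_base.predict(x).reshape(1, -1)
    return get_class_name(neigh.predict(feature)[0])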
Introductory Deep Learning Course
Alireza Akhavan Pour
Thursday, 25 Bahman 1397
Class.Vision - AkhavanPour.ir - GitHub