We need an object detector to locate the ball in each frame so that its position can be fed to the tracker.
To save time we use a YOLOv3 model pre-trained on MS COCO. You can substitute any detector you like, as long as it returns the object's position.
Be sure to restart the runtime if instructed
!pip3 install imageai
!pip3 install keras
!pip3 install tensorflow
If the links below have expired, please download the pre-trained model from ImageAI's official website: https://imageai.readthedocs.io/en/latest/detection/index.html
!wget https://dl.acytoo.com/yolo.h5 # if this site is down, use https://play.acytoo.com/yolo.h5 instead
Define a general detector model: YOLOv3, trained on the MS COCO dataset.
YOLO (You Only Look Once) is a one-stage convolutional neural network for performing object detection in real time. Compared to two-stage CNNs, YOLO is faster and consumes less computing resources, but its precision is lower; it's a speed-accuracy trade-off.
ImageAI is a Python module that provides many pre-trained object detection models.
For detailed information about imageai, please refer to https://imageai.readthedocs.io/en/latest/detection/index.html
from imageai.Detection import ObjectDetection
detector = ObjectDetection()
detector.setModelTypeAsYOLOv3()
detector.setModelPath("yolo.h5")
detector.loadModel()
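To confirm the model loaded correctly, you can run it on any image you have on the runtime. This is a minimal sketch; the file names street.jpg and street_out.jpg are placeholders, not files provided by this notebook.
detections = detector.detectObjectsFromImage(input_image="street.jpg",          # placeholder input image
                                             output_image_path="street_out.jpg", # annotated copy written here
                                             minimum_percentage_probability=30)
for item in detections:
    print(item["name"], item["percentage_probability"], item["box_points"])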
YOLO trained on MS COCO can detect 80 kinds of objects; we only need the ball here, so after running YOLO on the input image we keep only the detections labeled 'sports ball'.
Ball detector: the input is an image; it returns the detection probabilities and locations of any balls found.
def ball_detector(img, threshold=30):
"""
:param img: image matrix
:param threshold: min probability that one object is detected as ball
:return: [probability], [location]
"""
img_ori_dec, detections = detector.detectObjectsFromImage(input_image=img, input_type = "array",
output_type="array", minimum_percentage_probability=threshold)
prob = []
loc = []
for item in detections:
if item['name'] == 'sports ball':
prob.append(item['percentage_probability'])
loc.append(item['box_points'])
return prob, loc
%matplotlib inline
import matplotlib.pyplot as plt
import cv2
!wget https://dl.acytoo.com/ball.mp4
!wget https://dl.acytoo.com/multiObject.avi
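Before wiring the detector into the tracker, it is worth sanity-checking ball_detector on a single frame of the downloaded video. The snippet below is our own quick check (first frame, green box, inline plot), not part of the original pipeline.
vid = cv2.VideoCapture('ball.mp4')
ret, frame = vid.read()  # grab the first frame only
vid.release()
if ret:
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV reads BGR; matplotlib expects RGB
    prob, loc = ball_detector(frame)
    print(prob, loc)
    if len(loc) > 0:
        x1, y1, x2, y2 = loc[0]
        frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    plt.imshow(frame)
    plt.show()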
In the detector above, each result is given as the top-left (x1, y1) and bottom-right (x2, y2) vertices of a bounding box. To apply the Kalman filter we need the centroid of the object (the ball), which can be computed as $$ x_c = x_1 + \frac{x_2 - x_1}{2}\\ y_c = y_2 + \frac{y_1 - y_2}{2}\\ $$
def get_centroid(loc):
if loc is None:
return None
return int(loc[0] + (loc[2] - loc[0]) / 2), int(loc[3] + (loc[1] - loc[3]) / 2)
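The Kalman filter below tracks the centroid with a four-dimensional state (position and velocity), driven by a known acceleration input $u = (u_x, u_y)$. In the notation used by the code, the motion and measurement models are
$$ \mathbf{x}_k = \begin{bmatrix} x \\ y \\ \dot{x} \\ \dot{y} \end{bmatrix}, \qquad \mathbf{x}_{k+1} = A\,\mathbf{x}_k + B\,\mathbf{u}, \qquad \mathbf{z}_k = H\,\mathbf{x}_k + \text{noise} $$
$$ A = \begin{bmatrix} 1 & 0 & \Delta t & 0 \\ 0 & 1 & 0 & \Delta t \\ 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & 1 \end{bmatrix}, \quad B = \begin{bmatrix} \Delta t^2/2 & 0 \\ 0 & \Delta t^2/2 \\ \Delta t & 0 \\ 0 & \Delta t \end{bmatrix}, \quad H = \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & 1 & 0 & 0 \end{bmatrix} $$
predict propagates the state and covariance ($\mathbf{x} \leftarrow A\mathbf{x} + B\mathbf{u}$, $P \leftarrow A P A^T + Q$), and update corrects them with a measurement $\mathbf{z}$ using the Kalman gain $K = P H^T (H P H^T + R)^{-1}$.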
import numpy as np
from scipy.optimize import linear_sum_assignment # to assign detectors to objects
class KalmanFilter():
def __init__(self, dt=0.1, u_x=1, u_y=1,
std_acc=1, x_std_meas=0.1, y_std_meas=0.1):
"""
:param dt: sampling time (time for 1 cycle)
:param u_x: acceleration in x-direction
:param u_y: acceleration in y-direction
:param std_acc: process noise magnitude
:param x_std_meas: standard deviation of the measurement in x-direction
:param y_std_meas: standard deviation of the measurement in y-direction
"""
self.dt = dt # Define sampling time
self.u = np.matrix([[u_x],[u_y]]) # Define the control input variables
self.x = np.matrix([[0], [0], [0], [0]]) # Initial State
# Define the State Transition Matrix A
self.A = np.matrix([[1, 0, self.dt, 0],
[0, 1, 0, self.dt],
[0, 0, 1, 0],
[0, 0, 0, 1]])
# Define the Control Input Matrix B
self.B = np.matrix([[(self.dt**2)/2, 0],
[0,(self.dt**2)/2],
[self.dt,0],
[0,self.dt]])
# Define Measurement Mapping Matrix
self.H = np.matrix([[1, 0, 0, 0],
[0, 1, 0, 0]])
#Initial Process Noise Covariance
self.Q = np.matrix([[(self.dt**4)/4, 0, (self.dt**3)/2, 0],
[0, (self.dt**4)/4, 0, (self.dt**3)/2],
[(self.dt**3)/2, 0, self.dt**2, 0],
[0, (self.dt**3)/2, 0, self.dt**2]]) * std_acc**2
#Initial Measurement Noise Covariance
self.R = np.matrix([[x_std_meas**2,0],
[0, y_std_meas**2]])
#Initial Covariance Matrix
self.P = np.eye(self.A.shape[1])
def predict(self):
"""
:return: predicted (x, y) position as a 2x1 numpy matrix
"""
# Update time state
self.x = np.dot(self.A, self.x) + np.dot(self.B, self.u)
# Calculate error covariance
self.P = np.dot(np.dot(self.A, self.P), self.A.T) + self.Q
return self.x[0:2]
def update(self, z):
"""
:param z: measured (x, y) position as a 2x1 numpy array
:return: updated (x, y) position as a 2x1 numpy matrix
"""
# S = H*P*H'+R
S = np.dot(self.H, np.dot(self.P, self.H.T)) + self.R
# Calculate the Kalman Gain
# K = P * H'* inv(H*P*H'+R)
K = np.dot(np.dot(self.P, self.H.T), np.linalg.inv(S))
self.x = np.round(self.x + np.dot(K, (z - np.dot(self.H, self.x))))
I = np.eye(self.H.shape[1])
# Update error covariance matrix
self.P = (I - (K * self.H)) * self.P
return self.x[0:2]
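As a quick standalone check (our own synthetic example, not part of the ball-tracking pipeline), we can feed the filter a few noisy measurements of a point drifting to the right and watch the predictions follow it.
kf_test = KalmanFilter(dt=0.1)
for k in range(5):
    # noisy measurement of a point moving right by roughly 10 px per step (synthetic data)
    z = np.array([[10.0 * k + np.random.randn()], [50.0 + np.random.randn()]])
    pred = kf_test.predict()   # a-priori (x, y) estimate
    kf_test.update(z)          # correct with the measurement
    print('step', k, 'measured', z.ravel(), 'predicted', np.asarray(pred).ravel())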
The green square is the measurement (detection), and the red dot is the Kalman prediction.
def predict_ball(src, dest):
"""
:param src: source video for detection and prediction
:param dest: output video after processing
detection: green square box
prediction: red dot
"""
COLOR_RED = (255, 0, 0)
COLOR_GREEN = (0, 255, 0)
############################ init kalman filter ##############################
kalman = KalmanFilter()
# set first point, the first location is VERY important
kalman.update(np.array([[906], [311]], dtype=np.float32))
vid = cv2.VideoCapture(src)
if not vid.isOpened():
print("File open error, please download video")
############################ params for saving video #########################
fps = int(vid.get(cv2.CAP_PROP_FPS)) # fps
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) # height
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) # width
size = (width, height)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter(dest, fourcc, fps, size)
current_measurement = None
current_prediction = (905, 311)
while vid.isOpened(): # read all video frames
ret, frame = vid.read()
if not ret:
break
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # convert bgr to rgb
_, loc = ball_detector(frame)
lpx, lpy = int(current_prediction[0]), int(current_prediction[1])
frame = cv2.circle(frame, (lpx, lpy), 0, COLOR_RED, 20) # plot prediction dot
if len(loc) > 0: # detection -> draw bounding box, update kalman filter
frame = cv2.rectangle(frame, (loc[0][0], loc[0][1]), (loc[0][2], loc[0][3]), COLOR_GREEN, 2)
# update kalman
current_measurement = np.array(get_centroid(loc[0]), dtype=np.float32).reshape(2, 1)
kalman.update(current_measurement)
(x, y) = kalman.predict()
current_prediction = (int(x), int(y))
plt.imshow(frame, interpolation='nearest')
plt.show()
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
out.write(frame)
vid.release() # release video object
out.release()
sin = 'ball.mp4'
sin_out = 'sin_track.avi'
print("processing sin")
predict_ball(sin, sin_out)