Notebook

In [6]:

import os
from os.path import join
import argparse
import subprocess
import cv2
from tqdm import tqdm


# Sub-directory, relative to the data root, that holds each FaceForensics++
# subset. Insertion order matters: it is the order used when every subset is
# processed in one run.
_ORIGINAL_ROOT = 'original_sequences'
_MANIPULATED_ROOT = 'manipulated_sequences'

DATASET_PATHS = {'original': _ORIGINAL_ROOT}
DATASET_PATHS.update(
    (subset, _MANIPULATED_ROOT + '/' + subset)
    for subset in ('Deepfakes', 'Face2Face', 'FaceSwap', 'DeepFakeDetection')
)
DATASET_PATHS['DeepFakeDetection_original'] = _ORIGINAL_ROOT + '/actors'

# Compression-level folder names accepted by the extraction helpers.
COMPRESSION = ['c{}'.format(level) for level in (0, 23, 40)]

In [7]:

# Attach Google Drive to the Colab runtime so the project files can be
# reached through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

In [9]:

# Run the rest of the notebook from the project folder on Drive so that
# relative paths resolve against it.
PROJECT_DIR = '/content/drive/MyDrive/AM205_FinalProject/'
os.chdir(PROJECT_DIR)

In [10]:

def extract_frames(data_path, output_path, method='cv2', interval=100):
    """Extract frames of one video into ``output_path`` as PNG files.

    Args:
        data_path: Path of the video file to read.
        output_path: Directory that receives the frames; created if missing.
        method: ``'cv2'`` keeps every ``interval``-th frame and numbers the
            written files consecutively starting at ``0000.png``.
            ``'ffmpeg'`` hands the whole video to the ``ffmpeg`` executable
            with the pattern ``%04d.png``. FFmpeg won't start from 0, so
            those files would have to be renamed to keep the filenames
            coherent, and ``interval`` is not applied on that path.
        interval: Sampling step used by the ``'cv2'`` method; must be > 0.

    Raises:
        ValueError: If ``method`` is unknown, or ``interval`` is not positive
            for the ``'cv2'`` method.
        subprocess.CalledProcessError: If ``ffmpeg`` exits with an error.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Validate before touching the filesystem so a bad call leaves no
    # directory behind. ValueError is still an Exception, so callers that
    # catch the previous generic Exception keep working.
    if method not in ('ffmpeg', 'cv2'):
        raise ValueError('Wrong extract frames method: {}'.format(method))
    if method == 'cv2' and interval <= 0:
        # Previously interval=0 surfaced as a ZeroDivisionError mid-loop.
        raise ValueError(
            'interval must be a positive number, got {!r}'.format(interval))

    os.makedirs(output_path, exist_ok=True)

    if method == 'ffmpeg':
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to ffmpeg verbatim.
        subprocess.check_output(
            ['ffmpeg', '-i', data_path, join(output_path, '%04d.png')],
            stderr=subprocess.STDOUT)
        return

    reader = cv2.VideoCapture(data_path)
    try:
        frame_num = 0  # index used in the output filename
        i = 0          # index of the current frame inside the video
        while reader.isOpened():
            # read() is grab() followed by retrieve(); only the frames that
            # are actually kept need the retrieve() step.
            if not reader.grab():
                break
            if i % interval == 0:
                success, image = reader.retrieve()
                if not success:
                    break
                cv2.imwrite(
                    join(output_path, '{:04d}.png'.format(frame_num)), image)
                frame_num += 1
            i += 1
    finally:
        # Free the capture handle even if writing a frame raises.
        reader.release()


def extract_method_videos(data_path, dataset, compression,
                          method='cv2', interval=100):
    """Extract frames for every video of one method and compression level in
    the FaceForensics++ file structure.

    Videos are read from ``<data_path>/<DATASET_PATHS[dataset]>/<compression>/videos``
    and the frames of each one are written to a folder named after the video
    (extension removed) under the sibling ``images`` directory.

    Args:
        data_path: Root directory of the dataset.
        dataset: Key of ``DATASET_PATHS`` selecting the subset.
        compression: Name of the compression-level folder.
        method: Forwarded to ``extract_frames``.
        interval: Forwarded to ``extract_frames``.

    Raises:
        KeyError: If ``dataset`` is not a key of ``DATASET_PATHS``.
    """
    subset_root = join(data_path, DATASET_PATHS[dataset], compression)
    videos_path = join(subset_root, 'videos')
    images_path = join(subset_root, 'images')

    # os.listdir returns entries in arbitrary order; sorting makes runs
    # repeatable. Dotfiles and sub-directories are not videos, and a dotfile
    # would otherwise map to an empty or hidden image-folder name.
    videos = [
        entry for entry in sorted(os.listdir(videos_path))
        if not entry.startswith('.')
        and os.path.isfile(join(videos_path, entry))
    ]

    for video in tqdm(videos):
        # splitext drops only the final extension, so names that contain
        # additional dots keep a distinct output folder.
        image_folder = os.path.splitext(video)[0]
        extract_frames(join(videos_path, video),
                       join(images_path, image_folder),
                       method=method, interval=interval)

In [ ]:

# Namespace stays imported so the name remains defined for any cell that
# still builds its arguments by hand.
from argparse import Namespace

p = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
p.add_argument('--data_path', type=str)
p.add_argument('--dataset', '-d', type=str,
               choices=list(DATASET_PATHS.keys()) + ['all'],
               default='all')
p.add_argument('--compression', '-c', type=str, choices=COMPRESSION,
               default='c0')

# The run configuration goes through the parser as an explicit argument list
# (rather than sys.argv, which in a notebook belongs to the kernel) so that
# --dataset and --compression are checked against their allowed choices
# before any video is processed.
# To process the pristine actor videos instead, pass
# '--dataset', 'DeepFakeDetection_original'.
args = p.parse_args([
    '--data_path', '/content/drive/My Drive/AM205_FinalProject/data_videos',
    '--dataset', 'DeepFakeDetection',
    '--compression', 'c40',
])

# Expand 'all' into the concrete subsets without mutating ``args``.
if args.dataset == 'all':
    selected_datasets = list(DATASET_PATHS.keys())
else:
    selected_datasets = [args.dataset]

for dataset in selected_datasets:
    extract_method_videos(data_path=args.data_path,
                          dataset=dataset,
                          compression=args.compression)

 85%|████████▍ | 2606/3068 [5:42:00<1:00:04,  7.80s/it]

In [ ]: