fixed merge conflict

2023-10-12 21:17:07 -07:00 · 2023-10-12 21:17:07 -07:00 · 43d36335a6
commit 43d36335a6
parent e35245cd8c
12 changed files with 12124 additions and 12135 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,13 +1,13 @@
-.direnv/*
-venv/*
-output/*
-.envrc
-training_data/data*
-training_data/info*
-training_data/training_data_*/
-training_data/*.vec
-training_data/backgrounds.txt
-training_data/negatives
-training_data/opencv
-validation/cascade*
+.direnv/*
+venv/*
+output/*
+.envrc
+training_data/data*
+training_data/info*
+training_data/training_data_*/
+training_data/*.vec
+training_data/backgrounds.txt
+training_data/negatives
+training_data/opencv
+validation/cascade*
 validation/*.xlsx
--- a/Main.py
+++ b/Main.py
@ -1,153 +1,153 @@
-import cv2
-import argparse
-import time
-import os
-import datetime
-
-def dir_path(string):
-    if os.path.exists(string):
-        return string
-    else:
-        raise NotADirectoryError(string)
-
-def init_argparse() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser(
-        prog="FaceDetection",
-        usage="%(prog)s [OPTION]",
-        description="Run face localization"
-    )
-    parser.add_argument(
-        "-v", "--version", action="version", version=f"{parser.prog} version 1.0.1"
-    )
-    parser.add_argument(
-        "-d", "--dashboard", action='store_true', help="Flag to enable live dashboard with statistics - requires terminal width of 90 columns or greater"
-    )
-    parser.add_argument(
-        "-o", "--output", action='store_true', help="show the resultant directions"
-    )
-    parser.add_argument(
-        "-f", "--file", type=dir_path, nargs="?", help="File to scan instead of using the camera. Useful for generating training data"
-    )
-    parser.add_argument(
-        "-s", "--no-screen", action='store_true', help="Do not show the successful frames"
-    )
-    parser.add_argument(
-        "-t", "--training-data", action='store_true', help="When set, saves successful face-location images and coordinates to use for future training data"
-    )
-    parser.add_argument(
-        '--validate', action="store_true", help="if set, outputs frame_count and box coords for located faces for future validation"
-    )
-    return parser
-
-multiplication_factor = 0.05
-
-def get_adjustment_amount(imgSize, currentX, currentY, currentW, currentH):
-
-    current_top_left = [currentX, currentY]
-    current_bottom_right = [currentX + currentW, currentY + currentH]
-
-    current_top_right = [currentX + currentW, currentY]
-
-    # find the difference between the left gap and the right gap, divide it by two, and multiply it by the speed scale
-    horizontal_adjustment = multiplication_factor * (currentX - (imgSize[0] - current_top_right[0])) / 2
-    vertical_adjustment = multiplication_factor * (currentY - (imgSize[0] - current_bottom_right[1])) / 2
-
-    return [horizontal_adjustment, vertical_adjustment]
-
-frames_searched = 1
-faces_found = 0
-start_time = datetime.datetime.now()
-
-def draw_dashboard(keep_stat_line = False):
-    global frames_searched, faces_found, start_time
-
-    elapsed_time = datetime.datetime.now() - start_time
-
-    hours, remainder = divmod(elapsed_time.total_seconds(), 3600)
-    minutes, seconds = divmod(remainder, 60)
-
-    f_found = f"{faces_found} Faces found".ljust(16, ' ')
-    f_searched = f"{frames_searched} Frames searched".ljust(21, ' ')
-    success_rate = f"{round((faces_found / frames_searched) * 100, 1)}% Success rate".ljust(16, ' ')
-
-    if keep_stat_line:
-        print(f"{f_found} | {f_searched} | {success_rate} | {round(hours)}h {round(minutes)}m {round(seconds)}s elapsed", flush=True)
-    else:
-        print(f"{f_found} | {f_searched} | {success_rate} | {round(hours)}h {round(minutes)}m {round(seconds)}s elapsed", end="\r", flush=True)
-
-
-parser = init_argparse()
-args = parser.parse_args()
-
-if args.file:
-    cap = cv2.VideoCapture(args.file)
-else:
-    cap = cv2.VideoCapture(0, cv2.IMREAD_GRAYSCALE) # instead of grayscale you can also use -1, 0, or 1.
-faceCascade = cv2.CascadeClassifier(r"./cascades/cascade_5.xml") # CHECK THIS FIRST TROUBLE SHOOTING
-
-datestamp = "{:%Y_%m_%d %H_%M_%S}".format(datetime.datetime.now())
-output_dir = r"./output/" + datestamp + r"/"
-
-
-if args.training_data: 
-    if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-    with open(output_dir + r"found_faces.csv", 'a') as fd:
-        fd.write(f"frame_name, x, y, width, height\n")
-
-tmp, frm = cap.read()
-height, width, channels = frm.shape
-if (args.file):
-    print(f"Image is {height} tall and {width} wide")
-frame_count = 0
-start_timestamp = time.strftime("%Y%m%d-%H%M%S")
-# print(f"{height*.25}, {width}")
-del tmp, frm
-#Color is 1, grayscale is 0, and the unchanged is -1
-while(True):
-    ret, frame = cap.read()
-    frames_searched += 1
-    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
- 
-    # Detect faces in the image
-    faces = faceCascade.detectMultiScale(
-    gray,
-    scaleFactor=1.2,
-    minNeighbors=2,
-    # minSize=(70, 90)
-    minSize=(200, 200)
-    )
-
-    # Draw a rectangle around the faces
-    for (x, y, w, h) in faces:
-        if args.training_data: 
-            frame_name = frames_searched
-            with open(output_dir + r"found_faces.csv", 'a') as fd:
-                fd.write(f"frame_{frame_name}.jpg, {x}, {y}, {w}, {h}\n")
-            cv2.imwrite(output_dir + f"frame_{frame_name}.jpg", frame)
-
-        if args.validate:
-            with open(f"./validation/{start_timestamp}-validation.txt", 'a') as output_validation_file:
-                output_validation_file.write(f"{frame_count}, {x}, {y}, {x+w}, {y+h}\n")
-
-        faces_found += 1
-        adjustment_required = get_adjustment_amount([width, height], x, y, w, h)
-        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 255))
-
-        if args.output:
-            print(f"Adjust right: {adjustment_required[0]}".ljust(90, ' '), flush=True)
-            print(f"Adjust up   : {adjustment_required[1]}", flush=True)
-
-    if not args.no_screen: 
-        cv2.imshow('frame', frame)
-
-    if args.dashboard:
-        draw_dashboard()
-
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
-
-    frame_count += 1
-
-draw_dashboard(keep_stat_line=True)
-cap.release()
+import cv2
+import argparse
+import time
+import os
+import datetime
+
+def dir_path(string):
+    """Argparse ``type=`` callable that only accepts paths that exist.
+
+    Args:
+        string: Path text taken from the command line.
+
+    Returns:
+        ``string`` unchanged when ``os.path.exists`` reports that it exists.
+
+    Raises:
+        argparse.ArgumentTypeError: When the path does not exist. argparse
+            converts this exception into a normal usage error; other
+            exception types would escape as a traceback.
+    """
+    # Despite the name, any existing path is accepted (file or directory).
+    if not os.path.exists(string):
+        raise argparse.ArgumentTypeError(f"path does not exist: {string}")
+    return string
+
+def init_argparse() -> argparse.ArgumentParser:
+    """Create the command-line parser for the face-detection script.
+
+    Returns:
+        An ``argparse.ArgumentParser`` exposing -v/--version, -d/--dashboard,
+        -o/--output, -f/--file, -s/--no-screen, -t/--training-data and
+        --validate.
+    """
+    cli = argparse.ArgumentParser(
+        prog="FaceDetection",
+        usage="%(prog)s [OPTION]",
+        description="Run face localization",
+    )
+
+    # Options are registered in table order, which is also the order in
+    # which they are listed in the generated help text.
+    option_table = [
+        (("-v", "--version"), {"action": "version", "version": f"{cli.prog} version 1.0.1"}),
+        (("-d", "--dashboard"), {"action": "store_true", "help": "Flag to enable live dashboard with statistics - requires terminal width of 90 columns or greater"}),
+        (("-o", "--output"), {"action": "store_true", "help": "show the resultant directions"}),
+        # --file values are run through dir_path, so only existing paths pass.
+        (("-f", "--file"), {"type": dir_path, "nargs": "?", "help": "File to scan instead of using the camera. Useful for generating training data"}),
+        (("-s", "--no-screen"), {"action": "store_true", "help": "Do not show the successful frames"}),
+        (("-t", "--training-data"), {"action": "store_true", "help": "When set, saves successful face-location images and coordinates to use for future training data"}),
+        (("--validate",), {"action": "store_true", "help": "if set, outputs frame_count and box coords for located faces for future validation"}),
+    ]
+    for flags, settings in option_table:
+        cli.add_argument(*flags, **settings)
+    return cli
+
+# Speed scale applied to the raw pixel offset of the face from the centre.
+multiplication_factor = 0.05
+
+def get_adjustment_amount(imgSize, currentX, currentY, currentW, currentH):
+    """Return how far a detected box is from being centred in the image.
+
+    Args:
+        imgSize: Two-item sequence ``[width, height]`` of the frame.
+        currentX: Left edge of the box.
+        currentY: Top edge of the box.
+        currentW: Box width.
+        currentH: Box height.
+
+    Returns:
+        ``[horizontal_adjustment, vertical_adjustment]``. Each value is half
+        the difference between the two opposite gaps (left vs. right, top vs.
+        bottom), scaled by ``multiplication_factor``; both are 0 when the box
+        is centred.
+    """
+    right_edge = currentX + currentW
+    bottom_edge = currentY + currentH
+
+    # find the difference between the left gap and the right gap, divide it by two, and multiply it by the speed scale
+    horizontal_adjustment = multiplication_factor * (currentX - (imgSize[0] - right_edge)) / 2
+    # The bottom gap is measured against the image HEIGHT (imgSize[1]);
+    # using the width here skews the result on any non-square frame.
+    vertical_adjustment = multiplication_factor * (currentY - (imgSize[1] - bottom_edge)) / 2
+
+    return [horizontal_adjustment, vertical_adjustment]
+
+# Running counters shared between the capture loop and the dashboard.
+frames_searched = 1
+faces_found = 0
+start_time = datetime.datetime.now()
+
+def draw_dashboard(keep_stat_line = False):
+    """Print a one-line summary of detection statistics.
+
+    Args:
+        keep_stat_line: When true the line ends with a newline so it stays
+            on screen; otherwise it ends with a carriage return so the next
+            call overwrites it.
+    """
+    total_seconds = (datetime.datetime.now() - start_time).total_seconds()
+    hours, leftover = divmod(total_seconds, 3600)
+    minutes, seconds = divmod(leftover, 60)
+
+    percent = round((faces_found / frames_searched) * 100, 1)
+    columns = [
+        f"{faces_found} Faces found".ljust(16, ' '),
+        f"{frames_searched} Frames searched".ljust(21, ' '),
+        f"{percent}% Success rate".ljust(16, ' '),
+        f"{round(hours)}h {round(minutes)}m {round(seconds)}s elapsed",
+    ]
+
+    line_end = "\n" if keep_stat_line else "\r"
+    print(" | ".join(columns), end=line_end, flush=True)
+
+
+parser = init_argparse()
+args = parser.parse_args()
+
+# Frame source: a video file when -f/--file is given, otherwise camera 0.
+if args.file:
+    cap = cv2.VideoCapture(args.file)
+else:
+    # VideoCapture's second argument selects a capture backend, not a colour
+    # mode, so none is passed; frames are converted to greyscale in the loop.
+    cap = cv2.VideoCapture(0)
+faceCascade = cv2.CascadeClassifier(r"./cascades/cascade_10.xml") # CHECK THIS FIRST TROUBLE SHOOTING
+
+datestamp = "{:%Y_%m_%d %H_%M_%S}".format(datetime.datetime.now())
+output_dir = r"./output/" + datestamp + r"/"
+
+
+if args.training_data:
+    os.makedirs(output_dir, exist_ok=True)
+    with open(output_dir + r"found_faces.csv", 'a') as fd:
+        fd.write("frame_name, x, y, width, height\n")
+
+# Read one frame up front purely to learn the frame dimensions.
+tmp, frm = cap.read()
+if not tmp:
+    cap.release()
+    raise SystemExit("Could not read a frame from the video source")
+height, width, channels = frm.shape
+if (args.file):
+    print(f"Image is {height} tall and {width} wide")
+# frame_count numbers the frames processed by the loop below, starting at 0
+# (the dimension-probing frame above is not counted).
+frame_count = 0
+start_timestamp = time.strftime("%Y%m%d-%H%M%S")
+del tmp, frm
+while(True):
+    ret, frame = cap.read()
+    if not ret:
+        # End of the video file (or the camera stopped delivering frames):
+        # leave the loop instead of handing None to cvtColor.
+        break
+    frames_searched += 1
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+    # Detect faces in the image
+    faces = faceCascade.detectMultiScale(
+        gray,
+        scaleFactor=1.2,
+        minNeighbors=2,
+        # minSize=(70, 90)
+        minSize=(200, 200)
+    )
+
+    # Draw a rectangle around the faces
+    for (x, y, w, h) in faces:
+        if args.training_data:
+            # Record the box in the CSV and save the frame it was found in.
+            frame_name = frames_searched
+            with open(output_dir + r"found_faces.csv", 'a') as fd:
+                fd.write(f"frame_{frame_name}.jpg, {x}, {y}, {w}, {h}\n")
+            cv2.imwrite(output_dir + f"frame_{frame_name}.jpg", frame)
+
+        if args.validate:
+            # Corner format (x1, y1, x2, y2), one line per detected face.
+            with open(f"./validation/{start_timestamp}-validation.txt", 'a') as output_validation_file:
+                output_validation_file.write(f"{frame_count}, {x}, {y}, {x+w}, {y+h}\n")
+
+        faces_found += 1
+        adjustment_required = get_adjustment_amount([width, height], x, y, w, h)
+        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 255))
+
+        if args.output:
+            print(f"Adjust right: {adjustment_required[0]}".ljust(90, ' '), flush=True)
+            print(f"Adjust up   : {adjustment_required[1]}", flush=True)
+
+    if not args.no_screen:
+        cv2.imshow('frame', frame)
+
+    if args.dashboard:
+        draw_dashboard()
+
+    # Pressing 'q' in the preview window stops the run.
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+    frame_count += 1
+
+draw_dashboard(keep_stat_line=True)
+cap.release()
--- a/cascades/cascade_1.xml
+++ b/cascades/cascade_1.xml
--- a/cascades/cascade_10.xml
+++ b/cascades/cascade_10.xml
--- a/cascades/cascade_2.xml
+++ b/cascades/cascade_2.xml
--- a/cascades/cascade_5.xml
+++ b/cascades/cascade_5.xml
--- a/quickstart.txt
+++ b/quickstart.txt
@ -1,25 +0,0 @@
-NixOS/Nix: 
-If you run on a Nix or NixOS environment, you can use the included shell.nix file to create a nix shell to run this in.
-
-Windows/Other Linux:
-This program was developed with python 3.11, so please use that version of python to create the virtual environment. After making sure you are using the correct python version, run the following commands:
-
-python -m venv venv
-
-to create a new virtual environment ".\venv" 
-
-Now enter the virtual environment by running .\venv\Scripts\Activate.ps1 on Windows or "source ./venv/bin/activate" on Linux, then install the following packages (found in requirements.txt):
-
-pip install numpy
-pip install opencv-python
-
-Now you can run the program. It is recommended to run the program with -d and -o set while testing. This enables the dashboard which shows live statistics, and output, which shows the calculated adjustments required to center the face in the frame.
-
-
-Training Data:
-https://www.kaggle.com/datasets/utkarshsaxenadn/landscape-recognition-image-dataset-12k-images
-
-
-create positives from the negatives: \opencv\build\x64\vc15\bin\opencv_createsamples.exe -img .\positives\face_1.png -bg .\bg.txt -info info/info.lst -pngoutput info -maxxangle 0.8 -maxyangle 0.8 -maxzangle 0.8 -num 1950
-Create vec files from positives: .\opencv\build\x64\vc15\bin\opencv_createsamples.exe -info .\info\info.lst -num 1950 -w 80 -h 80 -vec positives-80.vec
-(I created a 20, 40, and 80) we have 1650 positives
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,2 @@
-numpy
+numpy
 opencv-python
--- a/training_data/training_data_setup.py
+++ b/training_data/training_data_setup.py
@ -1,120 +1,120 @@
-from PIL import Image
-import os
-import subprocess
-import shutil
-
-backgrounds_file_path = "backgrounds.txt"
-info_base_path = r"./info"
-negatives_path = r"./negatives"
-positives_path = r"./positives"
-training_data_base = r"./training_data_"
-
-opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"
-
-set_sizes = [1, 2, 5, 10]
-
-max_xangle = 0.5
-max_yangle = 0.5
-max_zangle = 0.5
-
-w, h = 25, 18
-
-class InfoEntry:
-    info_lst_line: str 
-    image_path: str
-
-    def __init__(self, info_line, file_path):
-        self.info_lst_line = info_line 
-        self.image_path = file_path
-
-    def __str__(self):
-        return f"Image Entry: {self.info_lst_line}, {self.image_path}"
-
-
-max_x = 750
-max_y = 800
-
-# remove too small images
-for image in os.listdir("./negatives"):
-    im = Image.open(f"./negatives/{image}")
-    width, height = im.size
-    del im
-    if width <= max_x:
-        os.remove(f"./negatives/{image}")
-    elif height <= max_y:
-        os.remove(f"./negatives/{image}")
-
-# remove any existing file and assume old data
-if os.path.exists(backgrounds_file_path):
-    os.remove(backgrounds_file_path)
-
-# regenerate the available negatives list
-count_negatives = len(os.listdir(negatives_path))
-for img in os.listdir(negatives_path):
-    line = f"{negatives_path}/" + img + "\n"
-    with open(backgrounds_file_path, 'a') as f:
-        f.write(line)
-
-info_dirs = []
-
-if len(os.listdir(positives_path)) > max(set_sizes):
-    print("Your set sizes were larger than the available positive images!")
-    quit(2)
-
-for img in os.listdir(positives_path):
-    i = len(info_dirs)
-    info_dir = f"{info_base_path}{i}"
-
-    com = f"{opencv_path} -img positives/" + str(i) + ".png -bg backgrounds.txt -info " + info_dir + "/info.lst" + \
-    " -pngoutput " + info_dir + " -maxxangle " + str(max_xangle) + " -maxyangle " + str(max_yangle) + " -maxzangle " + str(max_zangle) + \
-    " -num " + str(count_negatives)
-
-    if not os.path.exists(info_dir):
-        subprocess.call(com, shell=True)
-
-    info_dirs.append(info_dir)
-
-for i in set_sizes:
-    if not os.path.exists(training_data_base + str(i)):
-        os.makedirs(training_data_base + str(i))
-
-def join_info_folders(info_dirs: list, output_dir: str):
-    info_dir: str
-    cur_entry_name = 0
-    for info_dir in info_dirs:
-        info_lines = []
-        with open(info_dir + "/info.lst", 'r') as info_file:
-            for line in info_file.readlines():
-                image_path = f"{info_dir}/{line.split(' ')[0]}"
-                info_lines.append(InfoEntry(line.strip(), image_path))
-
-        item: InfoEntry
-        for item in info_lines:
-            shutil.copy(item.image_path, f"{output_dir}/{str(cur_entry_name)}.jpg")
-            with open(f"{output_dir}/info.lst", 'a') as info_file:
-                to_write = []
-                to_write.append(str(cur_entry_name) + ".jpg")
-                to_write = to_write + item.info_lst_line.split(" ")[1:]
-                to_write.append("\n")
-                info_file.write(" ".join(to_write))
-            cur_entry_name += 1
-
-for i in set_sizes:
-    join_info_folders(info_dirs[:i], training_data_base + str(i)) 
-
-commands = []
-
-for i in set_sizes:
-    num_positives = len(os.listdir(training_data_base + str(i)))
-    if os.path.exists(training_data_base + str(i) + ".vec"):
-        os.remove(training_data_base + str(i) + ".vec")
-    com = f"{opencv_path} -info {training_data_base + str(i)}\info.lst -num {num_positives} -w {w} -h {h} -vec {training_data_base + str(i)}.vec"
-    subprocess.call(com, shell=True)
-    commands.append(f".\opencv\\build\\x64\\vc15\\bin\opencv_traincascade.exe -data data_{str(i)} -vec .\\{training_data_base + str(i)}.vec -bg .\\{backgrounds_file_path} -numPos {num_positives} -numNeg {num_positives / 2} -numStages 15 -w {w} -h {h}")
-
-    if not os.path.exists(".\data_" + str(i)):
-        os.makedirs(".\data_" + str(i))
-
-for i in commands:
-    print(f"You are ready to train the models with: \n {i}")
-
+from PIL import Image
+import os
+import subprocess
+import shutil
+
+# Text file listing the negative (background) images, one path per line.
+# It is deleted and rebuilt further down in this script.
+backgrounds_file_path = "backgrounds.txt"
+# Prefix of the per-positive sample directories ("./info0", "./info1", ...).
+info_base_path = r"./info"
+# Directory holding the negative (background) images.
+negatives_path = r"./negatives"
+# Directory holding the positive images.
+positives_path = r"./positives"
+# Prefix of the merged training-set directories and their .vec files.
+training_data_base = r"./training_data_"
+
+# Windows-style relative path to the opencv_createsamples executable.
+opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"
+
+# Each entry is the number of info directories merged into one training set.
+set_sizes = [1, 2, 5, 10]
+
+# Passed to opencv_createsamples as -maxxangle / -maxyangle / -maxzangle.
+max_xangle = 0.5
+max_yangle = 0.5
+max_zangle = 0.5
+
+# Sample width and height, passed as -w / -h to the OpenCV tools.
+w, h = 25, 18
+
+class InfoEntry:
+    """One line of an ``info.lst`` file paired with the image it refers to."""
+    # Stripped text of the info.lst line.
+    info_lst_line: str
+    # Path of the image named on that line.
+    image_path: str
+
+    def __init__(self, info_line, file_path):
+        self.image_path = file_path
+        self.info_lst_line = info_line
+
+    def __str__(self):
+        return "Image Entry: {}, {}".format(self.info_lst_line, self.image_path)
+
+
+# Size thresholds: despite the "max" names these act as lower bounds, an
+# image is deleted when its width is <= max_x or its height is <= max_y.
+max_x = 750
+max_y = 800
+
+# remove too small images
+for image in os.listdir("./negatives"):
+    # The with-block closes the image file before it may be deleted; an
+    # open handle can make os.remove fail (notably on Windows).
+    with Image.open(f"./negatives/{image}") as im:
+        width, height = im.size
+    if width <= max_x or height <= max_y:
+        os.remove(f"./negatives/{image}")
+
+# Start from a clean slate: an existing list is treated as stale.
+if os.path.exists(backgrounds_file_path):
+    os.remove(backgrounds_file_path)
+
+# Rebuild the list with one "<negatives_path>/<file name>" entry per line.
+count_negatives = len(os.listdir(negatives_path))
+for img in os.listdir(negatives_path):
+    with open(backgrounds_file_path, 'a') as f:
+        f.write(f"{negatives_path}/{img}\n")
+
+# Sample directories produced so far, one per positive image.
+info_dirs = []
+
+# The largest training set merges max(set_sizes) info directories, so at
+# least that many positives must exist (the check used to be inverted and
+# aborted when there were MORE positives than needed).
+if len(os.listdir(positives_path)) < max(set_sizes):
+    print("Your set sizes were larger than the available positive images!")
+    raise SystemExit(2)
+
+for i, _ in enumerate(os.listdir(positives_path)):
+    info_dir = f"{info_base_path}{i}"
+
+    # The positive is addressed by index (positives/<i>.png), not by the
+    # file name returned from os.listdir.
+    com = (
+        f"{opencv_path} -img positives/{i}.png -bg backgrounds.txt"
+        f" -info {info_dir}/info.lst -pngoutput {info_dir}"
+        f" -maxxangle {max_xangle} -maxyangle {max_yangle} -maxzangle {max_zangle}"
+        f" -num {count_negatives}"
+    )
+
+    # Existing sample directories are reused rather than regenerated.
+    if not os.path.exists(info_dir):
+        subprocess.call(com, shell=True)
+
+    info_dirs.append(info_dir)
+
+# One output directory per requested training-set size.
+for i in set_sizes:
+    os.makedirs(training_data_base + str(i), exist_ok=True)
+
+def join_info_folders(info_dirs: list, output_dir: str):
+    """Merge several sample directories into a single training directory.
+
+    Every entry of each ``<dir>/info.lst`` is copied into ``output_dir``
+    under a sequential name (``0.jpg``, ``1.jpg``, ...), and a matching line
+    with the new file name is appended to ``output_dir/info.lst``.
+
+    Args:
+        info_dirs: Directories that each contain an ``info.lst`` and the
+            images it lists.
+        output_dir: Existing directory that receives the copies and the
+            merged ``info.lst``.
+    """
+    next_index = 0
+    for source_dir in info_dirs:
+        with open(source_dir + "/info.lst", 'r') as listing:
+            entries = [
+                InfoEntry(raw.strip(), f"{source_dir}/{raw.split(' ')[0]}")
+                for raw in listing.readlines()
+            ]
+
+        for entry in entries:
+            target_name = str(next_index) + ".jpg"
+            shutil.copy(entry.image_path, f"{output_dir}/{target_name}")
+            # Keep everything after the original file name; the trailing
+            # "\n" item is joined with a space like every other field.
+            fields = [target_name] + entry.info_lst_line.split(" ")[1:] + ["\n"]
+            with open(f"{output_dir}/info.lst", 'a') as merged:
+                merged.write(" ".join(fields))
+            next_index += 1
+
+# Merge the first i sample directories into each training set.
+for i in set_sizes:
+    join_info_folders(info_dirs[:i], training_data_base + str(i))
+
+# Training commands to show the user once everything is prepared.
+commands = []
+
+for i in set_sizes:
+    set_dir = training_data_base + str(i)
+    # Count only the copied images; the directory also holds info.lst,
+    # which must not be counted as a positive sample.
+    num_positives = sum(1 for name in os.listdir(set_dir) if name.endswith(".jpg"))
+
+    # Always rebuild the .vec file from the current samples.
+    if os.path.exists(set_dir + ".vec"):
+        os.remove(set_dir + ".vec")
+
+    # Backslashes are written as "\\" so the strings contain no invalid
+    # escape sequences; the resulting text is unchanged.
+    com = f"{opencv_path} -info {set_dir}\\info.lst -num {num_positives} -w {w} -h {h} -vec {set_dir}.vec"
+    subprocess.call(com, shell=True)
+    # -numNeg is emitted as a whole number (integer division).
+    commands.append(f".\\opencv\\build\\x64\\vc15\\bin\\opencv_traincascade.exe -data data_{i} -vec .\\{set_dir}.vec -bg .\\{backgrounds_file_path} -numPos {num_positives} -numNeg {num_positives // 2} -numStages 15 -w {w} -h {h}")
+
+    # Directory named by the -data argument of the training command.
+    os.makedirs(".\\data_" + str(i), exist_ok=True)
+
+for i in commands:
+    print(f"You are ready to train the models with: \n {i}")
+
--- a/validation/compare_to_gt.py
+++ b/validation/compare_to_gt.py
@ -1,97 +1,111 @@
-import argparse
-import os
-
-def dir_path(string):
-    if os.path.exists(string):
-        return string
-    else:
-        raise NotADirectoryError(string)
-
-def init_argparse() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser(
-        prog="FaceDetection",
-        usage="%(prog)s [OPTION]",
-        description="Run face localization"
-    )
-    parser.add_argument(
-        "ground_truth", type=dir_path, help="ground truth file"
-    )
-    parser.add_argument(
-        "test_file", type=dir_path, help="file to compare to the ground truth"
-    )
-    parser.add_argument(
-        "out_file", help="the file to write the output to"
-    )
-    return parser
-
-multiplication_factor = 0.05
-def get_adjustment_amount(imgSize, x1, y1, x2, y2):
-
-    # find the difference between the left gap and the right gap, divide it by two, and multiply it by the speed scale
-    horizontal_adjustment = multiplication_factor * (x1 - (imgSize[0] - x2)) / 2
-    vertical_adjustment = multiplication_factor * (y1 - (imgSize[0] - y2)) / 2
-
-    return [horizontal_adjustment, vertical_adjustment]
-
-parser = init_argparse()
-args = parser.parse_args()
-
-class FrameBox:
-    frame_number: int 
-    top_left: tuple 
-    bottom_right: tuple
-
-    def __init__(self, frame_number, x1, y1, x2, y2):
-        self.frame_number = frame_number
-        self.top_left = (x1, y1)
-        self.bottom_right = (x2, y2)
-
-ground_truth = []
-with open(args.ground_truth, 'r') as gt_file:
-    lines = gt_file.readlines()
-    for line in lines:
-        items = line.split(",")
-        ground_truth.append(FrameBox(int(items[0].strip()), int(items[1].strip()), int(items[2].strip()), int(items[3].strip()), int(items[4].strip())))
-
-test_results = []
-with open(args.test_file, 'r') as test_file:
-    lines = test_file.readlines()
-    for line in lines:
-        items = line.split(",")
-        test_results.append(FrameBox(int(items[0].strip()), int(items[1].strip()), int(items[2].strip()), int(items[3].strip()), int(items[4].strip())))
-
-test_fb: FrameBox
-gt_frame: FrameBox
-last_frame_num = 0
-average_sum = 0
-average_count = 0
-print("length of test file: " + str(len(test_results)))
-for test_fb in test_results:
-    # make
-    def bring_up_gt():
-        if test_fb.frame_number > ground_truth[0].frame_number:
-            ground_truth.pop(0)
-            bring_up_gt()
-
-    bring_up_gt()
-
-    assert(ground_truth[0].frame_number == test_fb.frame_number)
-    gt_frame = ground_truth[0]
-
-    gt_adjustment = get_adjustment_amount((1920, 1000), gt_frame.top_left[0], gt_frame.top_left[1], gt_frame.bottom_right[0], gt_frame.bottom_right[1])
-    test_adjustment = get_adjustment_amount((1920, 1000), test_fb.top_left[0], test_fb.top_left[1], test_fb.bottom_right[0], test_fb.bottom_right[1])
-
-    if last_frame_num != test_fb.frame_number and average_count != 0:
-        with open(args.out_file, 'a') as out_file:
-            out_file.write(f"{average_sum},\n")
-        average_sum = 0
-        average_count = 0
-
-    average_count += 1
-    # get the average 
-    average_sum += abs(( (gt_adjustment[0] - test_adjustment[0]) + (gt_adjustment[1] - test_adjustment[1]) ) / 2)
-    
-    last_frame_num = test_fb.frame_number
-    
-
+import argparse
+import os
+
+def dir_path(string):
+    """Argparse ``type=`` callable that only accepts paths that exist.
+
+    Args:
+        string: Path text taken from the command line.
+
+    Returns:
+        ``string`` unchanged when ``os.path.exists`` reports that it exists.
+
+    Raises:
+        argparse.ArgumentTypeError: When the path does not exist. argparse
+            converts this exception into a normal usage error; other
+            exception types would escape as a traceback.
+    """
+    # Despite the name, any existing path is accepted (file or directory).
+    if not os.path.exists(string):
+        raise argparse.ArgumentTypeError(f"path does not exist: {string}")
+    return string
+
+def init_argparse() -> argparse.ArgumentParser:
+    """Create the command-line parser for the comparison script.
+
+    Returns:
+        An ``argparse.ArgumentParser`` with three positionals
+        (``ground_truth``, ``test_file``, ``out_file``) and the optional
+        ``-f/--faces_count_file``.
+    """
+    cli = argparse.ArgumentParser(
+        prog="FaceDetection",
+        usage="%(prog)s [OPTION]",
+        description="Run face localization",
+    )
+
+    # The two input files must already exist, so they go through dir_path.
+    for name, description in (
+        ("ground_truth", "ground truth file"),
+        ("test_file", "file to compare to the ground truth"),
+    ):
+        cli.add_argument(name, type=dir_path, help=description)
+
+    # Output paths are taken as given; they are opened in append mode later.
+    cli.add_argument("out_file", help="the file to write the output to")
+    cli.add_argument("-f", "--faces_count_file", help="the file output the number of faces found in each frame")
+    return cli
+
+# Speed scale applied to the raw pixel offset of the box from the centre.
+multiplication_factor = 0.05
+def get_adjustment_amount(imgSize, x1, y1, x2, y2):
+    """Return how far a box is from being centred in the image.
+
+    Args:
+        imgSize: Two-item sequence ``(width, height)`` of the frame.
+        x1: Left edge of the box.
+        y1: Top edge of the box.
+        x2: Right edge of the box.
+        y2: Bottom edge of the box.
+
+    Returns:
+        ``[horizontal_adjustment, vertical_adjustment]``. Each value is half
+        the difference between the two opposite gaps (left vs. right, top vs.
+        bottom), scaled by ``multiplication_factor``; both are 0 when the box
+        is centred.
+    """
+    # find the difference between the left gap and the right gap, divide it by two, and multiply it by the speed scale
+    horizontal_adjustment = multiplication_factor * (x1 - (imgSize[0] - x2)) / 2
+    # The bottom gap is measured against the image HEIGHT (imgSize[1]);
+    # using the width here skews the result on any non-square frame.
+    vertical_adjustment = multiplication_factor * (y1 - (imgSize[1] - y2)) / 2
+
+    return [horizontal_adjustment, vertical_adjustment]
+
+# Parse the command line once at import time; the rest of the script reads
+# args.ground_truth, args.test_file, args.out_file and args.faces_count_file.
+parser = init_argparse()
+args = parser.parse_args()
+
+class FrameBox:
+    """A box found in one video frame, stored as two corner points."""
+    # Number of the frame the box belongs to.
+    frame_number: int
+    # (x1, y1) corner.
+    top_left: tuple
+    # (x2, y2) corner.
+    bottom_right: tuple
+
+    def __init__(self, frame_number, x1, y1, x2, y2):
+        corners = ((x1, y1), (x2, y2))
+        self.frame_number = frame_number
+        self.top_left, self.bottom_right = corners
+
+def _load_frame_boxes(path):
+    """Read a "frame, x1, y1, x2, y2" text file into a list of FrameBox.
+
+    Args:
+        path: File with one comma-separated record per line.
+
+    Returns:
+        The records as ``FrameBox`` objects, in file order. Blank lines
+        (for example a trailing empty line) are skipped.
+
+    Raises:
+        ValueError: When a non-blank line has fewer than five fields or a
+            field is not an integer.
+    """
+    boxes = []
+    with open(path, 'r') as source:
+        for line in source:
+            if not line.strip():
+                continue
+            # Only the first five fields are used; extras are ignored.
+            frame_number, x1, y1, x2, y2 = (int(item.strip()) for item in line.split(",")[:5])
+            boxes.append(FrameBox(frame_number, x1, y1, x2, y2))
+    return boxes
+
+ground_truth = _load_frame_boxes(args.ground_truth)
+
+test_results = _load_frame_boxes(args.test_file)
+
+test_fb: FrameBox
+gt_frame: FrameBox
+last_frame_num = 0
+# Running totals for the frame currently being accumulated. Note that
+# average_sum is the SUM of the per-face errors; it is not divided by
+# average_count before being written.
+average_sum = 0
+average_count = 0
+
+def _write_frame_stats(frame_number, error_sum, face_count):
+    """Append one finished frame's error sum (and face count) to the outputs."""
+    with open(args.out_file, 'a') as out_file:
+        out_file.write(f"{error_sum},\n")
+    if args.faces_count_file:
+        with open(args.faces_count_file, 'a') as out_file:
+            out_file.write(f"{frame_number}, {face_count},\n")
+
+print("length of test file: " + str(len(test_results)))
+for test_fb in test_results:
+    # A new frame number means the previous frame is complete: write its
+    # totals under ITS OWN frame number before moving on.
+    if average_count > 0 and test_fb.frame_number != last_frame_num:
+        _write_frame_stats(last_frame_num, average_sum, average_count)
+        average_sum = 0
+        average_count = 0
+
+    # Advance the ground truth until it reaches the test frame. This is a
+    # loop rather than recursion so large gaps cannot hit the recursion
+    # limit, and it stops cleanly when the ground truth runs out.
+    while ground_truth and test_fb.frame_number > ground_truth[0].frame_number:
+        ground_truth.pop(0)
+        if ground_truth and test_fb.frame_number > ground_truth[0].frame_number:
+            # we need to include the empty frames too
+            if args.faces_count_file:
+                with open(args.faces_count_file, 'a') as out_file:
+                    out_file.write(f"{ground_truth[0].frame_number}, 0,\n")
+
+    # Explicit check instead of assert, which is stripped under python -O.
+    if not ground_truth or ground_truth[0].frame_number != test_fb.frame_number:
+        raise ValueError(f"no ground truth entry for frame {test_fb.frame_number}")
+    gt_frame = ground_truth[0]
+
+    # NOTE(review): the frame size is hard-coded as (1920, 1000) - confirm it
+    # matches the video that produced both input files.
+    gt_adjustment = get_adjustment_amount((1920, 1000), gt_frame.top_left[0], gt_frame.top_left[1], gt_frame.bottom_right[0], gt_frame.bottom_right[1])
+    test_adjustment = get_adjustment_amount((1920, 1000), test_fb.top_left[0], test_fb.top_left[1], test_fb.bottom_right[0], test_fb.bottom_right[1])
+
+    average_count += 1
+    # Error for this face: absolute value of the mean of the horizontal and
+    # vertical adjustment differences.
+    average_sum += abs(( (gt_adjustment[0] - test_adjustment[0]) + (gt_adjustment[1] - test_adjustment[1]) ) / 2)
+
+    last_frame_num = test_fb.frame_number
+
+# The final frame has no successor to trigger the write above, so flush it.
+if average_count > 0:
+    _write_frame_stats(last_frame_num, average_sum, average_count)
+    
+
    
--- a/validation/create_ground_truth.py
+++ b/validation/create_ground_truth.py
@ -1,66 +1,66 @@
-import cv2
-import sys
- 
-tracker = cv2.TrackerMIL_create()
-
-# Read video
-video = cv2.VideoCapture("./validation/TestVideo.mp4")
-
-# Exit if video not opened.
-if not video.isOpened():
-    print("Could not open video")
-    sys.exit()
-
-# Read first frame.
-ok, frame = video.read()
-if not ok:
-    print('Cannot read video file')
-    sys.exit()
-    
-# Define an initial bounding box
-bbox = (857, 189, 346, 434)
-
-# Initialize tracker with first frame and bounding box
-ok = tracker.init(frame, bbox)
-
-frame_count = 0
-while True:
-    # Read a new frame
-    ok, frame = video.read()
-    if not ok:
-        break
-        
-    # Start timer
-    timer = cv2.getTickCount()
-
-    # Update tracker
-    ok, bbox = tracker.update(frame)
-
-    # Calculate Frames per second (FPS)
-    fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer);
-
-    # Draw bounding box
-    if ok:
-        # Tracking success
-        p1 = (int(bbox[0]), int(bbox[1]))
-        p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
-
-        with open("./ground_truth.txt", 'a') as output_file:
-            output_file.write(f"{frame_count}, {p1[0]}, {p1[1]}, {p2[0]}, {p2[1]}\n")
-
-        cv2.rectangle(frame, p1, p2, (255,0,0), 2, 1)
-    else :
-        # Tracking failure
-        cv2.putText(frame, "Tracking failure detected", (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2)
-    
-    # Display FPS on frame
-    cv2.putText(frame, "FPS : " + str(int(fps)), (100,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50,170,50), 2)
-
-    # Display result
-    cv2.imshow("Tracking", frame)
-
-    frame_count += 1
-
-    # Exit if ESC pressed
-    k = cv2.waitKey(1) & 0xff
+# Builds a ground-truth file for ./validation/TestVideo.mp4: a MIL tracker is
+# started on a hand-picked box in the first frame and followed through the
+# video, and every successfully tracked frame is appended to
+# ./ground_truth.txt as "frame, x1, y1, x2, y2".
+import cv2
+import sys
+ 
+tracker = cv2.TrackerMIL_create()
+
+# Read video
+video = cv2.VideoCapture("./validation/TestVideo.mp4")
+
+# Exit if video not opened.
+if not video.isOpened():
+    print("Could not open video")
+    sys.exit()
+
+# Read first frame.
+ok, frame = video.read()
+if not ok:
+    print('Cannot read video file')
+    sys.exit()
+    
+# Define an initial bounding box
+# Format is (x, y, width, height); the corner points are derived from it below.
+bbox = (857, 189, 346, 434)
+
+# Initialize tracker with first frame and bounding box
+ok = tracker.init(frame, bbox)
+
+# The first frame was consumed above to initialise the tracker, so frame 0
+# here is the first frame read inside the loop.
+frame_count = 0
+while True:
+    # Read a new frame
+    ok, frame = video.read()
+    if not ok:
+        break
+        
+    # Start timer
+    timer = cv2.getTickCount()
+
+    # Update tracker
+    ok, bbox = tracker.update(frame)
+
+    # Calculate Frames per second (FPS)
+    fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer);
+
+    # Draw bounding box
+    if ok:
+        # Tracking success
+        p1 = (int(bbox[0]), int(bbox[1]))
+        p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
+
+        # Append mode: re-running the script adds to an existing file.
+        # NOTE(review): the output goes to ./ground_truth.txt while the video is
+        # read from ./validation/ - confirm the intended output location.
+        with open("./ground_truth.txt", 'a') as output_file:
+            output_file.write(f"{frame_count}, {p1[0]}, {p1[1]}, {p2[0]}, {p2[1]}\n")
+
+        cv2.rectangle(frame, p1, p2, (255,0,0), 2, 1)
+    else :
+        # Tracking failure
+        # No line is written for this frame, so the output can have gaps.
+        cv2.putText(frame, "Tracking failure detected", (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2)
+    
+    # Display FPS on frame
+    cv2.putText(frame, "FPS : " + str(int(fps)), (100,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50,170,50), 2)
+
+    # Display result
+    cv2.imshow("Tracking", frame)
+
+    frame_count += 1
+
+    # Exit if ESC pressed
+    k = cv2.waitKey(1) & 0xff
    if k == 27 : break
--- a/validation/ground_truth.txt
+++ b/validation/ground_truth.txt