fixed merge conflict

This commit is contained in:
Nickiel12 2023-10-12 21:17:07 -07:00
parent e35245cd8c
commit 43d36335a6
12 changed files with 12124 additions and 12135 deletions

24
.gitignore vendored
View file

@ -1,13 +1,13 @@
.direnv/*
venv/*
output/*
.envrc
training_data/data*
training_data/info*
training_data/training_data_*/
training_data/*.vec
training_data/backgrounds.txt
training_data/negatives
training_data/opencv
validation/cascade*
.direnv/*
venv/*
output/*
.envrc
training_data/data*
training_data/info*
training_data/training_data_*/
training_data/*.vec
training_data/backgrounds.txt
training_data/negatives
training_data/opencv
validation/cascade*
validation/*.xlsx

306
Main.py
View file

@ -1,153 +1,153 @@
import cv2
import argparse
import time
import os
import datetime
def dir_path(string):
    """Argparse ``type=`` validator: return *string* when it names an existing path.

    Despite the name, any existing filesystem entry (file or directory) is
    accepted, because the check is ``os.path.exists``.

    Raises:
        NotADirectoryError: if nothing exists at *string*.
    """
    if not os.path.exists(string):
        raise NotADirectoryError(string)
    return string
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the face-detection script.

    Returns:
        A parser exposing the version flag, the boolean switches (dashboard,
        output, no-screen, training-data, validate) and the optional
        ``--file`` path, which is validated by ``dir_path``.
    """
    cli = argparse.ArgumentParser(
        prog="FaceDetection",
        usage="%(prog)s [OPTION]",
        description="Run face localization",
    )
    # Declared in the order they should appear in --help.
    option_table = [
        (("-v", "--version"), {"action": "version", "version": f"{cli.prog} version 1.0.1"}),
        (("-d", "--dashboard"), {"action": 'store_true', "help": "Flag to enable live dashboard with statistics - requires terminal width of 90 columns or greater"}),
        (("-o", "--output"), {"action": 'store_true', "help": "show the resultant directions"}),
        (("-f", "--file"), {"type": dir_path, "nargs": "?", "help": "File to scan instead of using the camera. Useful for generating training data"}),
        (("-s", "--no-screen"), {"action": 'store_true', "help": "Do not show the successful frames"}),
        (("-t", "--training-data"), {"action": 'store_true', "help": "When set, saves successful face-location images and coordinates to use for future training data"}),
        (('--validate',), {"action": "store_true", "help": "if set, outputs frame_count and box coords for located faces for future validation"}),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli
# Scale applied to the raw pixel offset to turn it into an adjustment speed.
multiplication_factor = 0.05


def get_adjustment_amount(imgSize, currentX, currentY, currentW, currentH):
    """Return the ``[horizontal, vertical]`` correction needed to centre a box.

    Args:
        imgSize: ``[width, height]`` of the frame in pixels.
        currentX: x coordinate of the box's top-left corner.
        currentY: y coordinate of the box's top-left corner.
        currentW: width of the box.
        currentH: height of the box.

    Returns:
        ``[horizontal_adjustment, vertical_adjustment]``. Each value is half
        the difference between the gap before the box and the gap after it on
        that axis, scaled by ``multiplication_factor``; both are 0 when the
        box is centred in the frame.
    """
    frame_width, frame_height = imgSize[0], imgSize[1]
    right_gap = frame_width - (currentX + currentW)
    # The bottom gap is measured against the frame HEIGHT (imgSize[1]);
    # using the width here skews every vertical result on non-square frames.
    bottom_gap = frame_height - (currentY + currentH)
    horizontal_adjustment = multiplication_factor * (currentX - right_gap) / 2
    vertical_adjustment = multiplication_factor * (currentY - bottom_gap) / 2
    return [horizontal_adjustment, vertical_adjustment]
# Running counters updated by the main loop and read by draw_dashboard().
frames_searched = 1
faces_found = 0
start_time = datetime.datetime.now()


def draw_dashboard(keep_stat_line = False):
    """Print one line of running detection statistics to stdout.

    The line shows faces found, frames searched, the success rate and the
    time elapsed since ``start_time``.

    Args:
        keep_stat_line: when True the line ends with a newline so it stays on
            screen; otherwise it ends with a carriage return so the next call
            overwrites it.
    """
    # Work in whole seconds so the clock never shows "60s" or "60m".
    elapsed_seconds = int((datetime.datetime.now() - start_time).total_seconds())
    hours, remainder = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    # Guard the division in case the counter is ever reset to 0.
    rate = (faces_found / frames_searched) * 100 if frames_searched else 0.0

    f_found = f"{faces_found} Faces found".ljust(16, ' ')
    f_searched = f"{frames_searched} Frames searched".ljust(21, ' ')
    success_rate = f"{round(rate, 1)}% Success rate".ljust(16, ' ')

    line_end = "\n" if keep_stat_line else "\r"
    print(f"{f_found} | {f_searched} | {success_rate} | {hours}h {minutes}m {seconds}s elapsed", end=line_end, flush=True)
# ---- Script entry: parse options, open the video source, run detection ----
parser = init_argparse()
args = parser.parse_args()

if args.file:
    cap = cv2.VideoCapture(args.file)
else:
    # VideoCapture's second positional argument is the backend preference,
    # not an imread colour flag; cv2.IMREAD_GRAYSCALE (== 0 == cv2.CAP_ANY)
    # had no effect there, so it is dropped.
    cap = cv2.VideoCapture(0)

faceCascade = cv2.CascadeClassifier(r"./cascades/cascade_5.xml")  # CHECK THIS FIRST TROUBLE SHOOTING

datestamp = "{:%Y_%m_%d %H_%M_%S}".format(datetime.datetime.now())
output_dir = r"./output/" + datestamp + r"/"

if args.training_data:
    os.makedirs(output_dir, exist_ok=True)
    with open(output_dir + r"found_faces.csv", 'a') as fd:
        fd.write("frame_name, x, y, width, height\n")

# Read one frame up front to learn the frame size.
tmp, frm = cap.read()
if not tmp:
    cap.release()
    raise SystemExit("Could not read a frame from the video source")
height, width, channels = frm.shape
if args.file:
    print(f"Image is {height} tall and {width} wide")
frame_count = 0
start_timestamp = time.strftime("%Y%m%d-%H%M%S")
del tmp, frm

while True:
    ret, frame = cap.read()
    if not ret:
        # End of the file, or the camera stopped delivering frames: stop
        # cleanly instead of passing None to cvtColor.
        break
    frames_searched += 1

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the image
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.2,
        minNeighbors=2,
        minSize=(200, 200)
    )

    # Record and draw a rectangle around each face
    for (x, y, w, h) in faces:
        if args.training_data:
            frame_name = frames_searched
            with open(output_dir + r"found_faces.csv", 'a') as fd:
                fd.write(f"frame_{frame_name}.jpg, {x}, {y}, {w}, {h}\n")
            cv2.imwrite(output_dir + f"frame_{frame_name}.jpg", frame)

        if args.validate:
            with open(f"./validation/{start_timestamp}-validation.txt", 'a') as output_validation_file:
                output_validation_file.write(f"{frame_count}, {x}, {y}, {x+w}, {y+h}\n")

        faces_found += 1
        adjustment_required = get_adjustment_amount([width, height], x, y, w, h)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 255))

        if args.output:
            print(f"Adjust right: {adjustment_required[0]}".ljust(90, ' '), flush=True)
            print(f"Adjust up : {adjustment_required[1]}", flush=True)

    if not args.no_screen:
        cv2.imshow('frame', frame)

    if args.dashboard:
        draw_dashboard()

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    frame_count += 1

# Leave the final statistics on screen, then free the capture and any windows.
draw_dashboard(keep_stat_line=True)
cap.release()
cv2.destroyAllWindows()
import cv2
import argparse
import time
import os
import datetime
def dir_path(string):
    """Argparse ``type=`` validator: return *string* when it names an existing path.

    Despite the name, any existing filesystem entry (file or directory) is
    accepted, because the check is ``os.path.exists``.

    Raises:
        NotADirectoryError: if nothing exists at *string*.
    """
    if not os.path.exists(string):
        raise NotADirectoryError(string)
    return string
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the face-detection script.

    Returns:
        A parser exposing the version flag, the boolean switches (dashboard,
        output, no-screen, training-data, validate) and the optional
        ``--file`` path, which is validated by ``dir_path``.
    """
    cli = argparse.ArgumentParser(
        prog="FaceDetection",
        usage="%(prog)s [OPTION]",
        description="Run face localization",
    )
    # Declared in the order they should appear in --help.
    option_table = [
        (("-v", "--version"), {"action": "version", "version": f"{cli.prog} version 1.0.1"}),
        (("-d", "--dashboard"), {"action": 'store_true', "help": "Flag to enable live dashboard with statistics - requires terminal width of 90 columns or greater"}),
        (("-o", "--output"), {"action": 'store_true', "help": "show the resultant directions"}),
        (("-f", "--file"), {"type": dir_path, "nargs": "?", "help": "File to scan instead of using the camera. Useful for generating training data"}),
        (("-s", "--no-screen"), {"action": 'store_true', "help": "Do not show the successful frames"}),
        (("-t", "--training-data"), {"action": 'store_true', "help": "When set, saves successful face-location images and coordinates to use for future training data"}),
        (('--validate',), {"action": "store_true", "help": "if set, outputs frame_count and box coords for located faces for future validation"}),
    ]
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli
# Scale applied to the raw pixel offset to turn it into an adjustment speed.
multiplication_factor = 0.05


def get_adjustment_amount(imgSize, currentX, currentY, currentW, currentH):
    """Return the ``[horizontal, vertical]`` correction needed to centre a box.

    Args:
        imgSize: ``[width, height]`` of the frame in pixels.
        currentX: x coordinate of the box's top-left corner.
        currentY: y coordinate of the box's top-left corner.
        currentW: width of the box.
        currentH: height of the box.

    Returns:
        ``[horizontal_adjustment, vertical_adjustment]``. Each value is half
        the difference between the gap before the box and the gap after it on
        that axis, scaled by ``multiplication_factor``; both are 0 when the
        box is centred in the frame.
    """
    frame_width, frame_height = imgSize[0], imgSize[1]
    right_gap = frame_width - (currentX + currentW)
    # The bottom gap is measured against the frame HEIGHT (imgSize[1]);
    # using the width here skews every vertical result on non-square frames.
    bottom_gap = frame_height - (currentY + currentH)
    horizontal_adjustment = multiplication_factor * (currentX - right_gap) / 2
    vertical_adjustment = multiplication_factor * (currentY - bottom_gap) / 2
    return [horizontal_adjustment, vertical_adjustment]
# Running counters updated by the main loop and read by draw_dashboard().
frames_searched = 1
faces_found = 0
start_time = datetime.datetime.now()


def draw_dashboard(keep_stat_line = False):
    """Print one line of running detection statistics to stdout.

    The line shows faces found, frames searched, the success rate and the
    time elapsed since ``start_time``.

    Args:
        keep_stat_line: when True the line ends with a newline so it stays on
            screen; otherwise it ends with a carriage return so the next call
            overwrites it.
    """
    # Work in whole seconds so the clock never shows "60s" or "60m".
    elapsed_seconds = int((datetime.datetime.now() - start_time).total_seconds())
    hours, remainder = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    # Guard the division in case the counter is ever reset to 0.
    rate = (faces_found / frames_searched) * 100 if frames_searched else 0.0

    f_found = f"{faces_found} Faces found".ljust(16, ' ')
    f_searched = f"{frames_searched} Frames searched".ljust(21, ' ')
    success_rate = f"{round(rate, 1)}% Success rate".ljust(16, ' ')

    line_end = "\n" if keep_stat_line else "\r"
    print(f"{f_found} | {f_searched} | {success_rate} | {hours}h {minutes}m {seconds}s elapsed", end=line_end, flush=True)
# ---- Script entry: parse options, open the video source, run detection ----
parser = init_argparse()
args = parser.parse_args()

if args.file:
    cap = cv2.VideoCapture(args.file)
else:
    # VideoCapture's second positional argument is the backend preference,
    # not an imread colour flag; cv2.IMREAD_GRAYSCALE (== 0 == cv2.CAP_ANY)
    # had no effect there, so it is dropped.
    cap = cv2.VideoCapture(0)

faceCascade = cv2.CascadeClassifier(r"./cascades/cascade_10.xml")  # CHECK THIS FIRST TROUBLE SHOOTING

datestamp = "{:%Y_%m_%d %H_%M_%S}".format(datetime.datetime.now())
output_dir = r"./output/" + datestamp + r"/"

if args.training_data:
    os.makedirs(output_dir, exist_ok=True)
    with open(output_dir + r"found_faces.csv", 'a') as fd:
        fd.write("frame_name, x, y, width, height\n")

# Read one frame up front to learn the frame size.
tmp, frm = cap.read()
if not tmp:
    cap.release()
    raise SystemExit("Could not read a frame from the video source")
height, width, channels = frm.shape
if args.file:
    print(f"Image is {height} tall and {width} wide")
frame_count = 0
start_timestamp = time.strftime("%Y%m%d-%H%M%S")
del tmp, frm

while True:
    ret, frame = cap.read()
    if not ret:
        # End of the file, or the camera stopped delivering frames: stop
        # cleanly instead of passing None to cvtColor.
        break
    frames_searched += 1

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the image
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.2,
        minNeighbors=2,
        minSize=(200, 200)
    )

    # Record and draw a rectangle around each face
    for (x, y, w, h) in faces:
        if args.training_data:
            frame_name = frames_searched
            with open(output_dir + r"found_faces.csv", 'a') as fd:
                fd.write(f"frame_{frame_name}.jpg, {x}, {y}, {w}, {h}\n")
            cv2.imwrite(output_dir + f"frame_{frame_name}.jpg", frame)

        if args.validate:
            with open(f"./validation/{start_timestamp}-validation.txt", 'a') as output_validation_file:
                output_validation_file.write(f"{frame_count}, {x}, {y}, {x+w}, {y+h}\n")

        faces_found += 1
        adjustment_required = get_adjustment_amount([width, height], x, y, w, h)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 255))

        if args.output:
            print(f"Adjust right: {adjustment_required[0]}".ljust(90, ' '), flush=True)
            print(f"Adjust up : {adjustment_required[1]}", flush=True)

    if not args.no_screen:
        cv2.imshow('frame', frame)

    if args.dashboard:
        draw_dashboard()

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    frame_count += 1

# Leave the final statistics on screen, then free the capture and any windows.
draw_dashboard(keep_stat_line=True)
cap.release()
cv2.destroyAllWindows()

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,25 +0,0 @@
NixOS/Nix:
If you run on a Nix or NixOS environment, you can use the included shell.nix file to create a nix shell to run this in.
Windows/Other Linux:
This program was developed with python 3.11, so please use that version of python to create the virtual environment. After making sure you are using the correct python version, run the following commands:
python -m venv venv
to create a new virtual environment ".\venv"
Now enter the virtual environment by running .\venv\Scripts\Activate.ps1 (Windows PowerShell) or ./venv/Scripts/activate (other shells; on Linux the script is ./venv/bin/activate), then install the following packages (found in requirements.txt):
pip install numpy
pip install opencv-python
Now you can run the program. It is recommended to run the program with -d and -o set while testing. This enables the dashboard which shows live statistics, and output, which shows the calculated adjustments required to center the face in the frame.
Training Data:
https://www.kaggle.com/datasets/utkarshsaxenadn/landscape-recognition-image-dataset-12k-images
create positives from the negatives: \opencv\build\x64\vc15\bin\opencv_createsamples.exe -img .\positives\face_1.png -bg .\bg.txt -info info/info.lst -pngoutput info -maxxangle 0.8 -maxyangle 0.8 -maxzangle 0.8 -num 1950
Create vec files from positives: .\opencv\build\x64\vc15\bin\opencv_createsamples.exe -info .\info\info.lst -num 1950 -w 80 -h 80 -vec positives-80.vec
(I created 20-, 40-, and 80-pixel vec files.) We have 1650 positives.

View file

@ -1,2 +1,2 @@
numpy
numpy
opencv-python

View file

@ -1,120 +1,120 @@
from PIL import Image
import os
import subprocess
import shutil
# Text file listing every negative (background) image, one path per line;
# handed to the OpenCV tools as -bg.
backgrounds_file_path = "backgrounds.txt"
# Prefix of the per-positive createsamples output folders (./info0, ./info1, ...).
info_base_path = r"./info"
# Folder of background images.
negatives_path = r"./negatives"
# Folder whose entries are counted as the available positive images.
positives_path = r"./positives"
# Prefix of the merged training folders and .vec files, one per entry in set_sizes.
training_data_base = r"./training_data_"
# Windows-relative path to the opencv_createsamples executable.
opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"
# Number of info folders merged into each training set.
set_sizes = [1, 2, 5, 10]
# Rotation limits passed to opencv_createsamples as -maxxangle / -maxyangle / -maxzangle.
max_xangle = 0.5
max_yangle = 0.5
max_zangle = 0.5
# Sample width and height passed to the OpenCV tools as -w / -h.
w, h = 25, 18
class InfoEntry:
    """One row of an ``info.lst`` file paired with the image it refers to."""

    # Text of the info.lst row.
    info_lst_line: str
    # Path of the image the row describes.
    image_path: str

    def __init__(self, info_line, file_path):
        self.info_lst_line, self.image_path = info_line, file_path

    def __str__(self):
        row, path = self.info_lst_line, self.image_path
        return f"Image Entry: {row}, {path}"
# Smallest usable background size: anything at or below either limit is deleted.
max_x = 750
max_y = 800

# remove too small images
for image in os.listdir(negatives_path):
    image_file = f"{negatives_path}/{image}"
    # Close the image before deleting it; an open handle blocks os.remove on Windows.
    with Image.open(image_file) as im:
        width, height = im.size
    if width <= max_x or height <= max_y:
        os.remove(image_file)

# regenerate the available negatives list, replacing any old file in one write
negative_names = os.listdir(negatives_path)
count_negatives = len(negative_names)
with open(backgrounds_file_path, 'w') as f:
    f.writelines(f"{negatives_path}/{name}\n" for name in negative_names)

info_dirs = []

# The largest training set needs that many positives; stop when there are
# fewer (the comparison used to be inverted relative to this message).
positive_names = os.listdir(positives_path)
if max(set_sizes) > len(positive_names):
    print("Your set sizes were larger than the available positive images!")
    raise SystemExit(2)

# One createsamples run per positive; positives are expected to be named 0.png, 1.png, ...
for i, _ in enumerate(positive_names):
    info_dir = f"{info_base_path}{i}"
    com = f"{opencv_path} -img positives/" + str(i) + ".png -bg backgrounds.txt -info " + info_dir + "/info.lst" + \
        " -pngoutput " + info_dir + " -maxxangle " + str(max_xangle) + " -maxyangle " + str(max_yangle) + " -maxzangle " + str(max_zangle) + \
        " -num " + str(count_negatives)
    # An existing folder is taken as already generated and is not rebuilt.
    if not os.path.exists(info_dir):
        subprocess.call(com, shell=True)
    info_dirs.append(info_dir)

for i in set_sizes:
    os.makedirs(training_data_base + str(i), exist_ok=True)
def join_info_folders(info_dirs: list, output_dir: str):
    """Merge several createsamples output folders into one training folder.

    Every image listed in ``<info_dir>/info.lst`` is copied into *output_dir*
    under a sequential name (``0.jpg``, ``1.jpg``, ...) and a matching row,
    carrying the original annotation fields, is appended to
    ``<output_dir>/info.lst``.

    Args:
        info_dirs: folders that each contain an ``info.lst`` and its images.
        output_dir: existing folder that receives the copies and the merged list.
    """
    cur_entry_name = 0
    # Open the merged list once (still in append mode) rather than once per image.
    with open(f"{output_dir}/info.lst", 'a') as merged_file:
        for info_dir in info_dirs:
            with open(info_dir + "/info.lst", 'r') as info_file:
                # Blank lines carry no image name and would yield a bogus copy path.
                rows = [line.strip() for line in info_file if line.strip()]

            for row in rows:
                fields = row.split(" ")
                shutil.copy(f"{info_dir}/{fields[0]}", f"{output_dir}/{cur_entry_name}.jpg")
                # Row layout is kept as-is: space-separated fields, with a
                # trailing space before the newline.
                merged_file.write(" ".join([f"{cur_entry_name}.jpg", *fields[1:], "\n"]))
                cur_entry_name += 1
# Build one merged training folder per requested set size.
for i in set_sizes:
    join_info_folders(info_dirs[:i], training_data_base + str(i))

commands = []
traincascade_path = r".\opencv\build\x64\vc15\bin\opencv_traincascade.exe"

for i in set_sizes:
    set_dir = training_data_base + str(i)
    vec_file = set_dir + ".vec"

    # Count only the images: the folder also holds info.lst, which is not a sample.
    num_positives = sum(1 for name in os.listdir(set_dir) if name != "info.lst")

    # Always rebuild the .vec file from the current folder contents.
    if os.path.exists(vec_file):
        os.remove(vec_file)

    com = f"{opencv_path} -info {set_dir}\\info.lst -num {num_positives} -w {w} -h {h} -vec {vec_file}"
    subprocess.call(com, shell=True)

    # -numNeg must be a whole number, so use integer division.
    commands.append(
        f"{traincascade_path} -data data_{i} -vec .\\{vec_file} -bg .\\{backgrounds_file_path}"
        f" -numPos {num_positives} -numNeg {num_positives // 2} -numStages 15 -w {w} -h {h}"
    )

    os.makedirs(".\\data_" + str(i), exist_ok=True)

for command in commands:
    print(f"You are ready to train the models with: \n {command}")
from PIL import Image
import os
import subprocess
import shutil
# Text file listing every negative (background) image, one path per line;
# handed to the OpenCV tools as -bg.
backgrounds_file_path = "backgrounds.txt"
# Prefix of the per-positive createsamples output folders (./info0, ./info1, ...).
info_base_path = r"./info"
# Folder of background images.
negatives_path = r"./negatives"
# Folder whose entries are counted as the available positive images.
positives_path = r"./positives"
# Prefix of the merged training folders and .vec files, one per entry in set_sizes.
training_data_base = r"./training_data_"
# Windows-relative path to the opencv_createsamples executable.
opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"
# Number of info folders merged into each training set.
set_sizes = [1, 2, 5, 10]
# Rotation limits passed to opencv_createsamples as -maxxangle / -maxyangle / -maxzangle.
max_xangle = 0.5
max_yangle = 0.5
max_zangle = 0.5
# Sample width and height passed to the OpenCV tools as -w / -h.
w, h = 25, 18
class InfoEntry:
    """One row of an ``info.lst`` file paired with the image it refers to."""

    # Text of the info.lst row.
    info_lst_line: str
    # Path of the image the row describes.
    image_path: str

    def __init__(self, info_line, file_path):
        self.info_lst_line, self.image_path = info_line, file_path

    def __str__(self):
        row, path = self.info_lst_line, self.image_path
        return f"Image Entry: {row}, {path}"
# Smallest usable background size: anything at or below either limit is deleted.
max_x = 750
max_y = 800

# remove too small images
for image in os.listdir(negatives_path):
    image_file = f"{negatives_path}/{image}"
    # Close the image before deleting it; an open handle blocks os.remove on Windows.
    with Image.open(image_file) as im:
        width, height = im.size
    if width <= max_x or height <= max_y:
        os.remove(image_file)

# regenerate the available negatives list, replacing any old file in one write
negative_names = os.listdir(negatives_path)
count_negatives = len(negative_names)
with open(backgrounds_file_path, 'w') as f:
    f.writelines(f"{negatives_path}/{name}\n" for name in negative_names)

info_dirs = []

# The largest training set needs that many positives; stop when there are
# fewer (the comparison used to be inverted relative to this message).
positive_names = os.listdir(positives_path)
if max(set_sizes) > len(positive_names):
    print("Your set sizes were larger than the available positive images!")
    raise SystemExit(2)

# One createsamples run per positive; positives are expected to be named 0.png, 1.png, ...
for i, _ in enumerate(positive_names):
    info_dir = f"{info_base_path}{i}"
    com = f"{opencv_path} -img positives/" + str(i) + ".png -bg backgrounds.txt -info " + info_dir + "/info.lst" + \
        " -pngoutput " + info_dir + " -maxxangle " + str(max_xangle) + " -maxyangle " + str(max_yangle) + " -maxzangle " + str(max_zangle) + \
        " -num " + str(count_negatives)
    # An existing folder is taken as already generated and is not rebuilt.
    if not os.path.exists(info_dir):
        subprocess.call(com, shell=True)
    info_dirs.append(info_dir)

for i in set_sizes:
    os.makedirs(training_data_base + str(i), exist_ok=True)
def join_info_folders(info_dirs: list, output_dir: str):
    """Merge several createsamples output folders into one training folder.

    Every image listed in ``<info_dir>/info.lst`` is copied into *output_dir*
    under a sequential name (``0.jpg``, ``1.jpg``, ...) and a matching row,
    carrying the original annotation fields, is appended to
    ``<output_dir>/info.lst``.

    Args:
        info_dirs: folders that each contain an ``info.lst`` and its images.
        output_dir: existing folder that receives the copies and the merged list.
    """
    cur_entry_name = 0
    # Open the merged list once (still in append mode) rather than once per image.
    with open(f"{output_dir}/info.lst", 'a') as merged_file:
        for info_dir in info_dirs:
            with open(info_dir + "/info.lst", 'r') as info_file:
                # Blank lines carry no image name and would yield a bogus copy path.
                rows = [line.strip() for line in info_file if line.strip()]

            for row in rows:
                fields = row.split(" ")
                shutil.copy(f"{info_dir}/{fields[0]}", f"{output_dir}/{cur_entry_name}.jpg")
                # Row layout is kept as-is: space-separated fields, with a
                # trailing space before the newline.
                merged_file.write(" ".join([f"{cur_entry_name}.jpg", *fields[1:], "\n"]))
                cur_entry_name += 1
# Build one merged training folder per requested set size.
for i in set_sizes:
    join_info_folders(info_dirs[:i], training_data_base + str(i))

commands = []
traincascade_path = r".\opencv\build\x64\vc15\bin\opencv_traincascade.exe"

for i in set_sizes:
    set_dir = training_data_base + str(i)
    vec_file = set_dir + ".vec"

    # Count only the images: the folder also holds info.lst, which is not a sample.
    num_positives = sum(1 for name in os.listdir(set_dir) if name != "info.lst")

    # Always rebuild the .vec file from the current folder contents.
    if os.path.exists(vec_file):
        os.remove(vec_file)

    com = f"{opencv_path} -info {set_dir}\\info.lst -num {num_positives} -w {w} -h {h} -vec {vec_file}"
    subprocess.call(com, shell=True)

    # -numNeg must be a whole number, so use integer division.
    commands.append(
        f"{traincascade_path} -data data_{i} -vec .\\{vec_file} -bg .\\{backgrounds_file_path}"
        f" -numPos {num_positives} -numNeg {num_positives // 2} -numStages 15 -w {w} -h {h}"
    )

    os.makedirs(".\\data_" + str(i), exist_ok=True)

for command in commands:
    print(f"You are ready to train the models with: \n {command}")

View file

@ -1,97 +1,111 @@
import argparse
import os
def dir_path(string):
    """Argparse ``type=`` validator: return *string* when it names an existing path.

    Despite the name, any existing filesystem entry (file or directory) is
    accepted, because the check is ``os.path.exists``.

    Raises:
        NotADirectoryError: if nothing exists at *string*.
    """
    if not os.path.exists(string):
        raise NotADirectoryError(string)
    return string
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the validation comparison script.

    Returns:
        A parser with three positionals: the ground-truth file and the test
        file (both validated by ``dir_path``) and the output file path.
    """
    cli = argparse.ArgumentParser(
        prog="FaceDetection",
        usage="%(prog)s [OPTION]",
        description="Run face localization",
    )
    cli.add_argument("ground_truth", type=dir_path, help="ground truth file")
    cli.add_argument("test_file", type=dir_path, help="file to compare to the ground truth")
    cli.add_argument("out_file", help="the file to write the output to")
    return cli
# Scale applied to the raw pixel offset to turn it into an adjustment speed.
multiplication_factor = 0.05


def get_adjustment_amount(imgSize, x1, y1, x2, y2):
    """Return the ``[horizontal, vertical]`` correction needed to centre a box.

    Args:
        imgSize: ``(width, height)`` of the frame in pixels.
        x1: x coordinate of the box's top-left corner.
        y1: y coordinate of the box's top-left corner.
        x2: x coordinate of the box's bottom-right corner.
        y2: y coordinate of the box's bottom-right corner.

    Returns:
        ``[horizontal_adjustment, vertical_adjustment]``: half the difference
        between the leading and trailing gap on each axis, scaled by
        ``multiplication_factor``. Both are 0 for a centred box.
    """
    frame_width, frame_height = imgSize[0], imgSize[1]
    horizontal_adjustment = multiplication_factor * (x1 - (frame_width - x2)) / 2
    # The bottom gap is measured against the frame HEIGHT (imgSize[1]), not the width.
    vertical_adjustment = multiplication_factor * (y1 - (frame_height - y2)) / 2
    return [horizontal_adjustment, vertical_adjustment]
# Parse the command line once, at module level; the script below reads `args`.
parser = init_argparse()
args = parser.parse_args()
class FrameBox:
    """A box in one video frame, stored as its two corner points."""

    # Number of the frame the box belongs to.
    frame_number: int
    # (x1, y1) corner.
    top_left: tuple
    # (x2, y2) corner.
    bottom_right: tuple

    def __init__(self, frame_number, x1, y1, x2, y2):
        first_corner, second_corner = (x1, y1), (x2, y2)
        self.frame_number = frame_number
        self.top_left = first_corner
        self.bottom_right = second_corner
# Frame size assumed for every adjustment calculation in this script.
FRAME_SIZE = (1920, 1000)


def _read_frame_boxes(path):
    """Parse a ``frame, x1, y1, x2, y2`` text file into a list of FrameBox."""
    boxes = []
    with open(path, 'r') as box_file:
        for line in box_file:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            frame_number, x1, y1, x2, y2 = (int(item.strip()) for item in line.split(",")[:5])
            boxes.append(FrameBox(frame_number, x1, y1, x2, y2))
    return boxes


def _write_frame_result(error_sum):
    """Append the summed adjustment error of one finished frame to the output file."""
    with open(args.out_file, 'a') as out_file:
        out_file.write(f"{error_sum},\n")


ground_truth = _read_frame_boxes(args.ground_truth)
test_results = _read_frame_boxes(args.test_file)

last_frame_num = 0
average_sum = 0
average_count = 0
gt_index = 0  # position of the current ground-truth entry

print("length of test file: " + str(len(test_results)))

for test_fb in test_results:
    # A new frame number closes the previous frame's group.
    if last_frame_num != test_fb.frame_number:
        if average_count != 0:
            _write_frame_result(average_sum)
        average_sum = 0
        average_count = 0

    # Advance the ground truth up to this frame with a bounded loop, so long
    # gaps cannot exhaust the recursion limit or run past the end of the list.
    while gt_index < len(ground_truth) and test_fb.frame_number > ground_truth[gt_index].frame_number:
        gt_index += 1

    if gt_index >= len(ground_truth) or ground_truth[gt_index].frame_number != test_fb.frame_number:
        raise SystemExit(f"No ground-truth box for test frame {test_fb.frame_number}")

    gt_frame = ground_truth[gt_index]
    gt_adjustment = get_adjustment_amount(FRAME_SIZE, gt_frame.top_left[0], gt_frame.top_left[1], gt_frame.bottom_right[0], gt_frame.bottom_right[1])
    test_adjustment = get_adjustment_amount(FRAME_SIZE, test_fb.top_left[0], test_fb.top_left[1], test_fb.bottom_right[0], test_fb.bottom_right[1])

    average_count += 1
    # Mean of the horizontal and vertical differences, accumulated per frame.
    average_sum += abs(( (gt_adjustment[0] - test_adjustment[0]) + (gt_adjustment[1] - test_adjustment[1]) ) / 2)
    last_frame_num = test_fb.frame_number

# The last frame has no successor to trigger its write, so flush it here.
if average_count != 0:
    _write_frame_result(average_sum)
import argparse
import os
def dir_path(string):
    """Argparse ``type=`` validator: return *string* when it names an existing path.

    Despite the name, any existing filesystem entry (file or directory) is
    accepted, because the check is ``os.path.exists``.

    Raises:
        NotADirectoryError: if nothing exists at *string*.
    """
    if not os.path.exists(string):
        raise NotADirectoryError(string)
    return string
def init_argparse() -> argparse.ArgumentParser:
    """Build the command-line parser for the validation comparison script.

    Returns:
        A parser with three positionals (ground-truth file and test file,
        both validated by ``dir_path``, plus the output file path) and the
        optional ``-f/--faces_count_file`` path.
    """
    cli = argparse.ArgumentParser(
        prog="FaceDetection",
        usage="%(prog)s [OPTION]",
        description="Run face localization",
    )
    cli.add_argument("ground_truth", type=dir_path, help="ground truth file")
    cli.add_argument("test_file", type=dir_path, help="file to compare to the ground truth")
    cli.add_argument("out_file", help="the file to write the output to")
    cli.add_argument("-f", "--faces_count_file", help="the file output the number of faces found in each frame")
    return cli
# Scale applied to the raw pixel offset to turn it into an adjustment speed.
multiplication_factor = 0.05


def get_adjustment_amount(imgSize, x1, y1, x2, y2):
    """Return the ``[horizontal, vertical]`` correction needed to centre a box.

    Args:
        imgSize: ``(width, height)`` of the frame in pixels.
        x1: x coordinate of the box's top-left corner.
        y1: y coordinate of the box's top-left corner.
        x2: x coordinate of the box's bottom-right corner.
        y2: y coordinate of the box's bottom-right corner.

    Returns:
        ``[horizontal_adjustment, vertical_adjustment]``: half the difference
        between the leading and trailing gap on each axis, scaled by
        ``multiplication_factor``. Both are 0 for a centred box.
    """
    frame_width, frame_height = imgSize[0], imgSize[1]
    horizontal_adjustment = multiplication_factor * (x1 - (frame_width - x2)) / 2
    # The bottom gap is measured against the frame HEIGHT (imgSize[1]), not the width.
    vertical_adjustment = multiplication_factor * (y1 - (frame_height - y2)) / 2
    return [horizontal_adjustment, vertical_adjustment]
# Parse the command line once, at module level; the script below reads `args`.
parser = init_argparse()
args = parser.parse_args()
class FrameBox:
    """A box in one video frame, stored as its two corner points."""

    # Number of the frame the box belongs to.
    frame_number: int
    # (x1, y1) corner.
    top_left: tuple
    # (x2, y2) corner.
    bottom_right: tuple

    def __init__(self, frame_number, x1, y1, x2, y2):
        first_corner, second_corner = (x1, y1), (x2, y2)
        self.frame_number = frame_number
        self.top_left = first_corner
        self.bottom_right = second_corner
# Frame size assumed for every adjustment calculation in this script.
FRAME_SIZE = (1920, 1000)


def _read_frame_boxes(path):
    """Parse a ``frame, x1, y1, x2, y2`` text file into a list of FrameBox."""
    boxes = []
    with open(path, 'r') as box_file:
        for line in box_file:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            frame_number, x1, y1, x2, y2 = (int(item.strip()) for item in line.split(",")[:5])
            boxes.append(FrameBox(frame_number, x1, y1, x2, y2))
    return boxes


def _append_line(path, text):
    """Append *text* to the file at *path*."""
    with open(path, 'a') as handle:
        handle.write(text)


def _write_frame_result(frame_number, error_sum, face_count):
    """Record a finished frame: its summed error and, if requested, its face count."""
    _append_line(args.out_file, f"{error_sum},\n")
    if args.faces_count_file:
        _append_line(args.faces_count_file, f"{frame_number}, {face_count},\n")


ground_truth = _read_frame_boxes(args.ground_truth)
test_results = _read_frame_boxes(args.test_file)

last_frame_num = 0
average_sum = 0
average_count = 0
gt_index = 0  # position of the current ground-truth entry

print("length of test file: " + str(len(test_results)))

for test_fb in test_results:
    # A new frame number closes the previous frame's group. This runs before
    # the ground truth is advanced so the count row carries the number of the
    # frame it was measured on and precedes the zero rows of skipped frames.
    if last_frame_num != test_fb.frame_number:
        if average_count > 0:
            _write_frame_result(last_frame_num, average_sum, average_count)
        average_sum = 0
        average_count = 0

    # Advance the ground truth up to this frame with a bounded loop, so long
    # gaps cannot exhaust the recursion limit or run past the end of the list.
    while gt_index < len(ground_truth) and test_fb.frame_number > ground_truth[gt_index].frame_number:
        gt_index += 1
        # we need to include the empty frames too
        if (args.faces_count_file and gt_index < len(ground_truth)
                and test_fb.frame_number > ground_truth[gt_index].frame_number):
            _append_line(args.faces_count_file, f"{ground_truth[gt_index].frame_number}, 0,\n")

    if gt_index >= len(ground_truth) or ground_truth[gt_index].frame_number != test_fb.frame_number:
        raise SystemExit(f"No ground-truth box for test frame {test_fb.frame_number}")

    gt_frame = ground_truth[gt_index]
    gt_adjustment = get_adjustment_amount(FRAME_SIZE, gt_frame.top_left[0], gt_frame.top_left[1], gt_frame.bottom_right[0], gt_frame.bottom_right[1])
    test_adjustment = get_adjustment_amount(FRAME_SIZE, test_fb.top_left[0], test_fb.top_left[1], test_fb.bottom_right[0], test_fb.bottom_right[1])

    average_count += 1
    # Mean of the horizontal and vertical differences, accumulated per frame.
    average_sum += abs(( (gt_adjustment[0] - test_adjustment[0]) + (gt_adjustment[1] - test_adjustment[1]) ) / 2)
    last_frame_num = test_fb.frame_number

# The last frame has no successor to trigger its write, so flush it here.
if average_count > 0:
    _write_frame_result(last_frame_num, average_sum, average_count)

View file

@ -1,66 +1,66 @@
import cv2
import sys
tracker = cv2.TrackerMIL_create()
# Read video
video = cv2.VideoCapture("./validation/TestVideo.mp4")
# Exit if video not opened.
if not video.isOpened():
print("Could not open video")
sys.exit()
# Read first frame.
ok, frame = video.read()
if not ok:
print('Cannot read video file')
sys.exit()
# Define an initial bounding box
bbox = (857, 189, 346, 434)
# Initialize tracker with first frame and bounding box
ok = tracker.init(frame, bbox)
frame_count = 0
while True:
# Read a new frame
ok, frame = video.read()
if not ok:
break
# Start timer
timer = cv2.getTickCount()
# Update tracker
ok, bbox = tracker.update(frame)
# Calculate Frames per second (FPS)
fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer);
# Draw bounding box
if ok:
# Tracking success
p1 = (int(bbox[0]), int(bbox[1]))
p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
with open("./ground_truth.txt", 'a') as output_file:
output_file.write(f"{frame_count}, {p1[0]}, {p1[1]}, {p2[0]}, {p2[1]}\n")
cv2.rectangle(frame, p1, p2, (255,0,0), 2, 1)
else :
# Tracking failure
cv2.putText(frame, "Tracking failure detected", (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2)
# Display FPS on frame
cv2.putText(frame, "FPS : " + str(int(fps)), (100,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50,170,50), 2)
# Display result
cv2.imshow("Tracking", frame)
frame_count += 1
# Exit if ESC pressed
k = cv2.waitKey(1) & 0xff
import cv2
import sys
# Track one hand-picked box through the test video with the MIL tracker and
# append one "frame, x1, y1, x2, y2" row per successfully tracked frame to
# ./ground_truth.txt.
tracker = cv2.TrackerMIL_create()

# Read video
video = cv2.VideoCapture("./validation/TestVideo.mp4")

# Exit if video not opened (non-zero status so callers can detect the failure).
if not video.isOpened():
    print("Could not open video")
    sys.exit(1)

# Read first frame.
ok, frame = video.read()
if not ok:
    print('Cannot read video file')
    video.release()
    sys.exit(1)

# Define an initial bounding box as (x, y, width, height)
bbox = (857, 189, 346, 434)

# Initialize tracker with first frame and bounding box
ok = tracker.init(frame, bbox)

frame_count = 0

try:
    # Open the output once for the whole run instead of once per frame.
    with open("./ground_truth.txt", 'a') as output_file:
        while True:
            # Read a new frame
            ok, frame = video.read()
            if not ok:
                break

            # Start timer
            timer = cv2.getTickCount()

            # Update tracker
            ok, bbox = tracker.update(frame)

            # Calculate Frames per second (FPS); guard against a zero-tick interval
            elapsed_ticks = cv2.getTickCount() - timer
            fps = cv2.getTickFrequency() / elapsed_ticks if elapsed_ticks else 0

            # Draw bounding box
            if ok:
                # Tracking success
                p1 = (int(bbox[0]), int(bbox[1]))
                p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                output_file.write(f"{frame_count}, {p1[0]}, {p1[1]}, {p2[0]}, {p2[1]}\n")
                cv2.rectangle(frame, p1, p2, (255,0,0), 2, 1)
            else:
                # Tracking failure
                cv2.putText(frame, "Tracking failure detected", (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2)

            # Display FPS on frame
            cv2.putText(frame, "FPS : " + str(int(fps)), (100,50), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (50,170,50), 2)

            # Display result
            cv2.imshow("Tracking", frame)
            frame_count += 1

            # Exit if ESC pressed
            k = cv2.waitKey(1) & 0xff
            if k == 27:
                break
finally:
    # Free the capture and close the preview window on every exit path.
    video.release()
    cv2.destroyAllWindows()

File diff suppressed because it is too large Load diff