WGU-Capstone/training_data/training_data_setup.py

121 lines
4 KiB
Python
Raw Normal View History

2023-10-12 21:17:07 -07:00
from PIL import Image
import os
import subprocess
import shutil
# --- File and folder locations, relative to the working directory ---
# Text file that receives one negative-image path per line.
backgrounds_file_path = "backgrounds.txt"
# Prefix of the per-positive sample folders ("./info0", "./info1", ...).
info_base_path = "./info"
negatives_path = "./negatives"
positives_path = "./positives"
# Prefix of the merged training-set folders and their .vec files.
training_data_base = "./training_data_"
# Windows build of the OpenCV sample generator invoked by this script.
opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"

# How many per-positive sample folders are merged into each training set.
set_sizes = [1, 2, 5, 10]

# Values passed as -maxxangle / -maxyangle / -maxzangle when generating samples.
max_xangle = max_yangle = max_zangle = 0.5

# Sample size passed as -w / -h.
w = 25
h = 18
class InfoEntry:
    """One line of an ``info.lst`` file paired with the image it refers to."""

    # The info.lst line, stored exactly as passed to the constructor.
    info_lst_line: str
    # Path of the image file that the line refers to.
    image_path: str

    def __init__(self, info_line: str, file_path: str) -> None:
        """Store the info line and the path of the image it describes.

        Args:
            info_line: Text of the ``info.lst`` line.
            file_path: Location of the image named on that line.
        """
        self.info_lst_line = info_line
        self.image_path = file_path

    def __str__(self) -> str:
        """Return a short human-readable description of the entry."""
        return f"Image Entry: {self.info_lst_line}, {self.image_path}"

    def __repr__(self) -> str:
        """Return a constructor-style representation for debugging."""
        return (
            f"{type(self).__name__}(info_line={self.info_lst_line!r}, "
            f"file_path={self.image_path!r})"
        )
# Size thresholds for negative images. Despite the "max_" prefix these act as
# lower bounds: an image is deleted when its width is <= max_x or its height
# is <= max_y.
max_x = 750
max_y = 800
# remove too small images
for image in os.listdir(negatives_path):
    image_file = f"{negatives_path}/{image}"
    # Close the image deterministically before a possible delete instead of
    # relying on `del` to release the underlying file handle.
    with Image.open(image_file) as im:
        width, height = im.size
    if width <= max_x or height <= max_y:
        os.remove(image_file)
# Regenerate the list of available negatives from scratch. Opening with 'w'
# discards any previous file, so old data never leaks into the new list.
# The directory is listed once so that count_negatives always matches the
# number of lines written.
negative_images = os.listdir(negatives_path)
count_negatives = len(negative_images)
with open(backgrounds_file_path, 'w') as f:
    f.writelines(f"{negatives_path}/{img}\n" for img in negative_images)
# Folders holding the generated samples, one per positive image, in index order.
info_dirs = []
num_available_positives = len(os.listdir(positives_path))
# Abort when the largest requested set needs more positives than exist.
# (The check used to be inverted and fired when there were *more* positives
# than the largest set size.)
if num_available_positives < max(set_sizes):
    print("Your set sizes were larger than the available positive images!")
    # SystemExit instead of quit(): quit() is only injected by the site module.
    raise SystemExit(2)
# Positives are addressed by index, i.e. they are expected to be named
# positives/0.png, positives/1.png, ...
for i in range(num_available_positives):
    info_dir = f"{info_base_path}{i}"
    com = (
        f"{opencv_path} -img positives/{i}.png -bg backgrounds.txt"
        f" -info {info_dir}/info.lst -pngoutput {info_dir}"
        f" -maxxangle {max_xangle} -maxyangle {max_yangle} -maxzangle {max_zangle}"
        f" -num {count_negatives}"
    )
    # Samples already generated by an earlier run are reused, not rebuilt.
    if not os.path.exists(info_dir):
        subprocess.call(com, shell=True)
    info_dirs.append(info_dir)
# Make sure every requested set size has its own training-data folder.
for size in set_sizes:
    set_dir = f"{training_data_base}{size}"
    if os.path.exists(set_dir):
        continue
    os.makedirs(set_dir)
def join_info_folders(info_dirs: list, output_dir: str):
    """Merge several sample folders into one sequentially numbered set.

    Every non-blank line of each ``<info_dir>/info.lst`` names an image in
    that folder as its first space-separated field. The image is copied to
    ``<output_dir>/<n>.jpg`` (``n`` counts up from 0 across all folders) and
    a line with the new name, followed by the remaining fields unchanged, is
    written to ``<output_dir>/info.lst``.

    The output ``info.lst`` is rewritten on every call, so running the merge
    again does not append duplicate entries.

    Args:
        info_dirs: Folders that each contain an ``info.lst`` and its images.
        output_dir: Existing folder that receives the copies and the merged list.
    """
    next_index = 0
    with open(f"{output_dir}/info.lst", 'w') as merged:
        for info_dir in info_dirs:
            with open(f"{info_dir}/info.lst", 'r') as source:
                entries = [raw.strip() for raw in source]
            for entry in entries:
                if not entry:
                    # Blank lines carry no image; skipping them avoids
                    # building a path that ends in a newline.
                    continue
                source_name, _, remainder = entry.partition(" ")
                new_name = f"{next_index}.jpg"
                shutil.copy(f"{info_dir}/{source_name}", f"{output_dir}/{new_name}")
                # rstrip() keeps the line clean when there are no extra fields.
                merged.write(f"{new_name} {remainder}".rstrip() + "\n")
                next_index += 1
# Merge the first i per-positive sample folders into each training set.
for i in set_sizes:
    join_info_folders(info_dirs[:i], training_data_base + str(i))

# Trainer executable referenced by the printed commands (not run from here).
traincascade_path = r".\opencv\build\x64\vc15\bin\opencv_traincascade.exe"
commands = []
for i in set_sizes:
    set_dir = training_data_base + str(i)
    vec_file = set_dir + ".vec"
    # Count only the sample images: the folder also contains info.lst, which
    # previously inflated the count by one.
    num_positives = sum(1 for name in os.listdir(set_dir) if name.endswith(".jpg"))
    # Drop a stale .vec so it is always rebuilt from the current samples.
    if os.path.exists(vec_file):
        os.remove(vec_file)
    # Backslashes are escaped explicitly; "\i", "\o" and "\d" are invalid
    # escape sequences that newer Python versions warn about.
    com = f"{opencv_path} -info {set_dir}\\info.lst -num {num_positives} -w {w} -h {h} -vec {vec_file}"
    subprocess.call(com, shell=True)
    # NOTE(review): -numPos is set to the full sample count; confirm that the
    # trainer accepts this or whether a margin below the .vec size is needed.
    # Integer division keeps -numNeg a whole number instead of e.g. "5.5".
    commands.append(
        f"{traincascade_path} -data data_{i} -vec .\\{vec_file} -bg .\\{backgrounds_file_path}"
        f" -numPos {num_positives} -numNeg {num_positives // 2} -numStages 15 -w {w} -h {h}"
    )
    # Output folder named by -data in the command above.
    data_dir = ".\\data_" + str(i)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
for command in commands:
    print(f"You are ready to train the models with: \n {command}")