WGU-Capstone/training_data/training_data_setup.py

from PIL import Image
import os
import subprocess
import shutil

# Text file listing one negative (background) image path per line.
# It is deleted and regenerated further down on every run.
backgrounds_file_path = "backgrounds.txt"
# Prefix of the per-positive sample folders; the positive's index is appended
# (./info0, ./info1, ...).
info_base_path = r"./info"
# Folder holding the negative (background) images.
negatives_path = r"./negatives"
# Folder holding the positive images; it is listed to count the positives.
positives_path = r"./positives"
# Prefix of the merged training-set folders and their .vec files; a set size
# from set_sizes is appended.
training_data_base = r"./training_data_"

# Windows-style relative path to the opencv_createsamples executable.
opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"

# For each size N, the first N per-positive sample folders are merged into
# one training set.
set_sizes = [1, 2, 5, 10]

# Passed to opencv_createsamples as -maxxangle / -maxyangle / -maxzangle.
max_xangle = 0.5
max_yangle = 0.5
max_zangle = 0.5

# Passed as -w / -h to opencv_createsamples and to the printed
# opencv_traincascade command.
w, h = 25, 18

class InfoEntry:
    """Pair one line of an info.lst file with the path of the image it names."""

    image_path: str
    info_lst_line: str

    def __init__(self, info_line, file_path):
        # Store both values untouched; callers decide how to parse the line.
        self.image_path = file_path
        self.info_lst_line = info_line

    def __str__(self):
        description = f"Image Entry: {self.info_lst_line}, {self.image_path}"
        return description


# Despite the "max_" prefix these are lower bounds: a negative image whose
# width is <= max_x or whose height is <= max_y (in pixels) is deleted.
max_x = 750
max_y = 800

# remove too small images
for image in os.listdir(negatives_path):
    image_file = f"{negatives_path}/{image}"
    # Close the handle deterministically before deleting. Relying on `del`
    # only works when the interpreter frees the object immediately, and
    # Windows refuses to remove a file that is still open.
    with Image.open(image_file) as im:
        width, height = im.size
    if width <= max_x or height <= max_y:
        os.remove(image_file)

# regenerate the available negatives list
# The directory is listed once so the count and the written entries always
# agree. Mode 'w' truncates any previous file, so old data is discarded
# without a separate remove step, and the file is opened once instead of
# once per line.
negative_images = os.listdir(negatives_path)
count_negatives = len(negative_images)
with open(backgrounds_file_path, 'w') as f:
    f.writelines(f"{negatives_path}/{img}\n" for img in negative_images)

# One sample folder per positive image, in index order.
info_dirs = []

# Each set size N consumes the first N sample folders, so the largest set
# size must not exceed the number of positives. (The original comparison was
# inverted: it aborted when there were MORE positives than needed.)
positive_images = os.listdir(positives_path)
if len(positive_images) < max(set_sizes):
    print("Your set sizes were larger than the available positive images!")
    # SystemExit is what quit() raises; quit() itself is an interactive
    # helper installed by the site module and is not always available.
    raise SystemExit(2)

# Positives are addressed by index (0.png, 1.png, ...), not by their listed
# file names. NOTE(review): this assumes the folder contains exactly those
# names - confirm.
for i in range(len(positive_images)):
    info_dir = f"{info_base_path}{i}"

    com = (
        f"{opencv_path} -img {positives_path}/{i}.png"
        f" -bg {backgrounds_file_path}"
        f" -info {info_dir}/info.lst"
        f" -pngoutput {info_dir}"
        f" -maxxangle {max_xangle} -maxyangle {max_yangle} -maxzangle {max_zangle}"
        f" -num {count_negatives}"
    )

    # An existing folder is treated as already generated and is reused.
    if not os.path.exists(info_dir):
        subprocess.call(com, shell=True)

    info_dirs.append(info_dir)

# Make sure every training-set folder exists before anything is merged into it.
for set_size in set_sizes:
    training_dir = training_data_base + str(set_size)
    if os.path.exists(training_dir):
        continue
    os.makedirs(training_dir)

def join_info_folders(info_dirs: list, output_dir: str):
    """Merge several sample folders into one folder with a single info.lst.

    Every entry of each ``<info_dir>/info.lst`` is copied into ``output_dir``
    under a fresh sequential name (``0.jpg``, ``1.jpg``, ...) and a matching
    line, with the original annotation fields, is written to
    ``<output_dir>/info.lst``.

    Args:
        info_dirs: Folders that each contain an ``info.lst`` and the images
            it references, processed in the given order.
        output_dir: Existing folder that receives the copied images and the
            merged ``info.lst``.

    Raises:
        OSError: If an ``info.lst`` or a referenced image cannot be read, or
            the output folder cannot be written.
    """
    cur_entry_name = 0
    # Mode 'w': numbering restarts at 0 on every call, so the list must be
    # rebuilt from scratch too - appending would duplicate entries on re-runs.
    with open(f"{output_dir}/info.lst", 'w') as merged_file:
        for info_dir in info_dirs:
            with open(f"{info_dir}/info.lst", 'r') as info_file:
                for line in info_file:
                    fields = line.split()
                    if not fields:
                        # Blank lines (e.g. a trailing newline) name no image.
                        continue
                    new_name = f"{cur_entry_name}.jpg"
                    shutil.copy(f"{info_dir}/{fields[0]}", f"{output_dir}/{new_name}")
                    # Same annotation fields, only the file name is replaced.
                    merged_file.write(" ".join([new_name, *fields[1:]]) + "\n")
                    cur_entry_name += 1

# Build each training set from the first `set_size` per-positive sample folders.
for set_size in set_sizes:
    selected_dirs = info_dirs[:set_size]
    join_info_folders(selected_dirs, f"{training_data_base}{set_size}")

# Training commands to show the user once all .vec files are built.
commands = []

for i in set_sizes:
    set_dir = training_data_base + str(i)
    vec_path = set_dir + ".vec"

    # Count only the sample images. info.lst lives in the same folder and
    # used to be counted as a sample, inflating -num / -numPos by one.
    num_positives = sum(1 for name in os.listdir(set_dir) if name.endswith(".jpg"))

    # Always rebuild the .vec file from the current samples.
    if os.path.exists(vec_path):
        os.remove(vec_path)
    com = f"{opencv_path} -info {set_dir}\\info.lst -num {num_positives} -w {w} -h {h} -vec {vec_path}"
    subprocess.call(com, shell=True)

    # Backslashes are escaped explicitly: "\o", "\i" and "\d" are invalid
    # escape sequences that newer Python versions warn about.
    # -numNeg uses integer division so the option is not emitted as a float.
    # NOTE(review): opencv_traincascade may need -numPos below the number of
    # samples in the .vec file - confirm against the OpenCV docs.
    commands.append(f".\\opencv\\build\\x64\\vc15\\bin\\opencv_traincascade.exe -data data_{i} -vec .\\{vec_path} -bg .\\{backgrounds_file_path} -numPos {num_positives} -numNeg {num_positives // 2} -numStages 15 -w {w} -h {h}")

    # Output folder named by -data in the command above.
    if not os.path.exists(".\\data_" + str(i)):
        os.makedirs(".\\data_" + str(i))

for command in commands:
    print(f"You are ready to train the models with: \n {command}")
fixed merge conflict 2023-10-12 21:17:07 -07:00			`from PIL import Image`
			`import os`
			`import subprocess`
			`import shutil`

			`backgrounds_file_path = "backgrounds.txt"`
			`info_base_path = r"./info"`
			`negatives_path = r"./negatives"`
			`positives_path = r"./positives"`
			`training_data_base = r"./training_data_"`

			`opencv_path = r".\opencv\build\x64\vc15\bin\opencv_createsamples.exe"`

			`set_sizes = [1, 2, 5, 10]`

			`max_xangle = 0.5`
			`max_yangle = 0.5`
			`max_zangle = 0.5`

			`w, h = 25, 18`

			`class InfoEntry:`
			`info_lst_line: str`
			`image_path: str`

			`def __init__(self, info_line, file_path):`
			`self.info_lst_line = info_line`
			`self.image_path = file_path`

			`def __str__(self):`
			`return f"Image Entry: {self.info_lst_line}, {self.image_path}"`


			`max_x = 750`
			`max_y = 800`

			`# remove too small images`
			`for image in os.listdir("./negatives"):`
			`im = Image.open(f"./negatives/{image}")`
			`width, height = im.size`
			`del im`
			`if width <= max_x:`
			`os.remove(f"./negatives/{image}")`
			`elif height <= max_y:`
			`os.remove(f"./negatives/{image}")`

			`# remove any existing file and assume old data`
			`if os.path.exists(backgrounds_file_path):`
			`os.remove(backgrounds_file_path)`

			`# regenerate the available negatives list`
			`count_negatives = len(os.listdir(negatives_path))`
			`for img in os.listdir(negatives_path):`
			`line = f"{negatives_path}/" + img + "\n"`
			`with open(backgrounds_file_path, 'a') as f:`
			`f.write(line)`

			`info_dirs = []`

			`if len(os.listdir(positives_path)) > max(set_sizes):`
			`print("Your set sizes were larger than the available positive images!")`
			`quit(2)`

			`for img in os.listdir(positives_path):`
			`i = len(info_dirs)`
			`info_dir = f"{info_base_path}{i}"`

			`com = f"{opencv_path} -img positives/" + str(i) + ".png -bg backgrounds.txt -info " + info_dir + "/info.lst" + \`
			`" -pngoutput " + info_dir + " -maxxangle " + str(max_xangle) + " -maxyangle " + str(max_yangle) + " -maxzangle " + str(max_zangle) + \`
			`" -num " + str(count_negatives)`

			`if not os.path.exists(info_dir):`
			`subprocess.call(com, shell=True)`

			`info_dirs.append(info_dir)`

			`for i in set_sizes:`
			`if not os.path.exists(training_data_base + str(i)):`
			`os.makedirs(training_data_base + str(i))`

			`def join_info_folders(info_dirs: list, output_dir: str):`
			`info_dir: str`
			`cur_entry_name = 0`
			`for info_dir in info_dirs:`
			`info_lines = []`
			`with open(info_dir + "/info.lst", 'r') as info_file:`
			`for line in info_file.readlines():`
			`image_path = f"{info_dir}/{line.split(' ')[0]}"`
			`info_lines.append(InfoEntry(line.strip(), image_path))`

			`item: InfoEntry`
			`for item in info_lines:`
			`shutil.copy(item.image_path, f"{output_dir}/{str(cur_entry_name)}.jpg")`
			`with open(f"{output_dir}/info.lst", 'a') as info_file:`
			`to_write = []`
			`to_write.append(str(cur_entry_name) + ".jpg")`
			`to_write = to_write + item.info_lst_line.split(" ")[1:]`
			`to_write.append("\n")`
			`info_file.write(" ".join(to_write))`
			`cur_entry_name += 1`

			`for i in set_sizes:`
			`join_info_folders(info_dirs[:i], training_data_base + str(i))`

			`commands = []`

			`for i in set_sizes:`
			`num_positives = len(os.listdir(training_data_base + str(i)))`
			`if os.path.exists(training_data_base + str(i) + ".vec"):`
			`os.remove(training_data_base + str(i) + ".vec")`
			`com = f"{opencv_path} -info {training_data_base + str(i)}\info.lst -num {num_positives} -w {w} -h {h} -vec {training_data_base + str(i)}.vec"`
			`subprocess.call(com, shell=True)`
			`commands.append(f".\opencv\\build\\x64\\vc15\\bin\opencv_traincascade.exe -data data_{str(i)} -vec .\\{training_data_base + str(i)}.vec -bg .\\{backgrounds_file_path} -numPos {num_positives} -numNeg {num_positives / 2} -numStages 15 -w {w} -h {h}")`

			`if not os.path.exists(".\data_" + str(i)):`
			`os.makedirs(".\data_" + str(i))`

			`for i in commands:`
			`print(f"You are ready to train the models with: \n {i}")`