96 lines
No EOL
3.4 KiB
Python
96 lines
No EOL
3.4 KiB
Python
import cv2
|
|
from websocket import create_connection
|
|
import argparse
|
|
import math
|
|
from ultralytics import YOLO
|
|
|
|
# Label table indexed by the detector's integer class id (80 entries).
# Index 0 is "person", the only class the detection loop below acts on.
classNames = [
    # people and vehicles
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
    "truck", "boat",
    # street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
    "zebra", "giraffe",
    # carried items
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports gear
    "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket",
    # tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
    "hot dog", "pizza", "donut", "cake",
    # furniture
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # electronics and appliances
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # household objects
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]
|
|
|
|
|
|
def main(websocket_addr):
    """Track people on the webcam and stream their boxes to a websocket server.

    Connects to ``websocket_addr`` and waits for the server's first message.
    If it is ``"Type?"`` the client answers ``"Type: Automated"``; otherwise
    the connection is closed and the function returns without opening the
    camera.

    Frames from camera 0 are then run through YOLO tracking. For every
    result one text message is sent, holding one line per tracked person in
    the form ``"<track ids> <x1>:<y1> <x2>:<y2>\\n"``. A message is sent even
    when no person was tracked (empty string). Annotated frames are shown in
    a window named ``Webcam``; pressing ``q`` ends the loop.

    Args:
        websocket_addr: Address handed to ``create_connection``.

    Returns:
        None.
    """
    # Connect to websocket server
    ws = create_connection(websocket_addr)
    try:
        if ws.recv() != "Type?":
            # Unexpected greeting: give up; the socket is closed in `finally`.
            return
        ws.send("Type: Automated")
        print("Handshake complete")

        # Load YOLO model
        model = YOLO('yolov8s.pt')  # Load an official Detect model

        # Drawing parameters are loop-invariant, so build them once.
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        label_color = (255, 0, 0)
        id_color = (255, 255, 255)
        box_color = (255, 0, 255)
        thickness = 2

        # Open webcam
        cap = cv2.VideoCapture(0)
        try:
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

            while cap.isOpened():
                ret, frame = cap.read()
                # Check the read status BEFORE touching the frame: on a
                # failed read there is no image to resize.
                if not ret:
                    break
                frame = cv2.resize(frame, (640, 480))

                # Perform object detection
                results = model.track(frame, persist=True)

                for r in results:
                    lines = []
                    for box in r.boxes:
                        # Only class 0 (person) with an assigned track id.
                        if box.id is None or box.cls[0].item() != 0:
                            continue

                        track_id = box.id.int().cpu().tolist()
                        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                        # Draw the bounding box, class label and track id.
                        cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 3)
                        cls = int(box.cls[0])
                        cv2.putText(frame, classNames[cls], [x1, y1], font,
                                    font_scale, label_color, thickness)
                        cv2.putText(frame, str(track_id), [x1, y1 + 50], font,
                                    font_scale, id_color, thickness)

                        lines.append(f"{track_id} {x1}:{y1} {x2}:{y2}\n")

                    # One message per result, even when it is empty.
                    ws.send("".join(lines))

                cv2.imshow('Webcam', frame)
                if cv2.waitKey(1) == ord('q'):
                    break
        finally:
            # Free the camera and window even if detection or send raised.
            cap.release()
            cv2.destroyAllWindows()
    finally:
        ws.close()
|
|
|
|
if __name__ == "__main__":
    # Script entry point: take the websocket address as the single
    # positional command-line argument and pass it to main().
    cli = argparse.ArgumentParser(
        description='Detect people from webcam feed and send data to websocket server',
    )
    cli.add_argument(
        'websocket_addr',
        type=str,
        help='Websocket server address',
    )
    main(cli.parse_args().websocket_addr)