# track.py

# limit the number of cpus used by high performance libraries
import os
# Cap every numerical backend at a single thread. This runs ahead of the
# heavy library imports further down in the file.
os.environ.update(dict.fromkeys(
    (
        "OMP_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "MKL_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    ),
    "1",
))

import sys
# Put the local ./yolov5 directory first on the module search path, ahead of
# the yolov5 imports below. The path is relative, so it is resolved against
# the current working directory the script is launched from.
sys.path.insert(0, './yolov5')

from yolov5.models.experimental import attempt_load
from yolov5.utils.downloads import attempt_download
from yolov5.utils.datasets import LoadImages, LoadStreams
from yolov5.utils.general import check_img_size, non_max_suppression, scale_coords, check_imshow, xyxy2xywh
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator, colors
from deep_sort_pytorch.utils.parser import get_config
from deep_sort_pytorch.deep_sort import DeepSort
import argparse
import os
import platform
import shutil
import time
from pathlib import Path
import cv2
import torch
import torch.backends.cudnn as cudnn
import math
import pymongo
import datetime

##############################################################################
# Connect to MongoDB.
# The connection string can be supplied through the MONGODB_URI environment
# variable so that credentials do not have to live in source control. The
# previously hard-coded URI is kept as the fallback so existing setups keep
# working unchanged.
# NOTE(review): the fallback embeds a username/password in the repository --
# rotate these credentials and drop the default once MONGODB_URI is configured.
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://dbuser:dbuser@countingrecords.zcgy9.mongodb.net/records?retryWrites=true&w=majority",
)
client = pymongo.MongoClient(MONGODB_URI)
db = client.records  # database named "records"
collection = db.records  # collection named "records"; detect() inserts count documents here

def intersect(A, B, C, D):
    """Return True if line segments AB and CD intersect.

    Each argument is an indexable 2-D point ``(x, y)``. The segments are
    reported as intersecting when A and B lie on different sides of CD and
    C and D lie on different sides of AB, as judged by ``ccw``.
    """
    # A and B on the same side of CD: the segments cannot cross.
    if ccw(A, C, D) == ccw(B, C, D):
        return False
    # Otherwise they cross exactly when C and D are split by AB as well.
    return ccw(A, B, C) != ccw(A, B, D)
def ccw(A, B, C):
    """Return True if the points A, B, C are in counterclockwise order.

    Each argument is an indexable 2-D point ``(x, y)``. The test is a strict
    comparison of the two cross-product terms, so collinear points yield
    False.
    """
    ax, ay = A[0], A[1]
    bx, by = B[0], B[1]
    cx, cy = C[0], C[1]
    lhs = (cy - ay) * (bx - ax)
    rhs = (by - ay) * (cx - ax)
    return lhs > rhs
##############################################################################

def _box_center(box):
    """Return the integer centre ``(cx, cy)`` of a box given as ``[x1, y1, x2, y2]``."""
    left, top = int(box[0]), int(box[1])
    right, bottom = int(box[2]), int(box[3])
    return (int(left + (right - left) / 2), int(top + (bottom - top) / 2))


def _pop_pending(pending, track_id):
    """Remove the first ``(id, point)`` entry of ``pending`` whose id equals ``track_id``.

    Returns True when an entry was removed, False otherwise.
    """
    for position, entry in enumerate(pending):
        if entry[0] == track_id:
            pending.pop(position)
            return True
    return False


def _count_crossings(track_id, p0, p1, lineblue, linered, expectedin, expectedout):
    """Tally counting-line crossings for one track's move from ``p1`` to ``p0``.

    ``expectedin`` / ``expectedout`` are mutated in place: a track is appended
    when it crosses the first line of a direction and removed again when it
    crosses the second one.

    Returns:
        Tuple ``(all_in, min_in, all_out, min_out)`` of increments to add to
        the caller's running counters.
    """
    all_in = min_in = all_out = min_out = 0
    # "In" means the current centre has a larger y than the previous one.
    heading_in = p0[1] > p1[1]

    # The blue line is checked before the red one so that a single step which
    # crosses both lines is first registered and then confirmed.
    if intersect(p0, p1, lineblue[0], lineblue[1]):
        if heading_in:
            all_in += 1
            expectedin.append((track_id, p0))
        else:
            all_out += 1
            if _pop_pending(expectedout, track_id):
                min_out += 1

    if intersect(p0, p1, linered[0], linered[1]):
        if heading_in:
            all_in += 1
            if _pop_pending(expectedin, track_id):
                min_in += 1
        else:
            all_out += 1
            expectedout.append((track_id, p0))

    return all_in, min_in, all_out, min_out


def detect(opt):
    """Run YOLOv5 detection and DeepSORT tracking on ``opt.source`` and count line crossings.

    For every image the two counting lines and the tracked boxes are drawn.
    Each track's box centre is compared with its centre from the last frame
    that had detections, and crossings of the blue line (y=100) and the red
    line (y=250) are tallied as "in" (centre y increased) or "out"
    (otherwise). The running totals are drawn on the frame and one document
    per processed image is inserted into the module-level MongoDB
    ``collection``.

    Args:
        opt: Options namespace. Reads ``output``, ``source``, ``yolo_weights``,
            ``deep_sort_weights``, ``show_vid``, ``save_vid``, ``save_txt``,
            ``img_size``, ``evaluate``, ``half``, ``config_deepsort``,
            ``device``, ``augment``, ``conf_thres``, ``iou_thres``,
            ``classes``, ``agnostic_nms`` and, when present, ``fourcc``
            (defaults to ``'mp4v'``).

    Raises:
        StopIteration: when ``q`` is pressed in the preview window
            (``opt.show_vid``). The video writer is released first.
    """
    out, source, yolo_weights, deep_sort_weights, show_vid, save_vid, save_txt, imgsz, evaluate, half = \
        opt.output, opt.source, opt.yolo_weights, opt.deep_sort_weights, opt.show_vid, opt.save_vid, \
            opt.save_txt, opt.img_size, opt.evaluate, opt.half
    webcam = source == '0' or source.startswith(('rtsp', 'http')) or source.endswith('.txt')
    # Honour the --fourcc option when the namespace carries it; 'mp4v' is the
    # value that used to be hard-coded.
    fourcc = getattr(opt, 'fourcc', 'mp4v')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    attempt_download(deep_sort_weights, repo='mikel-brostrom/Yolov5_DeepSort_Pytorch')
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # The MOT16 evaluation runs multiple inference streams in parallel, each one writing to
    # its own .txt file. Hence, in that case, the output folder is not restored
    if not evaluate:
        if os.path.exists(out):
            shutil.rmtree(out)  # delete output folder
        os.makedirs(out)  # make new output folder

    # Load model
    model = attempt_load(yolo_weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    # Check if environment supports image displays
    if show_vid:
        show_vid = check_imshow()

    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    save_path = str(Path(out))
    # extract what is in between the last '/' and the first '.' after it
    txt_file_name = source.split('/')[-1].split('.')[0]
    txt_path = str(Path(out)) + '/' + txt_file_name + '.txt'

    # Counting lines, each given as two (x, y) end points.
    lineblue = [(0, 100), (1000, 100)]
    linered = [(0, 250), (1000, 250)]
    # Colours of the three trail segments, newest first.
    trail_colours = ((255, 0, 255), (255, 0, 0), (0, 255, 0))

    # Track boxes keyed by track id: ``memory`` is the current frame, and
    # ``previous1``..``previous3`` are the three preceding frames that had
    # detections.
    memory = {}
    previous1 = {}
    previous2 = {}
    previous3 = {}
    # Tracks that crossed the first line of a direction and are awaiting the
    # second one, stored as (track_id, centre) tuples.
    expectedin = []
    expectedout = []
    # all_* counts every line crossing; min_* counts tracks that crossed both
    # lines in the same direction.
    min_count_in = 0
    all_count_in = 0
    min_count_out = 0
    all_count_out = 0
    certainty = 0

    try:
        for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_sync()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(
                pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            t2 = time_sync()

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                s += '%gx%g ' % img.shape[2:]  # print string
                save_path = str(Path(out) / Path(p).name)

                # The original passed ``pil=not ascii``; ``ascii`` there is the
                # builtin function, so the value was always False.
                annotator = Annotator(im0, line_width=2, pil=False)

                # draw the counting lines
                cv2.line(im0, lineblue[0], lineblue[1], (255, 0, 0), 2)
                cv2.line(im0, linered[0], linered[1], (0, 0, 255), 2)
                print(expectedin)
                print(expectedout)

                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(
                        img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    xywhs = xyxy2xywh(det[:, 0:4])
                    confs = det[:, 4]
                    clss = det[:, 5]

                    # pass detections to deepsort
                    outputs = deepsort.update(xywhs.cpu(), confs.cpu(), clss.cpu(), im0)

                    # Shift the per-track history by one frame.
                    previous3 = previous2
                    previous2 = previous1
                    previous1 = memory.copy()
                    memory = {}

                    if len(outputs) > 0:
                        # draw boxes for visualization
                        # NOTE(review): ``confs`` is in detection order while
                        # ``outputs`` comes from the tracker, so the confidence
                        # shown may not belong to the labelled track -- confirm
                        # against DeepSort.update.
                        for output, conf in zip(outputs, confs):
                            c = int(output[5])  # integer class
                            label = f'{output[4]} {names[c]} {conf:.2f}'
                            annotator.box_label(output[0:4], label, color=colors(c, True))

                        # count in / out and write the MOT record per track
                        for output in outputs:
                            track_id = output[-2]
                            box = [output[0], output[1], output[2], output[3]]
                            memory[track_id] = box
                            p0 = _box_center(box)

                            # Draw up to three trail segments, stopping at the
                            # first history frame that lacks this track.
                            trail = [p0]
                            for history, colour in zip((previous1, previous2, previous3), trail_colours):
                                if track_id not in history:
                                    break
                                point = _box_center(history[track_id])
                                cv2.line(im0, trail[-1], point, colour, 1)
                                trail.append(point)

                            # Crossings need the previous-frame centre.
                            if len(trail) > 1:
                                d_all_in, d_min_in, d_all_out, d_min_out = _count_crossings(
                                    track_id, p0, trail[1], lineblue, linered, expectedin, expectedout)
                                all_count_in += d_all_in
                                min_count_in += d_min_in
                                all_count_out += d_all_out
                                min_count_out += d_min_out

                            if save_txt:
                                # to MOT format -- uses THIS track's id and box
                                # (previously the last track of the frame was
                                # written once per box).
                                bbox_left = output[0]
                                bbox_top = output[1]
                                bbox_w = output[2] - output[0]
                                bbox_h = output[3] - output[1]
                                # Write MOT compliant results to file
                                with open(txt_path, 'a') as f:
                                    f.write(('%g ' * 10 + '\n') % (frame_idx, output[4], bbox_left,
                                                                   bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))  # label format

                else:
                    deepsort.increment_ages()

                # Derive the displayed totals: max_* is the number of crossings
                # not confirmed by a second line, total_* the midpoint of min
                # and max.
                max_count_in = all_count_in - min_count_in
                max_count_out = all_count_out - min_count_out
                total_count_in = (min_count_in + max_count_in) / 2
                total_count_out = (min_count_out + max_count_out) / 2
                if total_count_in + total_count_out != 0:
                    certainty = (min_count_in + min_count_out) / (total_count_in + total_count_out) * 100
                cv2.putText(im0, 'In : {}'.format(total_count_in), (40, 330), cv2.FONT_HERSHEY_COMPLEX, 1.0, (255, 255, 255), 2)
                cv2.putText(im0, 'Out : {}'.format(total_count_out), (40, 360), cv2.FONT_HERSHEY_COMPLEX, 1.0, (255, 255, 255), 2)
                cv2.putText(im0, 'Round In : {}'.format(math.ceil(total_count_in)), (40, 390), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 0), 2)
                cv2.putText(im0, 'Round out : {}'.format(math.ceil(total_count_out)), (40, 420), cv2.FONT_HERSHEY_COMPLEX, 1.0, (0, 0, 0), 2)
                cv2.putText(im0, 'Certainty : {}%'.format(certainty), (40, 450), cv2.FONT_HERSHEY_COMPLEX, 1.0, (50, 50, 50), 2)

                # One document is stored for every processed image.
                collection.insert_one({
                    "in_count" : math.ceil(total_count_in),
                    "out_count" : math.ceil(total_count_out),
                    "total_count": math.ceil(total_count_in) - math.ceil(total_count_out),
                    "time" : datetime.datetime.now()
                })

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))

                # Stream results
                im0 = annotator.result()
                if show_vid:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_vid:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'

                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)
    finally:
        # Release the writer on normal completion and on the 'q' exit alike,
        # so the output file is closed properly.
        if isinstance(vid_writer, cv2.VideoWriter):
            vid_writer.release()

    if save_txt or save_vid:
        print(f'Results saved to {os.getcwd()}{os.sep}{out}')
        # ``platform`` is the module, so the OS name must come from
        # platform.system(); comparing the module to 'darwin' never matched.
        if platform.system() == 'Darwin':  # MacOS
            import subprocess
            # Argument list instead of a shell string, so paths with spaces work.
            subprocess.run(['open', save_path], check=False)

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the image size
    # and run detection/tracking with autograd disabled.
    parser = argparse.ArgumentParser()
    parser.add_argument('--yolo_weights', nargs='+', type=str, default='yolov5/weights/yolov5l.pt', help='model.pt path(s)')
    parser.add_argument('--deep_sort_weights', type=str, default='deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7', help='ckpt.t7 path')
    # file/folder, 0 for webcam
    parser.add_argument('--source', type=str, default='0', help='source')
    parser.add_argument('--output', type=str, default='inference/output', help='output folder')  # output folder
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
    parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--show-vid', action='store_true', help='display tracking video results')
    parser.add_argument('--save-vid', action='store_true', help='save video tracking results')
    parser.add_argument('--save-txt', action='store_true', help='save MOT compliant results to *.txt')
    # class indices index the model's names list, e.g. 0 is person, 1 is bicycle, 2 is car
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 16 17')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    # detect() skips deleting/recreating the output folder when this is set.
    parser.add_argument('--evaluate', action='store_true', help='keep the existing output folder instead of recreating it (MOT16 evaluation runs)')
    parser.add_argument("--config_deepsort", type=str, default="deep_sort_pytorch/configs/deep_sort.yaml")
    parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
    args = parser.parse_args()
    args.img_size = check_img_size(args.img_size)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        detect(args)