Commit a678caa2 authored by Pavlo Beylin's avatar Pavlo Beylin
Browse files

Include IoU in the distance and loss calculations.

parent 3e896e33
import datetime
import numpy as np
from PIL import Image
import torch
import cv2
import time
import math
import matplotlib
from torch import optim
......@@ -21,7 +24,6 @@ from patch_transformer import PatchTransformer, PatchApplier
model = torch.hub.load('ultralytics/yolov5', 'yolov5l') # or yolov5m, yolov5l, yolov5x, cu
# model = torch.hub.load('ultralytics/yolov3', 'yolov3')
MIN_THRESHOLD = 0.00001
......@@ -45,10 +47,12 @@ PATCH_SIZE = 300
total_variation = TotalVariation()
def show(img):
    """Display a tensor image in a matplotlib window."""
    # Detach from the autograd graph and move to host memory before plotting.
    displayable = img.detach().cpu()
    plt.imshow(displayable)
    plt.show()
def debug_preds():
detected_classes = [int(results.pred[0][i][-1]) for i in range(0, len(results.pred[0]))]
# print(detected_classes)
......@@ -103,18 +107,48 @@ def get_avg_prediction(res, cls_nr):
return avg_prediction / (ctr if ctr > 0 else 1)
# source https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
# source https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two boxes given as (x1, y1, x2, y2).

    Coordinates are treated as inclusive pixel indices (hence the +1 terms),
    so two identical boxes yield exactly 1.0 and disjoint boxes yield 0.0.
    """
    # Corners of the overlap rectangle: the larger of the two top-left
    # coordinates and the smaller of the two bottom-right coordinates.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # Clamp to zero so non-overlapping boxes contribute no intersection area.
    overlap_area = max(0, right - left + 1) * max(0, bottom - top + 1)

    # Areas of each box individually (inclusive-coordinate convention).
    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # Union = sum of both areas minus the doubly-counted overlap.
    return overlap_area / float(area_a + area_b - overlap_area)
def save_image(image):
    """Convert a tensor to a PIL image, preview it, and write it to saved_patches/."""
    print("save image called!")
    # NOTE(review): `transforms` is assumed to be torchvision.transforms — confirm import.
    pil_img = transforms.ToPILImage('RGB')(image)
    # Show the patch on screen before persisting it.
    plt.imshow(pil_img)
    plt.show()
    # Timestamped filename so successive saves never collide.
    pil_img.save(f"saved_patches/{time.time()}.jpg")
def get_best_prediction(true_box, res, cls_nr):
    """Find the detection that best overlaps the ground-truth patch box.

    Scans every raw prediction in `res`, scores it by IoU against `true_box`,
    and keeps the class score for `cls_nr` of the highest-IoU detection.

    Args:
        true_box: ground-truth box as (x1, y1, x2, y2).
        res: iterable of raw predictions; each row is assumed to be
            [x1, y1, x2, y2, objectness, class_0, class_1, ...] — the YOLOv5
            raw output layout (TODO confirm against model.forward_pt).
        cls_nr: index of the target class whose score should be returned.

    Returns:
        (max_iou, best_prediction): the best IoU found and the target-class
        score of that detection, or (0.0, None) when `res` is empty.
    """
    max_iou = float(0)
    best_prediction = None
    for pred in res:
        # IoU between the true patch box and this prediction's box.
        pred_iou = bb_intersection_over_union(true_box, pred[:4].float())
        # NOTE(review): the old confidence filter (pred[5:].max() > 0.1) was
        # dropped in this revision — every prediction now competes on IoU alone.
        if pred_iou >= max_iou:
            max_iou = pred_iou
            best_prediction = pred[cls_nr + 5]
    print(f"max found iou: {max_iou}")
    return max_iou, best_prediction
if __name__ == "__main__":
......@@ -141,75 +175,97 @@ if __name__ == "__main__":
patch.requires_grad = True
optimizer = optim.Adam([patch], lr=0.0001, amsgrad=True)
gradient_sum = 0
img_size_x = 640
img_size_y = 480
ctr = -1
pred = -1
move = False
rotate = False
transform_interval = 10
move = True
rotate = True
transform_interval = 1
angle_step = 5
frame_read = False
fix_frame = False
patch_transformer.maxangle = math.pi
patch_transformer.minangle = - math.pi
loss = None
while True:
ctr += 1
ret, frame = cap.read()
if not (fix_frame and frame_read):
ret, frame = cap.read()
with torch.set_grad_enabled(True):
# with torch.autograd.detect_anomaly():
# resize our captured frame if we need
frame = cv2.resize(frame, None, fx=1.0, fy=1.0, interpolation=cv2.INTER_AREA)
frame = torch.tensor(frame, dtype=torch.float32, requires_grad=True).cuda()
if not (fix_frame and frame_read):
# resize our captured frame if we need
frame = cv2.resize(frame, None, fx=1.0, fy=1.0, interpolation=cv2.INTER_AREA)
frame_original = torch.tensor(frame, dtype=torch.float32, requires_grad=True).cuda()
frame = frame_original.clone()
frame_read = True
results = None
for _ in range(transform_interval):
ctr += 1
# transform patch (every transform_interval of frames)
if ctr % 1 == 0:
trans_patch = patch_transformer(patch.cuda(), torch.ones([1, 14, 5]).cuda(), img_size_x, img_size_y,
do_rotate=rotate, rand_loc=move)
trans_patch = torch.transpose(trans_patch[0][0].T, 0, 1)
# extract bounding box (x1, y1, x2, y2)
bounding_box = extract_bounding_box(trans_patch)
# apply patch
frame = patch_applier(frame_original, trans_patch)
# transform patch (every couple of frames)
if ctr % transform_interval == 0:
# print("{} {}".format(float(patch.min()), float(patch.max())))
trans_patch = patch_transformer(patch.cuda(), torch.ones([1, 14, 5]).cuda(), img_size_x, img_size_y,
do_rotate=rotate, rand_loc=move)
trans_patch = torch.transpose(trans_patch[0][0].T, 0, 1)
# detect object on our frame
if ctr % 1 == 0 or results is None:
results, raw_results = model.forward_pt(frame)
# extract bounding box (x1, y1, x2, y2)
bounding_box = extract_bounding_box(trans_patch)
# print("True BB: {} {} {} {}".format(int(bounding_box[0]), int(bounding_box[1]), int(bounding_box[2]),
# int(bounding_box[3])))
if ctr % 1 == 0:
# debug_preds()
pass
# apply patch
frame = patch_applier(frame, trans_patch)
iou, pred = get_best_prediction(bounding_box, raw_results, 15) # get cats
# pred = get_best_prediction(bounding_box, raw_results, 42) # get forked
# detect object on our frame
results, raw_results = model.forward_pt(frame)
# pred = get_avg_prediction(raw_results, 15) # make everything cats
# pred = get_avg_prediction(raw_results, 0) # make everything person
if ctr % 1 == 0:
# debug_preds()
pass
if pred is not None:
# print("P:{}".format(pred))
# pred = get_best_prediction(bounding_box, raw_results, 15) # get cats
# pred = get_best_prediction(bounding_box, raw_results, 42) # get forked
# loss
loss = -1 * pred # optimize class
# loss = 1 * pred # adversarial
pred = get_avg_prediction(raw_results, 15) # make everything cats
pred = get_avg_prediction(raw_results, 0) # make everything person
# total variation loss component
tv_loss = total_variation(patch)
loss += tv_loss
if pred is not None:
print("P:{}".format(pred))
# IoU loss component (low iou = high loss)
loss += 0.1 * (1 - iou)
# loss
# loss = -1 * pred # optimize class
loss = 1 * pred # adversarial
if not isinstance(loss, torch.Tensor):
continue
# Total Variation Loss
tv_loss = total_variation(patch)
loss += tv_loss
if loss is None:
print("loss is None")
continue
if not isinstance(loss, torch.Tensor):
continue
loss.backward(retain_graph=True)
loss = None
gradient_sum += patch.grad
loss.backward(retain_graph=True)
# sgn_grads = torch.sign(optimizer.param_groups[0]['params'][0].grad)
# optimizer.param_groups[0]['params'][0].grad = sgn_grads
# optimizer.step()
patch.data -= torch.sign(patch.grad) * 0.001
patch.data = patch.detach().clone().clamp(MIN_THRESHOLD, 0.99999).data
# sgn_grads = torch.sign(optimizer.param_groups[0]['params'][0].grad)
# optimizer.param_groups[0]['params'][0].grad = sgn_grads
# optimizer.step()
patch.data -= torch.sign(gradient_sum) * 0.001
patch.data = patch.detach().clone().clamp(MIN_THRESHOLD, 0.99999).data
gradient_sum = 0
# show us frame with detection
# cv2.imshow("img", results_np.render()[0])
......@@ -235,6 +291,19 @@ if __name__ == "__main__":
transform_interval -= 1
transform_interval = max(transform_interval, 1)
print("Transform Interval: {}".format(transform_interval))
if key == ord("9"):
patch_transformer.maxangle = min(patch_transformer.maxangle + (math.pi * angle_step / 180), math.pi)
patch_transformer.minangle = max(patch_transformer.minangle - (math.pi * angle_step / 180), -math.pi)
print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
if key == ord("3"):
patch_transformer.maxangle = max(patch_transformer.maxangle - (math.pi * angle_step / 180), 0)
patch_transformer.minangle = min(patch_transformer.minangle + (math.pi * angle_step / 180), 0)
print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
if key == ord("s"):
save_image(patch)
if key == ord("f"):
fix_frame = not fix_frame
print("Fix Frame: {}".format(fix_frame))
# calculate FPS
fps += 1
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment