import datetime

import numpy as np
from PIL import Image
import torch
import cv2
import time
import math
import matplotlib
from torch import optim

import CSM
import models
from models.common import Detections
from utils.external import TotalVariation
from utils.general import scale_coords

matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

# Model
from torchvision.transforms import transforms

from patch_transformer import PatchTransformer, PatchApplier

model = torch.hub.load('ultralytics/yolov5', 'yolov5l')  # or yolov5m, yolov5l, yolov5x, cu

# model = torch.hub.load('ultralytics/yolov3', 'yolov3')

MIN_THRESHOLD = 0.00001  # lower clamp for patch pixel values; also the cutoff used when masking the patch

coco_class_names = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus",
                    "train", "truck", "boat", "traffic light", "fire hydrant",
                    "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                    "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                    "skis", "snowboard", "sports ball", "kite", "baseball bat",
                    "baseball glove", "skateboard", "surfboard", "tennis racket",
                    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                    "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                    "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
                    "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
                    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
                    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

# PATH = "saved_patches/realcat.jpg"
# PATH = "saved_patches/fatcat.jpg"
# PATH = "saved_patches/smallcat.jpg"
PATH = "saved_patches/person.jpg"
PATCH_SIZE = 300

total_variation = TotalVariation()  # total-variation loss term used to keep the patch smooth


def show(imgs):
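    """Display the given image tensors in a row of matplotlib subplots; entries that cannot be shown are skipped."""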
    f, axarr = plt.subplots(2, len(imgs))
    for i in range(len(imgs)):
        try:
            axarr[0, i].imshow(imgs[i].detach().cpu())
        except Exception:
            pass
    plt.show()


def debug_preds():
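    """Debug helper: print the bounding box and confidence of every 'cat' prediction in the global detections."""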
    detected_classes = [int(detections.pred[0][i][-1]) for i in range(0, len(detections.pred[0]))]
    # print(detected_classes)
    for det in detections.pred[0]:
        if int(det[-1]) == 15:  # cat
            print("Pred BB: ", end="")
            # print("x1:y1 : {}:{}".format(float(det[0]), float(det[1])))
            # print("x2:y2 : {}:{}".format(float(det[2]), float(det[3])))
            print("{} {} {} {} ({}):".format(
                int(det[0]), int(det[1]), int(det[2]), int(det[3]), float(det[-2])))


# from https://github.com/wangzh0ng/adversarial_yolo2
def read_image(path):
    """
    Read an input image to be used as a patch

    :param path: Path to the image to be read.
    :return: Returns the transformed patch as a pytorch Tensor.
    """
    patch_img = Image.open(path).convert('RGB')
    tf = transforms.Resize((PATCH_SIZE, PATCH_SIZE))
    patch_img = tf(patch_img)
    tf = transforms.ToTensor()

    return tf(patch_img)


def extract_bounding_box(patch):
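    """Estimate the bounding box of the transformed patch: mask out pixels below MIN_THRESHOLD and
    return the outermost non-zero columns/rows as a tensor (x1, y1, x2, y2)."""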
    mask = torch.where(patch < MIN_THRESHOLD, torch.zeros(patch.shape).cuda(), torch.ones(patch.shape).cuda()).sum(2)

    bb_x1 = torch.nonzero(mask.sum(0))[0]
    bb_y1 = torch.nonzero(mask.sum(1))[0]
    bb_x2 = torch.nonzero(mask.sum(0))[-1]
    bb_y2 = torch.nonzero(mask.sum(1))[-1]

    return torch.stack([bb_x1, bb_y1, bb_x2, bb_y2]).sum(1)


def get_avg_prediction(res, cls_nr):
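    """Average the raw score for class cls_nr over the predictions in res (returns 0 if res is None)."""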
    avg_prediction = 0

    ctr = 0
    if res is None:
        return 0

    for pred in res:
        if pred[5:].max() > 0.4 or True:  # confidence filter is disabled ("or True" makes this always pass)
            ctr += 1
            avg_prediction += pred[cls_nr + 5]

    return avg_prediction / (ctr if ctr > 0 else 1)


# source https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou


def save_image(image):
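    """Display the patch and save it as a timestamped JPEG under saved_patches/."""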
    print("save image called!")
    im = transforms.ToPILImage('RGB')(image)
    plt.imshow(im)
    plt.show()
    im.save(f"saved_patches/{time.time()}.jpg")


def get_best_prediction(true_box, res, cls_nr):
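    """Find the prediction in res whose box has the highest IoU with true_box (the patch bounding box)
    and return that IoU together with the prediction's score for class cls_nr."""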
    min_distance = float("inf")
    max_iou = float(0)
    best_prediction = None

    for pred in res:
        # pred_dist = torch.dist(true_box.cuda(), pred[:4])
        pred_iou = bb_intersection_over_union(true_box, pred[:4].float())

        if pred_iou >= max_iou:  # and pred[5:].max() > 0.1:
            max_iou = pred_iou
            best_prediction = pred[cls_nr + 5]

    # print(f"max found iou: {max_iou}")

    return max_iou, best_prediction


def calculate_csms(frame, predictions):
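    """Crop the image region of every prediction and pass it, together with the prediction's logits,
    to CSM.calc_yolo_person_csms; returns the cropped images, the cosine similarity matrices and the
    corresponding classes per detection."""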

    imgs_and_logits = []

    for i in range(len(predictions.pred[0])):
        x1, y1, x2, y2, conf = predictions.pred[0][i][:5].float()
        pred_img_section = frame.flip(2)[int(y1):int(y2), int(x1):int(x2), :]  # reverse channel order (BGR -> RGB) and crop the predicted box
        tup = (pred_img_section, predictions.logits[i], frame, x1, y1, x2, y2)
        imgs_and_logits.append(tup)


    # TODO insert non_max_suppression
    imgs, csms, cls = CSM.calc_yolo_person_csms(imgs_and_logits, rescale_factor=0, loss_rescale_factor=1000)

    return imgs, csms, cls


if __name__ == "__main__":
    # init
    patch_transformer = PatchTransformer().cuda()
    patch_applier = PatchApplier().cuda()
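    # PatchTransformer applies randomized transformations to the patch (rotation, location, size,
    # squeeze, noise, ...) controlled by the flags below; PatchApplier pastes the result onto the frame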

    # set start time to current time
    start_time = time.time()

    # display the frame rate every 2 seconds
    display_time = 2

    # frame counter for the FPS calculation
    fps = 0

    # create the video capture object
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    patch = read_image(PATH)
    # patch = torch.rand_like(patch)
    patch.requires_grad = True

    optimizer = optim.Adam([patch], lr=0.0001, amsgrad=True)
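    # note: the Adam step further down is commented out; the patch is actually updated with a
    # fixed-size signed-gradient step inside the training loop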
    gradient_sum = 0

    # img_size_x = 640
    img_size_x = 480
    img_size_y = 480

    # Launch Settings
    # move = True
    # rotate = True
    # taper = True
    # resize = True
    # squeeze = True
    # gauss = True
    # obfuscate = True
    # stretch = True

    move = False
    rotate = False
    taper = False
    resize = True
    squeeze = False
    gauss = False
    obfuscate = False
    stretch = False
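    # all transformation flags above can be toggled at runtime via the keyboard handlers in the main loop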
    transform_interval = 1
    angle_step = 5
    tv_factor = 1

    ctr = -1
    pred = -1
    frame_read = False
    fix_frame = False
    patch_transformer.maxangle = 5 / 180 * math.pi
    patch_transformer.minangle = - 5 / 180 * math.pi
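    # start with a small rotation range (±5°); keys 9 and 3 widen or narrow it at runtime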
    loss = None
    while True:
        if not (fix_frame and frame_read):
            ret, frame = cap.read()

            # crop the frame to img_size_x columns
            frame = frame[:, :img_size_x, :]

        with torch.set_grad_enabled(True):
            # with torch.autograd.detect_anomaly():

            if not (fix_frame and frame_read):
                # resize the captured frame if needed
                frame = cv2.resize(frame, None, fx=1.0, fy=1.0, interpolation=cv2.INTER_AREA)
                frame_original = torch.tensor(frame, dtype=torch.float32, requires_grad=True, device="cuda")
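                # keep the raw webcam frame as a CUDA float tensor; the patch is re-applied to
                # frame_original on every iteration of the inner loop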
                frame = frame_original.clone()
                frame_read = True

            detections = None
            for _ in range(transform_interval):
                ctr += 1

                # transform the patch (ctr % 1 == 0 is always true, i.e. the patch is re-transformed every inner iteration)
                if ctr % 1 == 0:
                    trans_patch = patch_transformer(patch.cuda(), torch.ones([1, 14, 5]).cuda(), img_size_x, img_size_y,
                                                    do_rotate=rotate, rand_loc=move, rand_size=resize,
                                                    rand_squeeze=squeeze, gauss=gauss, obfuscate=obfuscate,
                                                    stretch=stretch, do_taper=taper)

                    # extract bounding box (x1, y1, x2, y2)
                    try:
                        bounding_box = extract_bounding_box(trans_patch)
                    except Exception:
                        print("zero-sized patch ... ")  # bounding_box keeps its value from the previous iteration

                # apply patch
                frame = patch_applier(frame_original, trans_patch)

                # run the detector on the patched frame
                if ctr % 1 == 0 or detections is None:
                    detections, raw_results = model.forward_pt(frame)

                if ctr % 1 == 0:
                    # debug_preds()
                    pass

                # calculate Cosine Similarity Matrix
                # imgs, csms, clss = calculate_csms(frame, raw_results)
                imgs, csms, clss = calculate_csms(frame, detections)
                for i in range(len(csms)):
                    # show only person predictions
                    if clss[i] == 0:
                        show([torch.min(torch.ones_like(imgs[i]), imgs[i]/255), csms[i].T])

                # iou, pred = get_best_prediction(bounding_box, raw_results, 15)  # get cat
                iou, pred = get_best_prediction(bounding_box, raw_results, 0)  # get person
                # iou, pred = get_best_prediction(bounding_box, raw_results, 12)  # get parking meter
                # iou, pred = get_best_prediction(bounding_box, raw_results, 11)  # get stop sign
                # iou, pred = get_best_prediction(bounding_box, raw_results, 8)  # get boat
                # iou, pred = get_best_prediction(bounding_box, raw_results, 62)  # get tv
                # pred = get_best_prediction(bounding_box, raw_results, 42)  # get forked

                # pred = get_avg_prediction(raw_results, 15)  # make everything cats
                # pred = get_avg_prediction(raw_results, 0)  # make everything person

                if pred is not None:
                    # print("P:{}".format(pred))

                    # loss
                    loss = -1 * pred  # maximize the target class score
                    # loss = 1 * pred  # adversarial: minimize the class score instead

                    # total variation loss component
                    tv_loss = total_variation(patch)
                    loss += tv_factor * tv_loss

                    # IoU loss component (low iou = high loss)
                    loss += 0.1 * (1 - iou)
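                    # total loss = -(class score) + tv_factor * tv_loss + 0.1 * (1 - IoU)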

                    if not isinstance(loss, torch.Tensor):
                        continue

            if loss is None:
                print("loss is None")
                continue

            loss.backward(retain_graph=True)
            loss = None
            gradient_sum += patch.grad

            # sgn_grads = torch.sign(optimizer.param_groups[0]['params'][0].grad)
            # optimizer.param_groups[0]['params'][0].grad = sgn_grads
            # optimizer.step()
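            # manual update in place of optimizer.step(): move against the sign of the accumulated
            # gradient with a fixed step size (patch.grad is never zeroed, so the sign is taken over
            # the gradient summed across frames)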
            patch.data -= torch.sign(gradient_sum) * 0.001
            patch.data = patch.detach().clone().clamp(MIN_THRESHOLD, 0.99999).data
            gradient_sum = 0

            # show the frame with rendered detections
            # cv2.imshow("img", results_np.render()[0])
            try:
                cv2.imshow("img", detections.render()[0])
            except Exception as e:
                print(f"could not render detections: {e}")

            key = cv2.waitKey(25) & 0xFF
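            # keyboard controls: q quit, s save patch, f freeze frame, u move, o rotate, t resize,
            # z squeeze, g gauss, p taper, h obfuscate, e stretch, +/- patch size, 9/3 rotation range,
            # a/y total-variation weight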
            if key == ord("q"):
                cv2.destroyAllWindows()
                break
            if key == ord("u"):
                move = not move
                print("Move: {}".format(move))
            if key == ord("o"):
                rotate = not rotate
                print("Rotate: {}".format(rotate))
            if key == ord("t"):
                resize = not resize
                print("Resize: {}".format(resize))
            if key == ord("z"):
                squeeze = not squeeze
                print("Squeeze: {}".format(squeeze))
            if key == ord("g"):
                gauss = not gauss
                print("Gauss: {}".format(gauss))
            if key == ord("p"):
                taper = not taper
                print("Taper: {}".format(taper))
            if key == ord("h"):
                obfuscate = not obfuscate
                print(f"Obfuscate: {obfuscate}")
            if key == ord("e"):
                stretch = not stretch
                print(f"Stretch: {stretch}")
            if key == ord("+"):
                # transform_interval += 1
                patch_transformer.maxsize += 0.01
                patch_transformer.minsize += 0.01
                # print("Transform Interval: {}".format(transform_interval))
                print(f"Size {patch_transformer.minsize}")
            if key == ord("-"):
                # transform_interval -= 1
                patch_transformer.maxsize -= 0.01
                patch_transformer.minsize -= 0.01
                print(f"Size {patch_transformer.minsize}")
                # transform_interval = max(transform_interval, 1)
                # print("Transform Interval: {}".format(transform_interval))
            if key == ord("9"):
                patch_transformer.maxangle = min(patch_transformer.maxangle + (math.pi * angle_step / 180), math.pi)
                patch_transformer.minangle = max(patch_transformer.minangle - (math.pi * angle_step / 180), -math.pi)
                print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
            if key == ord("3"):
                patch_transformer.maxangle = max(patch_transformer.maxangle - (math.pi * angle_step / 180), 0)
                patch_transformer.minangle = min(patch_transformer.minangle + (math.pi * angle_step / 180), 0)
                print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
            if key == ord("s"):
                save_image(patch)
            if key == ord("f"):
                fix_frame = not fix_frame
                print("Fix Frame: {}".format(fix_frame))
            if key == ord("a"):
                tv_factor += 1
                print("Total Variation Loss Factor: {}".format(tv_factor))
            if key == ord("y"):
                tv_factor -= 1
                print("Total Variation Loss Factor: {}".format(tv_factor))

        # calculate FPS
        fps += 1
        TIME = time.time() - start_time
        if TIME > display_time:
            # print("FPS:", fps / TIME)
            fps = 0
            start_time = time.time()
        # time.sleep(0.2)

    cap.release()
    cv2.destroyAllWindows()