import datetime

import numpy as np
from PIL import Image
import torch
import cv2
import time
import math
import matplotlib
from torch import optim

import CSM
import models
from models.common import Detections
from utils.external import TotalVariation
from utils.general import scale_coords

matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

from torchvision.transforms import transforms

from patch_transformer import PatchTransformer, PatchApplier

# Model
model = torch.hub.load('ultralytics/yolov5', 'yolov5l')  # or yolov5m, yolov5l, yolov5x, custom

# model = torch.hub.load('ultralytics/yolov3', 'yolov3')

MIN_THRESHOLD = 0.00001

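# COCO class names in YOLO index order (index 15 = "cat", the class targeted in the main loop)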
classes = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus",
           "train", "truck", "boat", "traffic light", "fire hydrant",
           "stop sign", "parking meter", "bench", "bird", "cat", "dog",
           "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
           "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
           "skis", "snowboard", "sports ball", "kite", "baseball bat",
           "baseball glove", "skateboard", "surfboard", "tennis racket",
           "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
           "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
           "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
           "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
           "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
           "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

PATH = "saved_patches/realcat.jpg"
PATH = "saved_patches/fatcat.jpg"
PATH = "saved_patches/smallcat.jpg"
PATCH_SIZE = 300

total_variation = TotalVariation()


def show(img):
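    """Display a tensor image with matplotlib (detached from the graph and moved to the CPU)."""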
    plt.imshow(img.detach().cpu())
    plt.show()


def debug_preds():
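    """Debug helper: print the bounding box and confidence of every 'cat' detection in the global results."""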
    detected_classes = [int(results.pred[0][i][-1]) for i in range(0, len(results.pred[0]))]
    # print(detected_classes)
    for det in results.pred[0]:
        if int(det[-1]) == 15:  # cat
            print("Pred BB: ", end="")
            # print("x1:y1 : {}:{}".format(float(det[0]), float(det[1])))
            # print("x2:y2 : {}:{}".format(float(det[2]), float(det[3])))
            print("{} {} {} {} ({}):".format(
                int(det[0]), int(det[1]), int(det[2]), int(det[3]), float(det[-2])))


# from https://github.com/wangzh0ng/adversarial_yolo2
def read_image(path):
    """
    Read an input image to be used as a patch

    :param path: Path to the image to be read.
    :return: Returns the transformed patch as a pytorch Tensor.
    """
    patch_img = Image.open(path).convert('RGB')
    tf = transforms.Resize((PATCH_SIZE, PATCH_SIZE))
    patch_img = tf(patch_img)
    tf = transforms.ToTensor()

    return tf(patch_img)


def extract_bounding_box(patch):
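    """
    Compute the bounding box (x1, y1, x2, y2) of the non-black region of a transformed patch.

    The patch is thresholded against MIN_THRESHOLD and the first/last non-zero rows and
    columns of the resulting mask give the box coordinates.
    """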
    mask = torch.where(patch < MIN_THRESHOLD, torch.zeros(patch.shape).cuda(), torch.ones(patch.shape).cuda()).sum(2)

    bb_x1 = torch.nonzero(mask.sum(0))[0]
    bb_y1 = torch.nonzero(mask.sum(1))[0]
    bb_x2 = torch.nonzero(mask.sum(0))[-1]
    bb_y2 = torch.nonzero(mask.sum(1))[-1]

    return torch.stack([bb_x1, bb_y1, bb_x2, bb_y2]).sum(1)


def get_avg_prediction(res, cls_nr):
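    """Average the raw score for class cls_nr over all predictions (the confidence filter is currently disabled by 'or True')."""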
    avg_prediction = 0

    ctr = 0
    if res is None:
        return 0

    for pred in res:
        if pred[5:].max() > 0.4 or True:
            ctr += 1
            avg_prediction += pred[cls_nr + 5]

    return avg_prediction / (ctr if ctr > 0 else 1)


# source https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou


def save_image(image):
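    """Convert the patch tensor to a PIL image, display it, and save it under saved_patches/ with a timestamp filename."""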
    print("save image called!")
    im = transforms.ToPILImage('RGB')(image)
    plt.imshow(im)
    plt.show()
    im.save(f"saved_patches/{time.time()}.jpg")


def get_best_prediction(true_box, res, cls_nr):
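    """
    Return (iou, score) for the prediction whose box has the highest IoU with true_box
    (the patch bounding box), where score is that prediction's raw score for class cls_nr.
    """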
    min_distance = float("inf")
    max_iou = float(0)
    best_prediction = None

    for pred in res:
        # pred_dist = torch.dist(true_box.cuda(), pred[:4])
        pred_iou = bb_intersection_over_union(true_box, pred[:4].float())

        if pred_iou >= max_iou:  # and pred[5:].max() > 0.1:
            max_iou = pred_iou
            best_prediction = pred[cls_nr + 5]

    # print(f"max found iou: {max_iou}")

    return max_iou, best_prediction


def calculate_csms(frame, predictions):
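    """
    Crop every predicted box out of the frame and hand the crops together with their
    predictions to CSM.calc_yolo_csms, which (per the caller's comment) computes
    cosine-similarity maps for them. Returns the crops and their similarity maps.
    """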

    imgs_and_preds = []

    for pred in predictions:
        x1, y1, x2, y2, conf = pred[:5].float()

        pred_img_section = frame.flip(2)[int(y1):int(y2), int(x1):int(x2), :]
        tup = (pred_img_section, pred, frame, x1, y1, x2, y2)
        # print(tup)
        imgs_and_preds.append(tup)

        # if conf > 0.8:
        #     cls = classes[int(pred[5:].argmax())]
            # print(f"{cls}: {conf} - {pred[:5].float()}")
            # show(frame.flip(2)[int(y1):int(y2), int(x1):int(x2), :] / 255.)
            # print("done")

    imgs, csms = CSM.calc_yolo_csms(imgs_and_preds)

    # return both so the main loop can unpack and display them
    return imgs, csms


if __name__ == "__main__":
    # init
    patch_transformer = PatchTransformer().cuda()
    patch_applier = PatchApplier().cuda()

    # set start time to current time
    start_time = time.time()

    # display the frame rate every 2 seconds
    display_time = 2

    # initialize the FPS counter
    fps = 0

    # create the video capture object for the webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("Cannot open webcam")

    patch = read_image(PATH)
    # patch = torch.rand_like(patch)
    patch.requires_grad = True

    optimizer = optim.Adam([patch], lr=0.0001, amsgrad=True)
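    # NOTE: the Adam optimizer is currently unused; the patch is updated below with a manual signed-gradient step.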
    gradient_sum = 0

    # img_size_x = 640
    img_size_x = 480
    img_size_y = 480

    # Launch Settings
    # move = True
    # rotate = True
    # taper = True
    # resize = True
    # squeeze = True
    # gauss = True
    # obfuscate = True
    # stretch = True

    move = False
    rotate = False
    taper = False
    resize = True
    squeeze = False
    gauss = False
    obfuscate = False
    stretch = False
    transform_interval = 1
    angle_step = 5
    tv_factor = 1

    ctr = -1
    pred = -1
    frame_read = False
    fix_frame = False
    patch_transformer.maxangle = 5 / 180 * math.pi
    patch_transformer.minangle = - 5 / 180 * math.pi
    loss = None
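    # Main loop: grab a webcam frame, transform and apply the patch, run the detector,
    # build the loss (target class score + total variation + IoU term) and update the patch.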
    while True:
        if not (fix_frame and frame_read):
            ret, frame = cap.read()

            # crop the frame to img_size_x columns
            frame = frame[:, :img_size_x, :]

        with torch.set_grad_enabled(True):
            # with torch.autograd.detect_anomaly():

            if not (fix_frame and frame_read):
                # resize the captured frame if needed
                frame = cv2.resize(frame, None, fx=1.0, fy=1.0, interpolation=cv2.INTER_AREA)
                frame_original = torch.tensor(frame, dtype=torch.float32, requires_grad=True).cuda()
                frame = frame_original.clone()
                frame_read = True

            results = None
            for _ in range(transform_interval):
                ctr += 1

                # transform the patch (every transform_interval frames)
                if ctr % 1 == 0:
                    trans_patch = patch_transformer(patch.cuda(), torch.ones([1, 14, 5]).cuda(), img_size_x, img_size_y,
                                                    do_rotate=rotate, rand_loc=move, rand_size=resize,
                                                    rand_squeeze=squeeze, gauss=gauss, obfuscate=obfuscate,
                                                    stretch=stretch, do_taper=taper)

                    # extract bounding box (x1, y1, x2, y2)
                    try:
                        bounding_box = extract_bounding_box(trans_patch)
                    except Exception:
                        print("zero-sized patch ... ")

                # apply patch
                frame = patch_applier(frame_original, trans_patch)

                # detect object on our frame
                if ctr % 1 == 0 or results is None:
                    results, raw_results = model.forward_pt(frame)

                if ctr % 1 == 0:
                    # debug_preds()
                    pass

                # calculate Cosine Similarity Matrix
                imgs, csms = calculate_csms(frame, raw_results)
                for i in range(len(imgs)):
                    show(imgs[i])
                    show(csms[i])

                iou, pred = get_best_prediction(bounding_box, raw_results, 15)  # get cat
                # iou, pred = get_best_prediction(bounding_box, raw_results, 0)  # get person
                # iou, pred = get_best_prediction(bounding_box, raw_results, 12)  # get parking meter
                # iou, pred = get_best_prediction(bounding_box, raw_results, 11)  # get stop sign
                # iou, pred = get_best_prediction(bounding_box, raw_results, 8)  # get boat
                # iou, pred = get_best_prediction(bounding_box, raw_results, 62)  # get tv
                # iou, pred = get_best_prediction(bounding_box, raw_results, 42)  # get fork

                # pred = get_avg_prediction(raw_results, 15)  # make everything cats
                # pred = get_avg_prediction(raw_results, 0)  # make everything person

                if pred is not None:
                    # print("P:{}".format(pred))

                    # loss
                    loss = -1 * pred  # optimize class
                    # loss = 1 * pred  # adversarial

                    # total variation loss component
                    tv_loss = total_variation(patch)
                    loss += tv_factor * tv_loss

                    # IoU loss component (low iou = high loss)
                    loss += 0.1 * (1 - iou)

                    if not isinstance(loss, torch.Tensor):
                        continue

            if loss is None:
                print("loss is None")
                continue
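
            # Backpropagate through the detector into the patch, then take a signed-gradient
            # step on the patch and clamp its pixel values to a valid range.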

            loss.backward(retain_graph=True)
            loss = None
            gradient_sum += patch.grad

            # sgn_grads = torch.sign(optimizer.param_groups[0]['params'][0].grad)
            # optimizer.param_groups[0]['params'][0].grad = sgn_grads
            # optimizer.step()
            patch.data -= torch.sign(gradient_sum) * 0.001  # * 0 # TODO reactivate
            patch.data = patch.detach().clone().clamp(MIN_THRESHOLD, 0.99999).data
            gradient_sum = 0

            # show the frame with detections
            # cv2.imshow("img", results_np.render()[0])
            try:
                cv2.imshow("img", results.render()[0])
            except Exception:
                print("could not render detections")

            key = cv2.waitKey(25) & 0xFF
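            # Keyboard controls: 'q' quits; the remaining keys toggle patch transformations
            # and adjust training parameters at runtime.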
            if key == ord("q"):
                cv2.destroyAllWindows()
                break
            if key == ord("u"):
                move = not move
                print("Move: {}".format(move))
            if key == ord("o"):
                rotate = not rotate
                print("Rotate: {}".format(rotate))
            if key == ord("t"):
                resize = not resize
                print("Resize: {}".format(resize))
            if key == ord("z"):
                squeeze = not squeeze
                print("Squeeze: {}".format(squeeze))
            if key == ord("g"):
                gauss = not gauss
                print("Gauss: {}".format(gauss))
            if key == ord("p"):
                taper = not taper
                print("Taper: {}".format(taper))
            if key == ord("h"):
                obfuscate = not obfuscate
                print(f"Obfuscate: {obfuscate}")
            if key == ord("e"):
                stretch = not stretch
                print(f"Stretch: {stretch}")
            if key == ord("+"):
                # transform_interval += 1
                patch_transformer.maxsize += 0.01
                patch_transformer.minsize += 0.01
                # print("Transform Interval: {}".format(transform_interval))
                print(f"Size {patch_transformer.minsize}")
            if key == ord("-"):
                # transform_interval -= 1
                patch_transformer.maxsize -= 0.01
                patch_transformer.minsize -= 0.01
                print(f"Size {patch_transformer.minsize}")
                # transform_interval = max(transform_interval, 1)
                # print("Transform Interval: {}".format(transform_interval))
            if key == ord("9"):
                patch_transformer.maxangle = min(patch_transformer.maxangle + (math.pi * angle_step / 180), math.pi)
                patch_transformer.minangle = max(patch_transformer.minangle - (math.pi * angle_step / 180), -math.pi)
                print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
            if key == ord("3"):
                patch_transformer.maxangle = max(patch_transformer.maxangle - (math.pi * angle_step / 180), 0)
                patch_transformer.minangle = min(patch_transformer.minangle + (math.pi * angle_step / 180), 0)
                print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
            if key == ord("s"):
                save_image(patch)
            if key == ord("f"):
                fix_frame = not fix_frame
                print("Fix Frame: {}".format(fix_frame))
            if key == ord("a"):
                tv_factor += 1
                print("Total Variation Loss Factor: {}".format(tv_factor))
            if key == ord("y"):
                tv_factor -= 1
                print("Total Variation Loss Factor: {}".format(tv_factor))

        # calculate FPS
        fps += 1
        TIME = time.time() - start_time
        if TIME > display_time:
            # print("FPS:", fps / TIME)
            fps = 0
            start_time = time.time()
        # time.sleep(0.2)

    cap.release()
    cv2.destroyAllWindows()