import datetime

import numpy as np
from PIL import Image
import torch
import cv2
import time
import math
import matplotlib
from torch import optim

import models
from models.common import Detections
from utils.external import TotalVariation
from utils.general import scale_coords

matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

from torchvision.transforms import transforms

from patch_transformer import PatchTransformer, PatchApplier

# Model
model = torch.hub.load('ultralytics/yolov5', 'yolov5l')  # or yolov5m, yolov5l, yolov5x, custom

# model = torch.hub.load('ultralytics/yolov3', 'yolov3')

MIN_THRESHOLD = 0.00001

classes = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus",
           "train", "truck", "boat", "traffic light", "fire hydrant",
           "stop sign", "parking meter", "bench", "bird", "cat", "dog",
           "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
           "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
           "skis", "snowboard", "sports ball", "kite", "baseball bat",
           "baseball glove", "skateboard", "surfboard", "tennis racket",
           "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
           "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
           "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
           "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
           "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
           "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

PATH = "saved_patches/fatcat.jpg"
PATCH_SIZE = 300

total_variation = TotalVariation()


def show(img):
    plt.imshow(img.detach().cpu())
    plt.show()


def debug_preds():
    detected_classes = [int(results.pred[0][i][-1]) for i in range(0, len(results.pred[0]))]
    # print(detected_classes)
    for det in results.pred[0]:
        if int(det[-1]) == 15:  # cat
            print("Pred BB: ", end="")
            # print("x1:y1 : {}:{}".format(float(det[0]), float(det[1])))
            # print("x2:y2 : {}:{}".format(float(det[2]), float(det[3])))
            print("{} {} {} {} ({}):".format(
                int(det[0]), int(det[1]), int(det[2]), int(det[3]), float(det[-2])))
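
# (debug_preds reads the global `results` Detections object produced in the main loop below
# and prints the box corners and confidence of every detection of class 15, i.e. "cat".)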


# from https://github.com/wangzh0ng/adversarial_yolo2
def read_image(path):
    """
    Read an input image to be used as a patch

    :param path: Path to the image to be read.
    :return: Returns the transformed patch as a pytorch Tensor.
    """
    patch_img = Image.open(path).convert('RGB')
    tf = transforms.Resize((PATCH_SIZE, PATCH_SIZE))
    patch_img = tf(patch_img)
    tf = transforms.ToTensor()

    return tf(patch_img)
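
# Example (illustrative): read_image("saved_patches/fatcat.jpg") returns a float tensor of
# shape (3, PATCH_SIZE, PATCH_SIZE) with values in [0, 1], suitable as an optimizable patch.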


def extract_bounding_box(patch):
    mask = torch.where(patch < MIN_THRESHOLD, torch.zeros(patch.shape).cuda(), torch.ones(patch.shape).cuda()).sum(2)

    bb_x1 = torch.nonzero(mask.sum(0))[0]
    bb_y1 = torch.nonzero(mask.sum(1))[0]
    bb_x2 = torch.nonzero(mask.sum(0))[-1]
    bb_y2 = torch.nonzero(mask.sum(1))[-1]

    return torch.stack([bb_x1, bb_y1, bb_x2, bb_y2]).sum(1)
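
# (Assuming an (H, W, C) patch layout, `mask` counts per pixel how many channels exceed
# MIN_THRESHOLD; the first and last non-zero column/row sums give the enclosing box
# (x1, y1, x2, y2) of the rendered patch in pixels.)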


def get_avg_prediction(res, cls_nr):
    avg_prediction = 0

    ctr = 0
    if res is None:
        return 0

    for pred in res:
        if pred[5:].max() > 0.4 or True:  # confidence check currently disabled by the trailing "or True"
            ctr += 1
            avg_prediction += pred[cls_nr + 5]

    return avg_prediction / (ctr if ctr > 0 else 1)
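
# (Because the confidence check is disabled, this averages the raw score of class `cls_nr`
# over every prediction row; it backs the commented-out "make everything cats / person"
# losses in the main loop.)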


# source https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou
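
# Worked example (illustrative): boxes (0, 0, 10, 10) and (5, 5, 15, 15) overlap on a 6x6
# pixel region, so IoU = 36 / (121 + 121 - 36) ≈ 0.17 under the +1 pixel convention used here.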

def save_image(image):
    print("save image called!")
    im = transforms.ToPILImage('RGB')(image)
    plt.imshow(im)
    plt.show()
    im.save(f"saved_patches/{time.time()}.jpg")

def get_best_prediction(true_box, res, cls_nr):
    min_distance = float("inf")
    max_iou = float(0)
    best_prediction = None

    for pred in res:
        # pred_dist = torch.dist(true_box.cuda(), pred[:4])
        pred_iou = bb_intersection_over_union(true_box, pred[:4].float())

        if pred_iou >= max_iou:  # and pred[5:].max() > 0.1:
            max_iou = pred_iou
            best_prediction = pred[cls_nr + 5]

    print(f"max found iou: {max_iou}")
    return max_iou, best_prediction
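
# (Each raw prediction row is treated here as [box (4), objectness, per-class scores], so
# pred[cls_nr + 5] reads the raw score of class `cls_nr` for the box best overlapping the patch.)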


if __name__ == "__main__":
    # init
    patch_transformer = PatchTransformer().cuda()
    patch_applier = PatchApplier().cuda()

    # set start time to current time
    start_time = time.time()

    # displays the frame rate every 2 second
    display_time = 2

    # Set primary FPS to 0
    fps = 0

    # we create the video capture object cap
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise IOError("We cannot open webcam")

    patch = read_image(PATH)
    # patch = torch.rand_like(patch)
    patch.requires_grad = True

    optimizer = optim.Adam([patch], lr=0.0001, amsgrad=True)
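    # note: the Adam optimizer above belongs to the commented-out update path further down;
    # the active update applies a manual signed-gradient step to `patch` instead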
    gradient_sum = 0

    # img_size_x = 640
    img_size_x = 480
    img_size_y = 480

    # Launch Settings
    # move = True
    # rotate = True
    # taper = True
    # resize = True
    # squeeze = True
    # gauss = True
    # obfuscate = True
    # stretch = True

    move = False
    rotate = False
    taper = False
    resize = False
    squeeze = False
    gauss = False
    obfuscate = False
    stretch = False
    transform_interval = 1
    angle_step = 5
    tv_factor = 1

    ctr = -1
    pred = -1
    frame_read = False
    fix_frame = False
    patch_transformer.maxangle = 5 / 180 * math.pi
    patch_transformer.minangle = -5 / 180 * math.pi
    loss = None
    while True:
        if not (fix_frame and frame_read):
            ret, frame = cap.read()

            # cut image
            frame = frame[:, :img_size_x, :]

        with torch.set_grad_enabled(True):
            # with torch.autograd.detect_anomaly():

            if not (fix_frame and frame_read):
                # resize our captured frame if we need
                frame = cv2.resize(frame, None, fx=1.0, fy=1.0, interpolation=cv2.INTER_AREA)
                frame_original = torch.tensor(frame, dtype=torch.float32, requires_grad=True).cuda()
                frame = frame_original.clone()
                frame_read = True

            results = None
            for _ in range(transform_interval):
                ctr += 1

                # transform patch (every transform_interval of frames)
                if ctr % 1 == 0:
                    trans_patch = patch_transformer(patch.cuda(), torch.ones([1, 14, 5]).cuda(), img_size_x, img_size_y,
                                                    do_rotate=rotate, rand_loc=move, rand_size=resize,
                                                    rand_squeeze=squeeze, gauss=gauss, obfuscate=obfuscate,
                                                    stretch=stretch, do_taper=taper)
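                    # (The flags above enable random translation, rotation, resizing, squeezing,
                    # Gaussian noise, obfuscation, stretching and tapering of the rendered patch;
                    # they map to the key bindings handled further down.)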

                    # extract bounding box (x1, y1, x2, y2)
                    try:
                        bounding_box = extract_bounding_box(trans_patch)
                    except Exception:
                        print("zero-sized patch ... ")

                # apply patch
                frame = patch_applier(frame_original, trans_patch)

                # detect object on our frame
                if ctr % 1 == 0 or results is None:
                    results, raw_results = model.forward_pt(frame)
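                    # (forward_pt is assumed to be a custom hook in this YOLOv5 fork returning
                    # both the post-processed detections, used for display, and the raw
                    # prediction tensor, used for the loss.)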

                if ctr % 1 == 0:
                    # debug_preds()
                    pass

                iou, pred = get_best_prediction(bounding_box, raw_results, 15)  # get cat
                # iou, pred = get_best_prediction(bounding_box, raw_results, 12)  # get parking meter
                # iou, pred = get_best_prediction(bounding_box, raw_results, 11)  # get stop sign
                # iou, pred = get_best_prediction(bounding_box, raw_results, 8)  # get boat
                # iou, pred = get_best_prediction(bounding_box, raw_results, 62)  # get tv
                # pred = get_best_prediction(bounding_box, raw_results, 42)  # get forked

                # pred = get_avg_prediction(raw_results, 15)  # make everything cats
                # pred = get_avg_prediction(raw_results, 0)  # make everything person

                if pred is not None:
                    # print("P:{}".format(pred))

                    # loss
                    loss = -1 * pred  # optimize class
                    # loss = 1 * pred  # adversarial

                    # total variation loss component
                    tv_loss = total_variation(patch)
                    loss += tv_factor * tv_loss

                    # IoU loss component (low iou = high loss)
                    loss += 0.1 * (1 - iou)

                    if not isinstance(loss, torch.Tensor):
                        continue

            if loss is None:
                print("loss is None")
                continue

            loss.backward(retain_graph=True)
            loss = None
            gradient_sum += patch.grad

            # sgn_grads = torch.sign(optimizer.param_groups[0]['params'][0].grad)
            # optimizer.param_groups[0]['params'][0].grad = sgn_grads
            # optimizer.step()
            patch.data -= torch.sign(gradient_sum) * 0.001
            patch.data = patch.detach().clone().clamp(MIN_THRESHOLD, 0.99999).data
            gradient_sum = 0
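            # (The update above is a signed-gradient, FGSM-style step of size 0.001 on the
            # patch, after which the patch is clamped back into the valid pixel range.)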

            # show us frame with detection
            # cv2.imshow("img", results_np.render()[0])
            try:
                cv2.imshow("img", results.render()[0])
            except Exception:
                print("catproblem")

            key = cv2.waitKey(25) & 0xFF
            if key == ord("q"):
                cv2.destroyAllWindows()
                break
            if key == ord("u"):
                move = not move
                print("Move: {}".format(move))
            if key == ord("o"):
                rotate = not rotate
                print("Rotate: {}".format(rotate))
            if key == ord("t"):
                resize = not resize
                print("Resize: {}".format(resize))
            if key == ord("z"):
                squeeze = not squeeze
                print("Squeeze: {}".format(squeeze))
            if key == ord("g"):
                gauss = not gauss
                print("Gauss: {}".format(gauss))
            if key == ord("p"):
                taper = not taper
                print("Taper: {}".format(taper))
            if key == ord("h"):
                obfuscate = not obfuscate
                print(f"Obfuscate: {obfuscate}")
            if key == ord("e"):
                stretch = not stretch
                print(f"Obfuscate: {obfuscate}")
            if key == ord("+"):
                transform_interval += 1
                print("Transform Interval: {}".format(transform_interval))
            if key == ord("-"):
                transform_interval -= 1
                transform_interval = max(transform_interval, 1)
                print("Transform Interval: {}".format(transform_interval))
            if key == ord("9"):
                patch_transformer.maxangle = min(patch_transformer.maxangle + (math.pi * angle_step / 180), math.pi)
                patch_transformer.minangle = max(patch_transformer.minangle - (math.pi * angle_step / 180), -math.pi)
                print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
            if key == ord("3"):
                patch_transformer.maxangle = max(patch_transformer.maxangle - (math.pi * angle_step / 180), 0)
                patch_transformer.minangle = min(patch_transformer.minangle + (math.pi * angle_step / 180), 0)
                print("Transformer MaxAngle: {}°".format(patch_transformer.maxangle / math.pi * 180))
            if key == ord("s"):
                save_image(patch)
            if key == ord("f"):
                fix_frame = not fix_frame
                print("Fix Frame: {}".format(fix_frame))
            if key == ord("a"):
                tv_factor += 1
                print("Total Variation Loss Factor: {}".format(tv_factor))
            if key == ord("y"):
                tv_factor -= 1
                print("Total Variation Loss Factor: {}".format(tv_factor))

        # calculate FPS
        fps += 1
        TIME = time.time() - start_time
        if TIME > display_time:
            # print("FPS:", fps / TIME)
            fps = 0
            start_time = time.time()
        # time.sleep(0.2)

    cap.release()
    cv2.destroyAllWindows()