Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Pavlo Beylin
MaD Patch Yolov5
Commits
a678caa2
Commit
a678caa2
authored
Sep 21, 2021
by
Pavlo Beylin
Browse files
Include IoU into distance and loss calculation.
parent
3e896e33
Changes
1
Hide whitespace changes
Inline
Side-by-side
main.py
View file @
a678caa2
import
datetime
import
numpy
as
np
from
PIL
import
Image
import
torch
import
cv2
import
time
import
math
import
matplotlib
from
torch
import
optim
...
...
@@ -21,7 +24,6 @@ from patch_transformer import PatchTransformer, PatchApplier
model
=
torch
.
hub
.
load
(
'ultralytics/yolov5'
,
'yolov5l'
)
# or yolov5m, yolov5l, yolov5x, cu
# model = torch.hub.load('ultralytics/yolov3', 'yolov3')
MIN_THRESHOLD
=
0.00001
...
...
@@ -45,10 +47,12 @@ PATCH_SIZE = 300
total_variation
=
TotalVariation
()
def show(img):
    """Display ``img`` with matplotlib.

    Args:
        img: object exposing ``detach()`` and ``cpu()`` (presumably a
            ``torch.Tensor`` in a layout ``plt.imshow`` accepts -- confirm
            against callers).
    """
    # Drop the autograd graph and move the data to host memory before
    # handing it to matplotlib.
    plt.imshow(img.detach().cpu())
    plt.show()
def
debug_preds
():
detected_classes
=
[
int
(
results
.
pred
[
0
][
i
][
-
1
])
for
i
in
range
(
0
,
len
(
results
.
pred
[
0
]))]
# print(detected_classes)
...
...
@@ -103,18 +107,48 @@ def get_avg_prediction(res, cls_nr):
return
avg_prediction
/
(
ctr
if
ctr
>
0
else
1
)
# source https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are indexable as ``(x1, y1, x2, y2)``. Areas use the inclusive
    pixel convention of the referenced article, i.e. a side spans
    ``x2 - x1 + 1`` units.

    The elements may be plain numbers or 0-dim tensors. No value is forced
    through ``float()``, so when tensors are passed the result stays attached
    to the autograd graph through both numerator and denominator.

    Args:
        boxA: first box, ``(x1, y1, x2, y2)``.
        boxB: second box, ``(x1, y1, x2, y2)``.

    Returns:
        ``intersection / union``; ``0.0`` when the union is not positive
        (degenerate boxes), instead of dividing by zero.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # compute the area of the intersection rectangle (clamped at zero when
    # the boxes do not overlap)
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    # compute the area of both input rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    # union = sum of both areas minus the intersection area, which would
    # otherwise be counted twice
    unionArea = boxAArea + boxBArea - interArea
    if unionArea <= 0:
        # degenerate input: avoid a division by zero / negative ratio
        return 0.0

    # Python 3 "/" is already true division; wrapping the denominator in
    # float() would detach it from the autograd graph for tensor inputs.
    return interArea / unionArea
def save_image(image):
    """Show ``image`` and write it to ``saved_patches/<unix time>.jpg``.

    Args:
        image: input accepted by ``transforms.ToPILImage('RGB')``
            (presumably the patch tensor -- confirm against callers).
    """
    import os  # local import: only needed to make sure the target dir exists

    print("save image called!")
    im = transforms.ToPILImage('RGB')(image)
    plt.imshow(im)
    plt.show()

    # PIL does not create missing directories; without this the first save
    # on a fresh checkout fails.
    os.makedirs("saved_patches", exist_ok=True)
    im.save(f"saved_patches/{time.time()}.jpg")
def get_best_prediction(true_box, res, cls_nr):
    """Pick the prediction whose box overlaps ``true_box`` the most.

    Each entry of ``res`` is indexed as ``pred[:4]`` for the box passed to
    ``bb_intersection_over_union`` and ``pred[cls_nr + 5]`` for the value
    returned for class ``cls_nr`` (layout assumed from this indexing --
    confirm against the model output).

    Args:
        true_box: reference box, ``(x1, y1, x2, y2)``.
        res: iterable of prediction rows.
        cls_nr: class index whose entry is returned for the best row.

    Returns:
        ``(max_iou, best_prediction)``. ``best_prediction`` is ``None`` when
        ``res`` is empty.
    """
    max_iou = float(0)
    best_prediction = None

    for pred in res:
        pred_iou = bb_intersection_over_union(true_box, pred[:4].float())

        # ">=" means ties are won by the later row, including rows with
        # zero overlap. The earlier confidence gate (pred[5:].max() > 0.1)
        # and the distance-based selection are intentionally not applied.
        if pred_iou >= max_iou:
            max_iou = pred_iou
            best_prediction = pred[cls_nr + 5]

    print(f"max found iou: {max_iou}")
    return max_iou, best_prediction
if
__name__
==
"__main__"
:
...
...
@@ -141,75 +175,97 @@ if __name__ == "__main__":
patch
.
requires_grad
=
True
optimizer
=
optim
.
Adam
([
patch
],
lr
=
0.0001
,
amsgrad
=
True
)
gradient_sum
=
0
img_size_x
=
640
img_size_y
=
480
ctr
=
-
1
pred
=
-
1
move
=
False
rotate
=
False
transform_interval
=
10
move
=
True
rotate
=
True
transform_interval
=
1
angle_step
=
5
frame_read
=
False
fix_frame
=
False
patch_transformer
.
maxangle
=
math
.
pi
patch_transformer
.
minangle
=
-
math
.
pi
loss
=
None
while
True
:
ctr
+=
1
ret
,
frame
=
cap
.
read
()
if
not
(
fix_frame
and
frame_read
):
ret
,
frame
=
cap
.
read
()
with
torch
.
set_grad_enabled
(
True
):
# with torch.autograd.detect_anomaly():
# resize our captured frame if we need
frame
=
cv2
.
resize
(
frame
,
None
,
fx
=
1.0
,
fy
=
1.0
,
interpolation
=
cv2
.
INTER_AREA
)
frame
=
torch
.
tensor
(
frame
,
dtype
=
torch
.
float32
,
requires_grad
=
True
).
cuda
()
if
not
(
fix_frame
and
frame_read
):
# resize our captured frame if we need
frame
=
cv2
.
resize
(
frame
,
None
,
fx
=
1.0
,
fy
=
1.0
,
interpolation
=
cv2
.
INTER_AREA
)
frame_original
=
torch
.
tensor
(
frame
,
dtype
=
torch
.
float32
,
requires_grad
=
True
).
cuda
()
frame
=
frame_original
.
clone
()
frame_read
=
True
results
=
None
for
_
in
range
(
transform_interval
):
ctr
+=
1
# transform patch (every transform_interval of frames)
if
ctr
%
1
==
0
:
trans_patch
=
patch_transformer
(
patch
.
cuda
(),
torch
.
ones
([
1
,
14
,
5
]).
cuda
(),
img_size_x
,
img_size_y
,
do_rotate
=
rotate
,
rand_loc
=
move
)
trans_patch
=
torch
.
transpose
(
trans_patch
[
0
][
0
].
T
,
0
,
1
)
# extract bounding box (x1, y1, x2, y2)
bounding_box
=
extract_bounding_box
(
trans_patch
)
# apply patch
frame
=
patch_applier
(
frame_original
,
trans_patch
)
# transform patch (every couple of frames)
if
ctr
%
transform_interval
==
0
:
# print("{} {}".format(float(patch.min()), float(patch.max())))
trans_patch
=
patch_transformer
(
patch
.
cuda
(),
torch
.
ones
([
1
,
14
,
5
]).
cuda
(),
img_size_x
,
img_size_y
,
do_rotate
=
rotate
,
rand_loc
=
move
)
trans_patch
=
torch
.
transpose
(
trans_patch
[
0
][
0
].
T
,
0
,
1
)
# detect object on our frame
if
ctr
%
1
==
0
or
results
is
None
:
results
,
raw_results
=
model
.
forward_pt
(
frame
)
# extract bounding box (x1, y1, x2, y2)
bounding_box
=
extract_bounding_box
(
trans_patch
)
# print("True BB: {} {} {} {}".format(int(bounding_box[0]), int(bounding_box[1]), int(bounding_box[2]),
# int(bounding_box[3])))
if
ctr
%
1
==
0
:
# debug_preds()
pass
# apply patch
frame
=
patch_applier
(
frame
,
t
ra
ns_patch
)
iou
,
pred
=
get_best_prediction
(
bounding_box
,
raw_results
,
15
)
# get cats
# pred = get_best_prediction(bounding_box
, ra
w_results, 42) # get forked
# detect object on our frame
results
,
raw_results
=
model
.
forward_pt
(
frame
)
# pred = get_avg_prediction(raw_results, 15) # make everything cats
# pred = get_avg_prediction(raw_results, 0) # make everything person
if
ctr
%
1
==
0
:
# debug_preds()
pass
if
pred
is
not
None
:
# print("P:{}".format(pred))
# pred = get_best_prediction(bounding_box, raw_results, 15) # get cats
# pred = get_best_prediction(bounding_box, raw_results, 42) # get forked
# loss
loss
=
-
1
*
pred
# optimize class
# loss = 1 * pred # adversarial
pred
=
get_avg_prediction
(
raw_results
,
15
)
# make everything cats
pred
=
get_avg_prediction
(
raw_results
,
0
)
# make everything person
# total variation loss component
tv_loss
=
total_variation
(
patch
)
loss
+=
tv_loss
if
pred
is
not
None
:
print
(
"P:{}"
.
format
(
pred
)
)
# IoU loss component (low iou = high loss)
loss
+=
0.1
*
(
1
-
iou
)
# loss
# loss = -1 * pred # optimize class
loss
=
1
*
pred
# adversarial
if
not
isinstance
(
loss
,
torch
.
Tensor
):
continue
# Total Variation Loss
tv_loss
=
total_variation
(
patch
)
loss
+=
tv_loss
if
loss
is
None
:
print
(
"loss is None"
)
continue
if
not
isinstance
(
loss
,
torch
.
Tensor
):
continue
loss
.
backward
(
retain_graph
=
True
)
loss
=
None
gradient_sum
+=
patch
.
grad
loss
.
backward
(
retain_graph
=
True
)
# sgn_grads = torch.sign(
optimizer.param_groups[0]['params'][0].grad
)
# optimizer.
param_groups[0]['params'][0].grad = sgn_grads
# optimizer.step()
patch
.
data
-
=
torch
.
sign
(
patch
.
grad
)
*
0.001
patch
.
data
=
patch
.
detach
().
clone
().
clamp
(
MIN_THRESHOLD
,
0.99999
).
data
# sgn_grads = torch.sign(optimizer.param_groups[0]['params'][0].grad
)
#
optimizer.param_groups[0]['params'][0].grad
= sgn_grads
# optimizer.
step()
patch
.
data
-=
torch
.
sign
(
gradient_sum
)
*
0.001
patch
.
data
=
patch
.
detach
().
clone
().
clamp
(
MIN_THRESHOLD
,
0.99999
).
data
gradient_sum
=
0
# show us frame with detection
# cv2.imshow("img", results_np.render()[0])
...
...
@@ -235,6 +291,19 @@ if __name__ == "__main__":
transform_interval
-=
1
transform_interval
=
max
(
transform_interval
,
1
)
print
(
"Transform Interval: {}"
.
format
(
transform_interval
))
if
key
==
ord
(
"9"
):
patch_transformer
.
maxangle
=
min
(
patch_transformer
.
maxangle
+
(
math
.
pi
*
angle_step
/
180
),
math
.
pi
)
patch_transformer
.
minangle
=
max
(
patch_transformer
.
minangle
-
(
math
.
pi
*
angle_step
/
180
),
-
math
.
pi
)
print
(
"Transformer MaxAngle: {}°"
.
format
(
patch_transformer
.
maxangle
/
math
.
pi
*
180
))
if
key
==
ord
(
"3"
):
patch_transformer
.
maxangle
=
max
(
patch_transformer
.
maxangle
-
(
math
.
pi
*
angle_step
/
180
),
0
)
patch_transformer
.
minangle
=
min
(
patch_transformer
.
minangle
+
(
math
.
pi
*
angle_step
/
180
),
0
)
print
(
"Transformer MaxAngle: {}°"
.
format
(
patch_transformer
.
maxangle
/
math
.
pi
*
180
))
if
key
==
ord
(
"s"
):
save_image
(
patch
)
if
key
==
ord
(
"f"
):
fix_frame
=
not
fix_frame
print
(
"Fix Frame: {}"
.
format
(
fix_frame
))
# calculate FPS
fps
+=
1
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment