Commit 00ae676a authored by Pavlo Beylin

Add cosine similarity matrix calculation for YOLO predictions for all classes.

parent acf685f0
CSM.py 0 → 100644
import torch
import torch.nn.functional as F


def calc_yolo_csms(imgs_and_preds: torch.Tensor,
                   sign: bool = True,
                   rescale: bool = True) -> tuple:
    '''
    computes the cosine similarity matrices for the given YOLO detections

    Parameters
    ---------
    imgs_and_preds: iterable of tuples (img, pred, frame, x1, y1, x2, y2), one per detection
    sign: use the sign of the gradients to calculate the cosine similarity maps
    rescale: rescale the logits before applying softmax -> avoids the gradient obfuscation problem of large logits

    Returns
    ---------
    imgs: list of image sections, one per detection
    csms: list of cosine similarity matrices, one per detection
    '''
    csms = []  # cosine similarity matrices w.r.t. all possible output classes
    imgs = []
    for tup in imgs_and_preds:
        img, pred, frame, x1, y1, x2, y2 = tup
        if not img.requires_grad:
            img.requires_grad_()
        logit = pred[5:]
        imgs.append(img)

        # rescale network output to avoid gradient obfuscation
        if rescale:
            logit = logit / torch.max(torch.abs(logit)) * 10

        classes = len(logit)
        deltas = []
        for c in range(classes):
            # calculate the loss and compute the gradient w.r.t. the input for the current class
            y = torch.ones(1, device="cuda", dtype=torch.long) * c
            loss = F.cross_entropy(logit.unsqueeze(0), y)
            frame_grad = torch.autograd.grad(loss, frame, retain_graph=True)[0][:, 5:]
            img_grad = frame_grad[int(y1):int(y2), int(x1):int(x2), :]

            # take the sign of the gradient as in the original paper
            if sign:
                img_grad = torch.sign(img_grad)
            deltas.append(img_grad.clone().detach())
        deltas = torch.stack(deltas)

        # compute the cosine similarity matrix
        try:
            deltas = torch.max(deltas, dim=-3).values  # take only the maximum value over the channels
            deltas = deltas.view(classes, 1, -1)
            norm = torch.norm(deltas, p=2, dim=2, keepdim=True)
            deltas = deltas / norm
            deltas = deltas.transpose(0, 1)
            csm = torch.matmul(deltas, deltas.transpose(1, 2))
        except Exception as e:
            print(f"error while computing the CSM: {e}")
            raise e

        # division by zero can lead to NaNs
        if torch.isnan(csm).any():
            # raise Exception("NaNs in CSM!")
            print("NaNs in csm")
        else:
            print(f'{deltas.mean()}')
        csms.append(csm)
    return imgs, csms
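
For intuition, every entry of such a matrix is simply the cosine similarity between two flattened, channel-reduced (signed) gradient maps. A minimal stand-alone sketch with made-up shapes, not part of CSM.py:

# illustration only (not part of this commit): one CSM entry is the cosine
# similarity of two flattened, signed saliency maps
import torch
import torch.nn.functional as F

grad_i = torch.sign(torch.randn(32, 32)).flatten()  # stand-in saliency map for class i
grad_j = torch.sign(torch.randn(32, 32)).flatten()  # stand-in saliency map for class j
entry = F.cosine_similarity(grad_i, grad_j, dim=0)  # corresponds to one off-diagonal CSM entry
print(entry)  # value in [-1, 1]; close to 1 means the two class gradients align
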
def calc_csm(model: torch.nn.Module,
             X: torch.Tensor,
             sign: bool = True,
             rescale: bool = True) -> torch.Tensor:
    '''
    computes the cosine similarity map for the given input images X

    Parameters
    ---------
    model: torch model
    X: torch tensor; shape: (Batch_Size, Channels, Width, Height)
    sign: use the sign of the gradients to calculate the cosine similarity maps
    rescale: rescale the logits before applying softmax -> avoids the gradient obfuscation problem of large logits

    Returns
    ---------
    csm: cosine similarity matrices; shape: (Batch_Size, classes, classes)
    '''
    deltas = []  # saliency maps w.r.t. all possible output classes
    if not X.requires_grad:
        X.requires_grad_()
    logits = model(X)  # network output

    # rescale network output to avoid gradient obfuscation
    if rescale:
        logits = logits / torch.max(torch.abs(logits), 1, keepdim=True).values * 10

    B = logits.shape[0]  # batch size
    classes = logits.shape[-1]  # output classes
    for c in range(classes):
        # calculate the loss and compute the gradient w.r.t. the input for the current class
        y = torch.ones(B, device="cuda", dtype=torch.long) * c
        loss = F.cross_entropy(logits, y)
        grad = torch.autograd.grad(loss, X, retain_graph=True)[0]

        # take the sign of the gradient as in the original paper
        if sign:
            grad = torch.sign(grad)
        deltas.append(grad.detach().clone())
        model.zero_grad()
    deltas = torch.stack(deltas, dim=0)
    deltas = torch.max(deltas, dim=-3).values  # take only the maximum value over the channels

    # compute the cosine similarity matrices
    deltas = deltas.view(classes, B, -1)
    norm = torch.norm(deltas, p=2, dim=2, keepdim=True)
    deltas = deltas / norm
    deltas = deltas.transpose(0, 1)
    csm = torch.matmul(deltas, deltas.transpose(1, 2))

    # division by zero can lead to NaNs
    if torch.isnan(csm).any():
        raise Exception("NaNs in CSM!")
    return csm
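
A minimal usage sketch for calc_csm, assuming a CUDA device is available (the class labels are created on "cuda") and that CSM.py is on the import path; the classifier is a hypothetical stand-in, not the YOLO model used elsewhere in this repository:

# usage sketch, not part of this commit
import torch
import torch.nn as nn
import CSM

model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10)).cuda()  # toy 10-class classifier
X = torch.rand(4, 3, 32, 32, device="cuda")  # (Batch_Size, Channels, Height, Width)
csm = CSM.calc_csm(model, X, sign=True, rescale=True)
print(csm.shape)  # torch.Size([4, 10, 10]): one classes-by-classes matrix per image
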
def calc_csm_partial_network(model_first_part: torch.nn.Module,
                             model_second_part: torch.nn.Module,
                             X: torch.Tensor,
                             sign: bool = True,
                             rescale: bool = True,
                             scalar_product: bool = False) -> torch.Tensor:
    '''
    computes the cosine similarity map for the given input images X, taking the gradients
    w.r.t. the intermediate activations between the two model parts instead of the input

    Parameters
    ---------
    model_first_part: torch model that produces the intermediate activations
    model_second_part: torch model that maps the intermediate activations to logits
    X: torch tensor; shape: (Batch_Size, Channels, Width, Height)
    sign: use the sign of the gradients to calculate the cosine similarity maps
    rescale: rescale the logits before applying softmax -> avoids the gradient obfuscation problem of large logits
    scalar_product: skip the normalization, i.e. return raw scalar products instead of cosine similarities

    Returns
    ---------
    csm: cosine similarity matrices; shape: (Batch_Size, classes, classes)
    '''
    deltas = []  # saliency maps w.r.t. all possible output classes
    pre_ultimate_output = model_first_part(X)
    pre_ultimate_output.requires_grad_()
    logits = model_second_part(pre_ultimate_output)  # network output

    # rescale network output to avoid gradient obfuscation
    if rescale:
        logits = logits / torch.max(torch.abs(logits), 1, keepdim=True).values * 10

    B = logits.shape[0]  # batch size
    classes = logits.shape[-1]  # output classes
    for c in range(classes):
        # calculate the loss and compute the gradient w.r.t. the intermediate activations for the current class
        y = torch.ones(B, device="cuda", dtype=torch.long) * c
        loss = F.cross_entropy(logits, y)
        grad = torch.autograd.grad(loss, pre_ultimate_output, retain_graph=True)[0]

        # take the sign of the gradient as in the original paper
        if sign:
            grad = torch.sign(grad)
        deltas.append(grad.detach().clone())
    deltas = torch.stack(deltas, dim=0)

    # compute the cosine similarity matrices
    deltas = deltas.view(classes, B, -1)
    norm = torch.norm(deltas, p=2, dim=2, keepdim=True)
    if not scalar_product:
        deltas = deltas / norm
    deltas = deltas.transpose(0, 1)
    csm = torch.matmul(deltas, deltas.transpose(1, 2))

    # division by zero can lead to NaNs
    if torch.isnan(csm).any():
        raise Exception("NaNs in CSM!")
    return csm
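
A small stand-alone sketch of what the scalar_product flag changes: with the normalization the matrix contains cosine similarities, without it raw dot products of the flattened gradient maps. Shapes are made up for illustration:

# illustration only, not part of CSM.py
import torch
import torch.nn.functional as F

g = torch.randn(3, 10)  # three flattened per-class gradient maps
dot_products = g @ g.t()  # what scalar_product=True yields
g_unit = F.normalize(g, p=2, dim=1)  # L2-normalize each row
cosine_sims = g_unit @ g_unit.t()  # what scalar_product=False yields
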
@@ -9,6 +9,7 @@ import math
import matplotlib
from torch import optim
import CSM
import models
from models.common import Detections
from utils.external import TotalVariation
@@ -129,6 +130,7 @@ def bb_intersection_over_union(boxA, boxB):
    # return the intersection over union value
    return iou


def save_image(image):
    print("save image called!")
    im = transforms.ToPILImage('RGB')(image)
@@ -136,6 +138,7 @@ def save_image(image):
    plt.show()
    im.save(f"saved_patches/{time.time()}.jpg")


def get_best_prediction(true_box, res, cls_nr):
    min_distance = float("inf")
    max_iou = float(0)
@@ -149,10 +152,32 @@ def get_best_prediction(true_box, res, cls_nr):
            max_iou = pred_iou
            best_prediction = pred[cls_nr + 5]
            print(f"max found iou: {max_iou}")
            # print(f"max found iou: {max_iou}")
    return max_iou, best_prediction


def calculate_csms(frame, predictions):
    imgs_and_preds = []

    for pred in predictions:
        x1, y1, x2, y2, conf = pred[:5].float()
        pred_img_section = frame.flip(2)[int(y1):int(y2), int(x1):int(x2), :]
        tup = (pred_img_section, pred, frame, x1, y1, x2, y2)
        # print(tup)
        imgs_and_preds.append(tup)

        # if conf > 0.8:
        #     cls = classes[int(pred[5:].argmax())]
        #     print(f"{cls}: {conf} - {pred[:5].float()}")
        #     show(frame.flip(2)[int(y1):int(y2), int(x1):int(x2), :] / 255.)
        #     print("done")

    imgs, csms = CSM.calc_yolo_csms(imgs_and_preds)
    return imgs, csms  # the caller in the main script unpacks both lists


if __name__ == "__main__":
    # init
    patch_transformer = PatchTransformer().cuda()
@@ -257,6 +282,12 @@ if __name__ == "__main__":
            # debug_preds()
            pass

        # calculate the Cosine Similarity Matrix
        imgs, csms = calculate_csms(frame, raw_results)
        for i in range(len(imgs)):
            show(imgs[i])
            show(csms[i])

        iou, pred = get_best_prediction(bounding_box, raw_results, 15)  # get cat
        # iou, pred = get_best_prediction(bounding_box, raw_results, 0)  # get person
        # iou, pred = get_best_prediction(bounding_box, raw_results, 12)  # get parking meter
......