# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Common modules
"""

import logging
import math
import warnings
from copy import copy
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils.datasets import exif_transpose, letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, increment_path, xyxy2xywh, save_one_box
from utils.plots import colors, plot_one_box
from utils.torch_utils import time_sync

LOGGER = logging.getLogger(__name__)

def autopad(k, p=None):  # kernel, padding
    """Return 'same' padding for kernel size k.

    k may be an int or a sequence of ints; when p is None a half-kernel
    pad is derived per dimension, otherwise p is returned unchanged.
    """
    if p is not None:
        return p
    return k // 2 if isinstance(k, int) else [v // 2 for v in k]  # auto-pad


class Conv(nn.Module):
    """Standard convolution block: Conv2d -> BatchNorm2d -> activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # act=True -> SiLU; act an nn.Module -> used as-is; anything else -> no-op
        if act is True:
            self.act = nn.SiLU()
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Training/standard path: conv, then BN, then activation."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Inference path once BN has been fused into the conv weights."""
        return self.act(self.conv(x))


class DWConv(Conv):
    """Depth-wise convolution: channel groups set to gcd(c1, c2)."""

    def __init__(self, c1, c2, k=1, s=1, act=True):  # ch_in, ch_out, kernel, stride
        groups = math.gcd(c1, c2)  # largest grouping dividing both channel counts
        super().__init__(c1, c2, k, s, g=groups, act=act)


class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x


class TransformerBlock(nn.Module):
    """Vision-transformer style block (https://arxiv.org/abs/2010.11929)."""

    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        # optional 1x1 projection only when input/output channel counts differ
        self.conv = Conv(c1, c2) if c1 != c2 else None
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        # (b, c2, w, h) -> (w*h, b, c2): sequence layout expected by the attention layers
        p = x.flatten(2).unsqueeze(0).transpose(0, 3).squeeze(3)
        out = self.tr(p + self.linear(p))
        return out.unsqueeze(3).transpose(0, 3).reshape(b, self.c2, w, h)


class Bottleneck(nn.Module):
    """Standard bottleneck: 1x1 reduce -> 3x3 conv, with optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        # residual only applies when requested and channel counts match
        self.add = shortcut and c1 == c2

    def forward(self, x):
        y = self.cv2(self.cv1(x))
        return x + y if self.add else y


class BottleneckCSP(nn.Module):
    """CSP Bottleneck (https://github.com/WongKinYiu/CrossStagePartialNetworks)."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        """Dense path (cv1 -> bottlenecks -> cv3) and bypass (cv2), merged by cv4."""
        dense = self.cv3(self.m(self.cv1(x)))
        bypass = self.cv2(x)
        merged = torch.cat((dense, bypass), dim=1)
        return self.cv4(self.act(self.bn(merged)))


class C3(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        branch_a = self.m(self.cv1(x))  # processed path
        branch_b = self.cv2(x)  # shortcut path
        return self.cv3(torch.cat((branch_a, branch_b), dim=1))


class C3TR(C3):
    """C3 module whose bottleneck stack is replaced by a TransformerBlock."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels, mirrors the parent computation
        self.m = TransformerBlock(c_, c_, 4, n)


class C3SPP(C3):
    """C3 module whose bottleneck stack is replaced by an SPP layer."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels, mirrors the parent computation
        self.m = SPP(c_, c_, k)


class C3Ghost(C3):
    """C3 module whose bottleneck stack is replaced by GhostBottlenecks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))


class SPP(nn.Module):
    """Spatial Pyramid Pooling layer (https://arxiv.org/abs/1406.4729)."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        # one stride-1 max-pool per kernel size, padded to preserve spatial dims
        self.m = nn.ModuleList(nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pooled = [pool(x) for pool in self.m]
            return self.cv2(torch.cat([x] + pooled, 1))


class SPPF(nn.Module):
    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""

    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            # three serial poolings emulate the parallel k, 2k-1, 3k-2 windows of SPP
            y1 = self.m(x)
            y2 = self.m(y1)
            y3 = self.m(y2)
            return self.cv2(torch.cat([x, y1, y2, y3], 1))


class Focus(nn.Module):
    """Focus wh information into c-space by stacking 2x2 pixel neighborhoods channel-wise."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        # four interleaved sub-grids of the input, concatenated on the channel axis
        patches = [x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]]
        return self.conv(torch.cat(patches, 1))
        # return self.conv(self.contract(x))


class GhostConv(nn.Module):
    """Ghost Convolution (https://github.com/huawei-noah/ghostnet)."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        # cheap 5x5 depth-wise op generating the "ghost" half of the channels
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        primary = self.cv1(x)
        return torch.cat([primary, self.cv2(primary)], 1)


class GhostBottleneck(nn.Module):
    """Ghost Bottleneck (https://github.com/huawei-noah/ghostnet)."""

    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        # pw expand -> (dw downsample only when s == 2) -> pw-linear project
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        # shortcut path downsamples to match only when striding
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class Contract(nn.Module):
    """Contract width-height into channels, e.g. x(1,64,80,80) -> x(1,256,40,40)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        s = self.gain
        b, c, h, w = x.size()  # h and w must be divisible by s
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)


class Expand(nn.Module):
    """Expand channels into width-height, e.g. x(1,64,80,80) -> x(1,16,160,160)."""

    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        s = self.gain
        b, c, h, w = x.size()  # c must be divisible by s**2
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)


class Concat(nn.Module):
    """Concatenate a list of tensors along a chosen dimension."""

    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension  # concatenation axis

    def forward(self, x):
        """x: list/tuple of tensors whose shapes match off the chosen axis."""
        return torch.cat(x, self.d)


class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    # NOTE(review): forward()/forward_pt() read self.stride and self.names, which are not assigned
    # anywhere in this class — presumably copied over from the wrapped model by the caller; confirm.
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class
    max_det = 1000  # maximum number of detections per image

    def __init__(self, model):
        # Wrap an already-built model; eval() freezes dropout/batch-norm behavior.
        super().__init__()
        self.model = model.eval()

    def autoshape(self):
        # Idempotent no-op: this wrapper is already input-robust; returns self unchanged.
        LOGGER.info('AutoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    @torch.enable_grad()  # gradients deliberately kept on, unlike forward() — presumably so callers can backprop through inference; confirm
    def forward_pt(self, img, size=640, augment=False, profile=False):
        # Tensor-only inference path that keeps the autograd graph and additionally returns
        # raw (pre-NMS) outputs and per-box class logits alongside the Detections object.
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
        p = next(self.model.parameters())  # for device and type

        # Pre-process (multi-source pre-processing from forward() kept below for reference)
        # n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        # shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        # for i, im in enumerate(imgs):
        #     f = f'image{i}'  # filename
        #     if im.shape[0] < 5:  # image in CHW
        #         im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        #     im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
        #     s = im.shape[:2]  # HWC
        #     shape0.append(s)  # image shape
        #     g = (size / max(s))  # gain
        #     shape1.append([y * g for y in s])
        #     imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        # shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        # x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        # x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        # x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = img.type_as(p) / 255.  # uint8 to fp16/32
        # NOTE(review): transpose(0, 1) + unsqueeze(-1) + .T reorders/reshapes dims; the intended
        # input layout for img is not evident from this file — verify against callers.
        x = torch.transpose(x, 0, 1).unsqueeze(-1).T
        t.append(time_sync())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
            # NOTE(review): this NMS call unpacks three values (suppressed preds, raw preds, logits) —
            # a project-local extension of the usual single-return non_max_suppression; confirm signature.
            y_sup, y_raw, logits = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)  # NMS
            # for i in range(n):
            #     scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            # return Detections(imgs, y, files, t, self.names, x.shape)
            detections = Detections([img], y_sup, None, t, self.names, x.shape)
            detections.logits = logits  # attach per-box class logits for downstream use
            return detections, y_raw

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   file:       imgs = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_sync()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            # Already-batched tensor input: bypass pre/post-processing and return raw model output
            with amp.autocast(enabled=p.device.type != 'cpu'):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process: normalize every accepted input type to a contiguous HWC uint8 numpy image
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f'image{i}'  # filename
            if isinstance(im, (str, Path)):  # filename or uri
                im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                im = np.asarray(exif_transpose(im))
            elif isinstance(im, Image.Image):  # PIL Image
                im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
            files.append(Path(f).with_suffix('.jpg').name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
        t.append(time_sync())

        with amp.autocast(enabled=p.device.type != 'cpu'):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_sync())

            # Post-process
            y = non_max_suppression(y, self.conf, iou_thres=self.iou, classes=self.classes, max_det=self.max_det)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_sync())
            return Detections(imgs, y, files, t, self.names, x.shape)


class Detections:
    """YOLOv5 detections container for inference results.

    Holds the input images, per-image prediction tensors and convenience box
    views (xyxy/xywh, pixel and normalized), plus display/save/crop helpers.
    """

    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        """
        imgs:  list of images as numpy arrays (HWC)
        pred:  list of tensors, one per image; rows are (xyxy, conf, cls)
        files: list of image filenames
        times: optional 4-element timestamp list -> per-stage ms timings in self.t
        names: class-index -> class-name mapping
        shape: inference BCHW shape
        """
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # robustness fix: always define self.t so print() cannot raise AttributeError
        if times is not None:
            self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        else:
            self.t = (0.0, 0.0, 0.0)
        self.s = shape  # inference BCHW shape

    @torch.no_grad()
    def display(self, pprint=False, show=False, save=False, crop=False, render=False, save_dir=Path('')):
        """Annotate/print/show/save/crop detections; drives the public helpers below."""
        for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # renamed from 'str' (shadowed builtin)
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render or crop:
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            save_one_box(box, im, file=save_dir / 'crops' / self.names[int(cls)] / self.files[i])
                        else:  # all others
                            im = plot_one_box(box, im, label=label, color=colors(cls))
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if pprint:
                LOGGER.info(s.rstrip(', '))
            if show:
                im.show(self.files[i])  # show
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to '{save_dir}'")
            if render:
                self.imgs[i] = np.asarray(im)

    def print(self):
        """Print per-image results and per-stage speed summary."""
        self.display(pprint=True)  # print results
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' %
                    self.t)

    def show(self):
        self.display(show=True)  # show results

    def save(self, save_dir='runs/detect/exp'):
        """Save annotated images to an auto-incremented run directory."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(save=True, save_dir=save_dir)  # save results

    def crop(self, save_dir='runs/detect/exp'):
        """Save per-detection crops to an auto-incremented run directory."""
        save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True)  # increment save_dir
        self.display(crop=True, save_dir=save_dir)  # crop results
        LOGGER.info(f'Saved results to {save_dir}\n')

    def render(self):
        """Annotate images in place and return them."""
        self.display(render=True)  # render results
        return self.imgs

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        # BUG FIX: previous call passed self.names into the positional `files` parameter and
        # self.s into `times`, giving sub-Detections bogus filenames and garbage timings.
        x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], None, self.names, self.s)
             for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n


class Classify(nn.Module):
    """Classification head: x(b,c1,20,20) -> x(b,c2)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        inputs = x if isinstance(x, list) else [x]
        z = torch.cat([self.aap(y) for y in inputs], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)