1, 网络结构
左边是Darknet网络结构,右边是YOLOv3网络结构,详细解析可参考链接。
2, PyTorch代码实现
darknet53.py
# -*- coding: utf-8 -*-
# @Time    : 2020/10/20 10:17 PM
# @Author  : zxq
# @File    : darknet53.py
# @Software: PyCharm
"""DarkNet backbone (DarkNet-53 by default) used as the YOLOv3 feature extractor."""
from collections import OrderedDict

import torch
import torch.nn as nn


class Conv2dBatchLeaky(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU, executed sequentially.

    This is the smallest DarkNet building block (the "Convolutional" box in
    the network diagram).  Padding is ``kernel_size // 2``, so with stride 1
    and an odd kernel the spatial size is unchanged; only ``stride`` scales
    the feature map.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int or tuple): Size of the convolution kernel.
        stride (int or tuple): Stride of the convolution.
        negative_slope (number, optional): Negative slope of the leaky ReLU.
            Default ``0.1``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, negative_slope=0.1):
        super(Conv2dBatchLeaky, self).__init__()

        # Parameters
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        # "Same"-style padding: floor(kernel / 2) per dimension.
        if isinstance(kernel_size, (list, tuple)):
            self.padding = [k // 2 for k in kernel_size]
        else:
            self.padding = kernel_size // 2
        self.leaky_slope = negative_slope

        # Pack the three layers so forward() is a single call.
        self.layers = nn.Sequential(
            nn.Conv2d(self.in_channels, self.out_channels, self.kernel_size,
                      self.stride, self.padding, bias=False),
            nn.BatchNorm2d(self.out_channels),
            nn.LeakyReLU(self.leaky_slope, inplace=True),
        )

    def forward(self, x):
        """Apply conv + batch norm + leaky ReLU to ``x``."""
        return self.layers(x)


class ResBlock(nn.Module):
    """Residual block: 1x1 bottleneck conv, 3x3 conv, then a skip connection.

    Channel flow: ``in_channels -> in_channels // 2 -> in_channels``.  The
    1x1 convolution halves the channel count to reduce parameters before the
    3x3 convolution restores it.

    Args:
        in_channels (int): Number of channels of the input feature map.
    """

    def __init__(self, in_channels):
        super(ResBlock, self).__init__()
        # in_channels -> in_channels // 2 (channel reduction)
        self.conv1 = Conv2dBatchLeaky(in_channels, in_channels // 2,
                                      kernel_size=1, stride=1, negative_slope=0.1)
        # in_channels // 2 -> in_channels
        self.conv2 = Conv2dBatchLeaky(in_channels // 2, in_channels,
                                      kernel_size=3, stride=1, negative_slope=0.1)

    def forward(self, x):
        """Return ``conv2(conv1(x)) + x`` (same shape as ``x``)."""
        residual = x
        x = self.conv1(x)  # -> in_channels // 2
        x = self.conv2(x)  # -> in_channels
        # Out-of-place add on purpose: conv2 ends with an in-place LeakyReLU
        # whose output autograd keeps for the backward pass.  `x += residual`
        # would overwrite that saved tensor and make backward() raise.
        return x + residual


class DarkNet(nn.Module):
    """DarkNet backbone made of five stages of residual blocks.

    Every stage is preceded by a stride-2 3x3 convolution that halves H and W
    and doubles the channel count; the residual blocks inside a stage keep
    the feature shape (h, w, c) unchanged.

    Args:
        layers (list): ``len(layers) == 5``; each entry is the number of
            residual blocks in the corresponding stage and controls the model
            size.  DarkNet-53 uses ``[1, 2, 8, 8, 4]``.
    """

    def __init__(self, layers):
        super(DarkNet, self).__init__()
        start_channel = 32  # channels after the stem convolution (fixed)

        # Stem: 3 -> 32 channels, spatial size unchanged.
        self.conv = Conv2dBatchLeaky(in_channels=3, out_channels=start_channel,
                                     kernel_size=3, stride=1)

        # Stage 1: 32 -> 64
        self.conv1 = Conv2dBatchLeaky(in_channels=start_channel, out_channels=start_channel * 2,
                                      kernel_size=3, stride=2)
        self.layer1 = self._build_layer(input_channels=start_channel * 2, num_res_block=layers[0])
        # Stage 2: 64 -> 128
        self.conv2 = Conv2dBatchLeaky(in_channels=start_channel * 2, out_channels=start_channel * 4,
                                      kernel_size=3, stride=2)
        self.layer2 = self._build_layer(input_channels=start_channel * 4, num_res_block=layers[1])
        # Stage 3: 128 -> 256
        self.conv3 = Conv2dBatchLeaky(in_channels=start_channel * 4, out_channels=start_channel * 8,
                                      kernel_size=3, stride=2)
        self.layer3 = self._build_layer(input_channels=start_channel * 8, num_res_block=layers[2])
        # Stage 4: 256 -> 512
        self.conv4 = Conv2dBatchLeaky(in_channels=start_channel * 8, out_channels=start_channel * 16,
                                      kernel_size=3, stride=2)
        self.layer4 = self._build_layer(input_channels=start_channel * 16, num_res_block=layers[3])
        # Stage 5: 512 -> 1024
        self.conv5 = Conv2dBatchLeaky(in_channels=start_channel * 16, out_channels=start_channel * 32,
                                      kernel_size=3, stride=2)
        self.layer5 = self._build_layer(input_channels=start_channel * 32, num_res_block=layers[4])

        # Output channels of stage 1..5, read by the detection head.
        self.output_channels = [
            start_channel * 2,   # 64,   layer1
            start_channel * 4,   # 128,  layer2
            start_channel * 8,   # 256,  layer3
            start_channel * 16,  # 512,  layer4
            start_channel * 32,  # 1024, layer5
        ]

    @staticmethod
    def _build_layer(input_channels, num_res_block=1):
        """Build one DarkNet stage: ``num_res_block`` residual blocks in sequence.

        :param input_channels: channel count of the stage input (and output).
        :param num_res_block: number of residual blocks in the stage.
        :return: ``nn.Sequential`` whose children are named ``res_block_<i>``.
        """
        blocks = [
            ("res_block_{}".format(i), ResBlock(in_channels=input_channels))
            for i in range(num_res_block)
        ]
        return nn.Sequential(OrderedDict(blocks))

    def forward(self, x):
        """Return the feature maps of stages 3, 4 and 5.

        Shape comments assume a ``[b, 3, 416, 416]`` input.
        """
        x = self.conv(x)        # [b,3,416,416]   -> [b,32,416,416]
        x = self.conv1(x)       # [b,32,416,416]  -> [b,64,208,208]
        x = self.layer1(x)      # shape unchanged
        x = self.conv2(x)       # [b,64,208,208]  -> [b,128,104,104]
        x = self.layer2(x)
        x = self.conv3(x)       # [b,128,104,104] -> [b,256,52,52]
        out3 = self.layer3(x)
        out4 = self.conv4(out3)  # [b,256,52,52]  -> [b,512,26,26]
        out4 = self.layer4(out4)
        out5 = self.conv5(out4)  # [b,512,26,26]  -> [b,1024,13,13]
        out5 = self.layer5(out5)
        return out3, out4, out5  # [b,256,52,52], [b,512,26,26], [b,1024,13,13]


def darknet53(pretrained, **kwargs):
    """Build a DarkNet-53 backbone, optionally loading weights from a file.

    :param pretrained: falsy to skip loading; otherwise it must be the path
        (``str``) of a saved state dict.
    :param kwargs: accepted for call compatibility; not used.
    :return: the ``DarkNet`` model.
    :raises TypeError: if ``pretrained`` is truthy but not a ``str``.
    """
    model = DarkNet([1, 2, 8, 8, 4])
    if pretrained:
        if not isinstance(pretrained, str):
            raise TypeError("darknet request pretrained path. got [{}]".format(pretrained))
        # map_location="cpu" lets checkpoints saved on a GPU load on CPU-only
        # hosts; load_state_dict copies the values onto the model's device.
        model.load_state_dict(torch.load(pretrained, map_location="cpu"))
    return model


# yolov3_module.py  (file name of the next listing)
# -*- coding: utf-8 -*-
# @Time    : 2020/10/22 10:10 PM
# @Author  : zxq
# @File    : yolov3_module.py
# @Software: PyCharm
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml

from backbone.darknet53 import Conv2dBatchLeaky, darknet53


class Conv2dBlock5L(nn.Module):
    """The "Conv2D Block 5L" box of the network diagram.

    Six conv+bn+leakyReLU layers that alternate 1x1 and 3x3 kernels, all with
    stride 1, so only the channel count changes.  (Original author's guess
    about the name: the output channels switch between c1 and c2 five times.)

    :param in_channels: channel count of the incoming feature map.
    :param out_channels: list ``[c1, c2]``; the layers alternate between c1
        and c2 channels and the block outputs c2 channels.
    """

    def __init__(self, in_channels, out_channels):
        super(Conv2dBlock5L, self).__init__()
        c1, c2 = out_channels[0], out_channels[1]
        self.out_channels = c2
        # Packed into one Sequential so forward() is a single call.
        self.layers = nn.Sequential(
            Conv2dBatchLeaky(in_channels=in_channels, out_channels=c1,
                             kernel_size=1, stride=1),  # channel reduction, less compute
            Conv2dBatchLeaky(in_channels=c1, out_channels=c2, kernel_size=3, stride=1),
            Conv2dBatchLeaky(in_channels=c2, out_channels=c1, kernel_size=1, stride=1),
            Conv2dBatchLeaky(in_channels=c1, out_channels=c2, kernel_size=3, stride=1),
            Conv2dBatchLeaky(in_channels=c2, out_channels=c1, kernel_size=1, stride=1),
            Conv2dBatchLeaky(in_channels=c1, out_channels=c2, kernel_size=3, stride=1),
        )

    def forward(self, x):
        """Run the six layers; H and W are unchanged, channels become c2."""
        return self.layers(x)


class Upsample(nn.Module):
    """Module wrapper around ``F.interpolate``.

    Original author's note: "nn.Upsample is deprecated".

    :param scale_factor: spatial scale multiplier passed to ``F.interpolate``.
    :param mode: interpolation mode passed to ``F.interpolate``.
    """

    def __init__(self, scale_factor, mode="nearest"):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        """Return ``x`` resized by ``scale_factor`` using ``mode``."""
        return F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)


class YOLOv3(nn.Module):
    """YOLOv3 detection network: DarkNet-53 backbone plus three output heads.

    :param config: mapping read as ``config['yolo']['anchor']`` (flat list
        ``[w0, h0, w1, h1, ...]``, e.g. 9 anchors -> 18 numbers) and
        ``config['yolo']['classes']`` (number of classes).
    """

    def __init__(self, config):
        super(YOLOv3, self).__init__()
        self.backbone = darknet53(pretrained=False)

        # Pair up the flat anchor list: [10,13, 16,30, ...] -> [(10,13), (16,30), ...]
        anchors = config['yolo']['anchor']
        self.anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors) - 1, 2)]
        num_anchors = len(self.anchors) // 3  # anchors are split evenly over the 3 heads
        num_classes = config['yolo']['classes']
        # Channels of every head output: num_anchors * (5 + num_classes),
        # e.g. COCO: 3 * (5 + 80) = 255; the diagram uses 3 * (5 + 20) = 75.
        self.final_out_channels = num_anchors * (5 + num_classes)

        # 1) stride 32 head, fed by the last backbone stage.
        self.block_layer5 = Conv2dBlock5L(in_channels=self.backbone.output_channels[-1],
                                          out_channels=[512, 1024])
        # Plain 1x1 convolution that maps to final_out_channels.
        self.conv1x1_out5 = nn.Conv2d(in_channels=self.block_layer5.out_channels,
                                      out_channels=self.final_out_channels,
                                      kernel_size=1, stride=1, padding=0, bias=True)

        # 2) stride 16 head: "Conv2D + UpSampling2D" in the diagram; the conv
        # changes the channel count, the upsample doubles H and W.
        self.conv5 = Conv2dBatchLeaky(in_channels=self.block_layer5.out_channels,
                                      out_channels=256, kernel_size=1, stride=1)
        self.up_sample = Upsample(scale_factor=2, mode='nearest')  # shared by both upsampling steps
        # concat(backbone stage 4, upsampled): 512 + 256 = 768 channels
        in_channels = self.backbone.output_channels[-2] + 256
        self.block_layer4 = Conv2dBlock5L(in_channels=in_channels, out_channels=[256, 512])  # 768 -> 512
        self.conv1x1_out4 = nn.Conv2d(in_channels=self.block_layer4.out_channels,
                                      out_channels=self.final_out_channels,
                                      kernel_size=1, stride=1, padding=0, bias=True)

        # 3) stride 8 head.
        self.conv4 = Conv2dBatchLeaky(in_channels=self.block_layer4.out_channels,
                                      out_channels=128, kernel_size=1, stride=1)  # 512 -> 128
        # concat(backbone stage 3, upsampled): 256 + 128 = 384 channels
        in_channels = self.backbone.output_channels[-3] + 128
        self.block_layer3 = Conv2dBlock5L(in_channels=in_channels, out_channels=[128, 256])  # -> 256
        self.conv1x1_out3 = nn.Conv2d(in_channels=self.block_layer3.out_channels,
                                      out_channels=self.final_out_channels,
                                      kernel_size=1, stride=1, padding=0, bias=True)

    def forward(self, x):
        """Return the raw head outputs ``(yolo_out3, yolo_out4, yolo_out5)``.

        Shape comments assume a ``[b, 3, 416, 416]`` input and 255 output
        channels.
        """
        # [b,256,52,52], [b,512,26,26], [b,1024,13,13]
        backbone_out3, backbone_out4, backbone_out5 = self.backbone(x)

        # stride 32
        block_out5 = self.block_layer5(backbone_out5)  # [b,1024,13,13], c/h/w unchanged
        # [b,1024,13,13] -> [b,255,13,13]; a single 1x1 conv is used (the
        # extra 3x3 conv of the diagram is omitted).
        yolo_out5 = self.conv1x1_out5(block_out5)

        # stride 16
        x = self.conv5(block_out5)            # [b,1024,13,13] -> [b,256,13,13]
        x = self.up_sample(x)                 # [b,256,13,13]  -> [b,256,26,26]
        x = torch.cat([backbone_out4, x], 1)  # [b,512,26,26] + [b,256,26,26] -> [b,768,26,26]
        block_out4 = self.block_layer4(x)     # [b,768,26,26] -> [b,512,26,26] (the diagram shows 256)
        yolo_out4 = self.conv1x1_out4(block_out4)  # [b,512,26,26] -> [b,255,26,26]

        # stride 8
        x = self.conv4(block_out4)            # [b,512,26,26] -> [b,128,26,26]
        x = self.up_sample(x)                 # [b,128,26,26] -> [b,128,52,52]
        x = torch.cat([backbone_out3, x], 1)  # [b,256,52,52] + [b,128,52,52] -> [b,384,52,52]
        block_out3 = self.block_layer3(x)     # [b,384,52,52] -> [b,256,52,52]
        yolo_out3 = self.conv1x1_out3(block_out3)  # [b,256,52,52] -> [b,255,52,52]

        return yolo_out3, yolo_out4, yolo_out5


if __name__ == '__main__':
    # Close the config file deterministically instead of leaking the handle.
    with open('./config/cfg.yaml') as cfg_file:
        cfg_dict = yaml.load(cfg_file, Loader=yaml.SafeLoader)
    yolo_module = YOLOv3(config=cfg_dict)
    # torch.Tensor(4, 3, 416, 416) would be uninitialised memory (may hold
    # NaN/inf); use a properly initialised random batch instead.
    x = torch.randn(4, 3, 416, 416)
    output3, output4, output5 = yolo_module(x)
    print(output3.shape, output4.shape, output5.shape)


# yolov3_loss.py  (file name of the next listing)
# -*- coding: utf-8 -*-
# @Time    : 2020/10/23 10:10 PM
# @Author  : zxq
# @File    : yolov3_loss.py
# @Software: PyCharm
import math

import numpy as np
import torch
import torch.nn as nn

from utils.utils import bbox_iou


class YOLOLoss(nn.Module):
    """YOLOv3 loss for a single output scale.

    :param image_size: size of the network input image, ``(width, height)``.
    :param num_classes: number of object classes.
    :param anchors: ``[[w1, h1], [w2, h2], ...]`` anchors of this scale, in
        input-image pixels.
    """

    def __init__(self, image_size, num_classes, anchors):
        super(YOLOLoss, self).__init__()
        self.image_size = image_size
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = len(anchors)
        # Attributes per box: (x, y, w, h, conf, c0, c1, ..., c{num_classes-1})
        self.bbox_attrs = 5 + num_classes

        self.ignore_threshold = 0.5
        self.lambda_xy = 2.5
        self.lambda_wh = 2.5
        self.lambda_conf = 1.0
        self.lambda_cls = 1.0

        self.bce_loss = nn.BCELoss()
        self.mse_loss = nn.MSELoss()

    def forward(self, input, targets=None):
        """Compute the training loss of one output scale.

        All intermediate tensors live on ``input.device``, so the loss works
        on CPU as well as on GPU.

        :param input: raw head output, ``[b, num_anchors * bbox_attrs, h, w]``.
        :param targets: ``[b, num_gt, 5]`` with rows
            ``[cls, x_ratio, y_ratio, w_ratio, h_ratio]``; the box values are
            ratios of the image size (``x_ratio = x / img_w``).  All-zero rows
            are treated as padding.
        :return: ``(loss, loss_x, loss_y, loss_w, loss_h, loss_conf, loss_cls)``
            where ``loss`` is a tensor and the others are Python floats, or
            ``None`` when ``targets`` is ``None`` (detection decoding is not
            implemented here).
        """
        batch_size = input.shape[0]
        in_h = input.shape[2]
        in_w = input.shape[3]
        stride_h = self.image_size[1] / in_h  # down-sampling factor along height
        stride_w = self.image_size[0] / in_w
        # Anchors are given in image pixels; rescale them to feature-map cells.
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]

        # [b,c,h,w] -> [b,num_anchors,bbox_attrs,h,w] -> [b,num_anchors,h,w,bbox_attrs]
        prediction = input.view(batch_size, self.num_anchors, self.bbox_attrs,
                                in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()

        # Each of the following is [b, num_anchors, h, w].
        x = torch.sigmoid(prediction[..., 0])     # centre x offset inside the cell, in (0, 1)
        y = torch.sigmoid(prediction[..., 1])     # centre y offset inside the cell, in (0, 1)
        w = prediction[..., 2]                    # log-space width
        h = prediction[..., 3]                    # log-space height
        conf = torch.sigmoid(prediction[..., 4])  # objectness probability
        # [b, num_anchors, h, w, num_classes].  The sigmoid is required:
        # nn.BCELoss only accepts probabilities in [0, 1], not raw logits.
        pred_cls = torch.sigmoid(prediction[..., 5:])

        if targets is None:
            # Detection path is not implemented.
            return None

        device = input.device
        # tconf is produced by build_target but not used by the loss below
        # (mask carries the same information).
        mask, noobj_mask, tx, ty, tw, th, _tconf, tcls = \
            self.build_target(targets, scaled_anchors, in_w, in_h, self.ignore_threshold)
        mask, noobj_mask = mask.to(device), noobj_mask.to(device)
        tx, ty, tw, th = tx.to(device), ty.to(device), tw.to(device), th.to(device)
        tcls = tcls.to(device)

        # 1) location loss; mask is 1 only at the best anchor of each gt cell.
        loss_x = self.bce_loss(x * mask, tx * mask)
        loss_y = self.bce_loss(y * mask, ty * mask)
        loss_w = self.mse_loss(w * mask, tw * mask)
        loss_h = self.mse_loss(h * mask, th * mask)

        # 2) objectness loss: mask marks cells with an object, noobj_mask
        # marks cells without one.
        loss_conf = self.bce_loss(conf * mask, mask) + \
            0.5 * self.bce_loss(conf * noobj_mask, noobj_mask * 0.0)

        # 3) class loss over the object cells only: [num_obj, num_classes].
        obj = mask == 1
        if obj.any():
            loss_cls = self.bce_loss(pred_cls[obj], tcls[obj])
        else:
            # No object in the batch: the mean over an empty selection would
            # be NaN, so contribute a zero that still belongs to the graph.
            loss_cls = pred_cls.sum() * 0.0

        # total loss = weighted sum of the parts
        loss = (loss_x + loss_y) * self.lambda_xy + \
            (loss_w + loss_h) * self.lambda_wh + \
            loss_conf * self.lambda_conf + \
            loss_cls * self.lambda_cls

        return loss, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(), \
            loss_conf.item(), loss_cls.item()

    def build_target(self, target, anchors, in_w, in_h, ignore_threshold):
        """Convert ground-truth boxes into per-cell training targets.

        :param target: ``[b, num_gt, 5]`` rows
            ``[cls, x_ratio, y_ratio, w_ratio, h_ratio]``.
        :param anchors: ``[(w1, h1), (w2, h2), ...]`` in feature-map units.
        :param in_w: width of the predicted feature map.
        :param in_h: height of the predicted feature map.
        :param ignore_threshold: anchors whose shape IoU with a gt box exceeds
            this value are not penalised as "no object" in that cell.
        :return: float tensors (all but ``tcls`` are ``[b, num_anchors, in_h, in_w]``):
            mask: 1 at ``[b, best_anchor, gj, gi]``, i.e. the best anchor of a gt cell.
            noobj_mask: 1 where no object is assumed; set to 0 at
                ``[b, anchor_ious > ignore_threshold, gj, gi]`` and at the
                best anchor's own entry.
            tx, ty: offset of the gt centre from the cell's top-left corner
                (``gx - gi``, ``gy - gj``).
            tw, th: ``log(gw / anchor_w + 1e-16)``, ``log(gh / anchor_h + 1e-16)``.
            tconf: 1 at the best anchor of a gt cell.
            tcls: ``[b, num_anchors, in_h, in_w, num_classes]`` one-hot class.
        """
        batch_size = target.shape[0]

        grid_shape = (batch_size, self.num_anchors, in_h, in_w)
        mask = torch.zeros(*grid_shape, requires_grad=False)
        noobj_mask = torch.ones(*grid_shape, requires_grad=False)
        tx = torch.zeros(*grid_shape, requires_grad=False)
        ty = torch.zeros(*grid_shape, requires_grad=False)
        tw = torch.zeros(*grid_shape, requires_grad=False)
        th = torch.zeros(*grid_shape, requires_grad=False)
        tconf = torch.zeros(*grid_shape, requires_grad=False)
        tcls = torch.zeros(batch_size, self.num_anchors, in_h, in_w, self.num_classes,
                           requires_grad=False)

        # Anchor shapes as boxes at the origin, (num_anchors, 4): [0, 0, w, h].
        # Loop-invariant, so built once.
        anchor_box = torch.FloatTensor(
            np.concatenate((np.zeros((self.num_anchors, 2)), np.array(anchors)), 1))

        for b in range(batch_size):           # every image of the batch
            for t in range(target.shape[1]):  # every gt row of the image
                if target[b, t].sum() == 0:
                    # Zero padding row (images have different gt counts).
                    continue

                # gt box in feature-map units.
                gx = target[b, t, 1] * in_w
                gy = target[b, t, 2] * in_h
                gw = target[b, t, 3] * in_w
                gh = target[b, t, 4] * in_h

                # Cell containing the gt centre.  A ratio of exactly 1.0 would
                # index one past the grid, so clamp to the last cell.
                gi = min(int(gx), in_w - 1)
                gj = min(int(gy), in_h - 1)

                # gt shape as a box at the origin, (1, 4): [0, 0, gw, gh].
                gt_box = torch.FloatTensor([[0.0, 0.0, float(gw), float(gh)]])

                # Shape IoU between the gt box and every anchor.
                # NOTE(review): bbox_iou is defined in utils.utils; it is
                # assumed to return one IoU per anchor — confirm.
                anchor_ious = bbox_iou(gt_box, anchor_box)

                # Anchors that overlap well enough are ignored by the
                # no-object loss.
                noobj_mask[b, anchor_ious > ignore_threshold, gj, gi] = 0

                # Best matching anchor is responsible for this gt box.
                best_anchor_index = int(np.argmax(anchor_ious))
                mask[b, best_anchor_index, gj, gi] = 1

                # Centre offsets relative to the cell's top-left corner.
                tx[b, best_anchor_index, gj, gi] = gx - gi
                ty[b, best_anchor_index, gj, gi] = gy - gj
                # Width and height relative to the anchor, in log space.
                tw[b, best_anchor_index, gj, gi] = math.log(gw / anchors[best_anchor_index][0] + 1e-16)
                th[b, best_anchor_index, gj, gi] = math.log(gh / anchors[best_anchor_index][1] + 1e-16)
                # Objectness.
                tconf[b, best_anchor_index, gj, gi] = 1
                # One-hot class label.
                tcls[b, best_anchor_index, gj, gi, int(target[b, t, 0])] = 1

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls


if __name__ == '__main__':
    loss_module = YOLOLoss(image_size=(416, 416), num_classes=80,
                           anchors=[[116, 90], [156, 198], [373, 326]])
    net_output = torch.rand(2, 255, 52, 52) * 10  # fake head output for one scale

    # Rows are [cls, x_ratio, y_ratio, w_ratio, h_ratio].
    target1 = torch.FloatTensor([[16, 0.328250, 0.769577, 0.463156, 0.242207],
                                 [1, 0.128828, 0.375258, 0.249063, 0.733333],
                                 [0, 0.521430, 0.258251, 0.021172, 0.060869]])
    target2 = torch.FloatTensor([[59, 0.510930, 0.442073, 0.978141, 0.872188],
                                 [77, 0.858305, 0.073521, 0.074922, 0.059833],
                                 [0, 0.569492, 0.285235, 0.024547, 0.122254]])
    targets = torch.cat((target1.unsqueeze(0), target2.unsqueeze(0)), 0)  # [b, num_gt, 5] = [2, 3, 5]
    loss = loss_module(input=net_output, targets=targets)


# train.py  (file name of the next listing)
import torch
import yaml

from yolov3_module import YOLOv3

if __name__ == '__main__':
    # Close the config file deterministically instead of leaking the handle.
    with open('./config/cfg.yaml') as cfg_file:
        cfg_dict = yaml.load(cfg_file, Loader=yaml.SafeLoader)
    yolo_module = YOLOv3(config=cfg_dict)
    # torch.Tensor(4, 3, 416, 416) would be uninitialised memory (may hold
    # NaN/inf); use a properly initialised random batch instead.
    x = torch.randn(4, 3, 416, 416)
    output3, output4, output5 = yolo_module(x)
    print(output3.shape, output4.shape, output5.shape)

    # YOLO loss with 3 scales
    yolo_loss = []
    # To be continued ...