1、准备聚类数据:提取所有目标的bounding box 2、获得bounding box的宽高信息(将左上角坐标、右下角坐标转换成宽高) 3、初始化k个anchor box:在所有的bounding box中随机选取k个,作为k个anchor box的初始值 4、计算每个bounding box与每个anchor box的IOU值 5、分类操作:对每个bounding box,计算它与每个anchor box的误差值(1 - IOU),并把它归入误差最小的那个anchor box,即每一个bounding box都属于某一个anchor box 6、anchor box更新:对属于同一个anchor box的所有bounding box的宽、高分别求中值,作为该anchor box新的宽高 7、重复4-6步,直到每个bounding box所属的anchor box不再变化 8、计算anchor box的精确度:每个bounding box选取其最高的那个IOU值(代表其属于某一个anchor box类),然后求所有bounding box该IOU值的平均值,即最后的精确度值
yolov3 k-means算法参考链接
import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Nothing in this section uses plt, so a missing matplotlib must not
    # prevent the clustering code from being imported.
    plt = None


class YOLO_Kmeans:
    """K-means clustering of bounding-box sizes into YOLO anchor boxes.

    Boxes and cluster centres are (width, height) pairs, and the distance
    between a box and a centre is ``1 - IoU``.
    """

    def __init__(self, cluster_number, filename):
        """Store the clustering configuration.

        :param cluster_number: number of anchor boxes (k) to produce.
        :param filename: path of the annotation file read by ``txt2boxes``.
        """
        self.cluster_number = cluster_number
        # Honour the caller's path; it used to be overwritten by "train.txt".
        self.filename = filename

    def iou(self, boxes, clusters):
        """Return the IoU of every box with every cluster centre.

        Only widths and heights are compared, so the intersection of a box
        and a centre is ``min(widths) * min(heights)``.

        :param boxes: array of shape (n, 2) with (width, height) rows.
        :param clusters: array of shape (k, 2) with (width, height) rows.
        :return: array of shape (n, k); entry [i, j] is the IoU of box i
            with cluster j.
        """
        # Column vectors for boxes and row vectors for clusters broadcast to
        # (n, k). k therefore comes from `clusters` itself rather than from
        # self.cluster_number.
        box_w = boxes[:, 0][:, np.newaxis]
        box_h = boxes[:, 1][:, np.newaxis]
        cluster_w = clusters[:, 0][np.newaxis, :]
        cluster_h = clusters[:, 1][np.newaxis, :]

        inter_area = np.minimum(box_w, cluster_w) * np.minimum(box_h, cluster_h)
        union_area = box_w * box_h + cluster_w * cluster_h - inter_area
        return inter_area / union_area

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster ``boxes`` into ``k`` anchor sizes.

        :param boxes: array of shape (n, 2) with (width, height) rows.
        :param k: number of clusters; must not exceed n.
        :param dist: reducer called as ``dist(members, axis=0)`` to turn the
            boxes assigned to a cluster into its new centre.
        :return: array of shape (k, 2) with the final cluster centres. It has
            the dtype of ``boxes``, so with integer boxes the reduced values
            are cast back to integers when they are stored.
        """
        box_number = boxes.shape[0]
        # -1 is never a valid cluster index, so the first pass always updates
        # the centres. Starting from zeros made the loop stop immediately
        # whenever every box was nearest to cluster 0 (always true for k=1).
        last_nearest = np.full((box_number,), -1)

        np.random.seed()
        # Initial centres: k distinct boxes picked at random. Fancy indexing
        # copies, so updating the centres never touches `boxes`.
        clusters = boxes[np.random.choice(box_number, k, replace=False)]

        while True:
            distances = 1 - self.iou(boxes, clusters)
            # For every box, the index of its closest centre.
            current_nearest = np.argmin(distances, axis=1)
            if (last_nearest == current_nearest).all():
                break  # assignments are stable, centres will not change

            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                # An empty cluster keeps its previous centre; reducing an
                # empty array would yield NaN.
                if members.shape[0]:
                    clusters[cluster] = dist(members, axis=0)

            last_nearest = current_nearest

        return clusters

    def result2txt(self, data):
        """Write the anchors to ``yolo_anchors.txt`` as ``w,h, w,h, ...``.

        :param data: sequence of (width, height) rows, one per anchor.
        """
        anchors = ", ".join(
            "%d,%d" % (data[i][0], data[i][1])
            for i in range(np.shape(data)[0]))
        with open("yolo_anchors.txt", 'w') as f:
            f.write(anchors)

    def txt2boxes(self):
        """Read ``self.filename`` and return the size of every box in it.

        On each line the first whitespace-separated field is skipped and every
        other field is parsed as ``x_min,y_min,x_max,y_max[,...]``.

        :return: array with one ``[width, height]`` row per box, e.g.
            ``[[170 58] [719 747] [166 63]]``.
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                # split() without arguments drops the trailing newline and
                # tolerates blank lines and repeated or trailing spaces.
                infos = line.split()
                for info in infos[1:]:
                    coords = info.split(",")
                    width = int(coords[2]) - int(coords[0])
                    height = int(coords[3]) - int(coords[1])
                    dataSet.append([width, height])
        return np.array(dataSet)

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of each box's best IoU.

        :param boxes: array of shape (n, 2) with (width, height) rows.
        :param clusters: array of shape (k, 2) with (width, height) rows.
        """
        best_iou = np.max(self.iou(boxes, clusters), axis=1)
        return np.mean(best_iou)

    def txt2clusters(self):
        """Cluster the boxes of ``self.filename`` and report the accuracy.

        The anchors are sorted by width, written with ``result2txt``, and the
        average IoU is printed.

        :return: the average IoU as a percentage string such as ``"67.31%"``.
        """
        all_boxes = self.txt2boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # lexsort on the single key row result.T[0] orders anchors by width.
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result)

        res = "{:.2f}%".format(self.avg_iou(all_boxes, result) * 100)
        print("Accuracy: {}".format(res))
        return res


def main():
    """Cluster ``train.txt`` for k = 1..12 and print each accuracy.

    Every run rewrites ``yolo_anchors.txt``, so the file ends up holding the
    anchors of the last k.
    """
    filename = "train.txt"
    y_ = []
    for i in range(1, 13):
        kmeans = YOLO_Kmeans(i, filename)
        accuracy = kmeans.txt2clusters()
        y_.append(accuracy)
    print(y_)


if __name__ == '__main__':
    main()