# Dataset utils and dataloaders
import glob
import logging
import math
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from torch.utils.data import Sampler
from torch.utils.data.sampler import WeightedRandomSampler,RandomSampler,SubsetRandomSampler
from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \
resample_segments, clean_str
from utils.torch_utils import torch_distributed_zero_first
from utils.mirror_fold import (
MIRROR_FOLD_APPLY_TO_VAL,
MIRROR_FOLD_DEBUG_APPLY_TO_VAL,
MIRROR_FOLD_PINK_COLOR_BGR,
build_pink_mask,
get_car_box_for_shape,
get_car_box_with_fallback,
get_debug_save_path,
should_apply_mirror_fold,
)
# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv', 'h264']  # acceptable video suffixes
logger = logging.getLogger(__name__)
# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
if ExifTags.TAGS[orientation] == 'Orientation':
break
def get_hash(files):
    # Returns a single hash value of a list of files
return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
def exif_size(img):
    # Returns exif-corrected PIL size
s = img.size # (width, height)
try:
rotation = dict(img._getexif().items())[orientation]
if rotation == 6: # rotation 270
s = (s[1], s[0])
elif rotation == 8: # rotation 90
s = (s[1], s[0])
except:
pass
return s
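# Note: EXIF orientations 6 and 8 encode 90-degree rotations, so exif_size swaps width and
# height; e.g. a 4032x3024 photo stored with orientation 6 is reported as (3024, 4032).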
def showlabels(img, boxs, landmarks):
for box in boxs:
x,y,w,h = box[0] * img.shape[1], box[1] * img.shape[0], box[2] * img.shape[1], box[3] * img.shape[0]
#cv2.rectangle(image, (x,y), (x+w,y+h), (0,255,0), 2)
cv2.rectangle(img, (int(x - w/2), int(y - h/2)), (int(x + w/2), int(y + h/2)), (0, 255, 0), 2)
for landmark in landmarks:
#cv2.circle(img,(60,60),30,(0,0,255))
for i in range(4):
cv2.circle(img, (int(landmark[2*i] * img.shape[1]), int(landmark[2*i+1]*img.shape[0])), 3 ,(0,0,255), -1)
    # save the annotated image under a quasi-random name for visual inspection
    kk = random.random() + random.random()
    cv2.imwrite('./train_labels/' + str(kk) + ".jpg", img)
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''):
    # Make sure only the first process in DDP processes the dataset first; the others can then use the cache
    with torch_distributed_zero_first(rank):  # multi-process sync: the main process builds the data, the other processes read the cache
        dataset = LoadImagesAndLabels_v5face(path, imgsz, batch_size,  # build the dataset
augment=augment, # augment images
hyp=hyp, # augmentation hyperparameters
rect=rect, # rectangular training
cache_images=cache,
single_cls=opt.single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights,
prefix=prefix)
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None  # use the DDP DistributedSampler when distributed, otherwise None (default)
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    # Use torch.utils.data.DataLoader() if dataset properties will update during training, else InfiniteDataLoader()

    # --- Experimental class-balanced sampling, kept disabled -------------------------------------------------
    # Suppose there are 7 classes; give class-1 samples a higher weight. Note that WeightedRandomSampler
    # expects one weight per *sample*, not per class, so as written this would only draw from 7 indices:
    # class_weights = [1.0, 1.0, 1.0, 5.0, 1.0, 1.0, 1.0]
    # sampler = WeightedRandomSampler(class_weights, num_samples=len(dataset), replacement=True)
    #
    # Suppose there are 7 classes and class 1 must appear in every batch:
    # class_indices = []
    # for i, (images, label, index, shapes) in enumerate(dataset):
    #     if index == "sss":
    #         class_indices.append(i)
    # num_samples_per_class = 1
    #
    # def get_batch():
    #     # randomly draw num_samples_per_class samples from the class-1 indices
    #     indices = np.random.choice(class_indices, size=num_samples_per_class, replace=True)
    #     # randomly draw (batch_size - num_samples_per_class) samples from the other classes' indices
    #     other_indices = [i for i in range(len(dataset)) if i not in class_indices]
    #     other_indices = np.random.choice(other_indices, size=batch_size - num_samples_per_class, replace=False)
    #     # combine indices and other_indices into one batch and shuffle it
    #     batch_indices = np.concatenate((indices, other_indices))
    #     np.random.shuffle(batch_indices)
    #     return batch_indices
    #
    # sampler = RandomSampler(dataset)
    # ----------------------------------------------------------------------------------------------------------

    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        # batch_sampler=get_batch,
                        pin_memory=True,
                        shuffle=sampler is None,  # a custom sampler and shuffle=True are mutually exclusive
                        collate_fn=LoadImagesAndLabels_v5face.collate_fn4 if quad else LoadImagesAndLabels_v5face.collate_fn)
return dataloader, dataset
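# A minimal working sketch of class-balanced sampling, assuming dataset.labels[i][:, 0]
# holds class ids (as built by cache_labels below); the 7-class weights are illustrative:
#
#   class_weights = np.array([1.0, 1.0, 1.0, 5.0, 1.0, 1.0, 1.0])
#   sample_cls = [int(l[0, 0]) if len(l) else 0 for l in dataset.labels]   # one class id per image
#   sample_weights = class_weights[sample_cls]                             # one weight per *sample*
#   sampler = WeightedRandomSampler(sample_weights.tolist(), num_samples=len(dataset), replacement=True)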
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
""" Dataloader that reuses workers
Uses same syntax as vanilla DataLoader
"""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)
class StratifiedSampler(Sampler):
    def __init__(self, dataset):
self.dataset = dataset
self.labels = dataset.labels
self.indices = self._stratify()
    def _stratify(self):
        # group the samples by class
        label_to_indices = {}
        for i, label in enumerate(self.labels):
            if label not in label_to_indices:
                label_to_indices[label] = []
            label_to_indices[label].append(i)
        # sample each class so that every batch contains an equal number of each class
indices = []
for label, label_indices in label_to_indices.items():
label_indices = np.array(label_indices)
np.random.shuffle(label_indices)
indices.extend(label_indices)
return indices
    def __iter__(self):
        return iter(self.indices)

    def __len__(self):
        return len(self.indices)
class _RepeatSampler(object):
""" Sampler that repeats forever
Args:
sampler (Sampler)
"""
    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
while True:
yield from iter(self.sampler)
def cv2_crop(im, box):
    '''Crop an image with cv2, mimicking PIL's Image.crop.
    :param im: the loaded image (ndarray)
    :param box: the crop rectangle, a tuple (left, upper, right, lower).
    '''
return im.copy()[box[1]:box[3], box[0]:box[2], :]
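# e.g. cv2_crop(im, (10, 20, 110, 220)) returns im[20:220, 10:110], a 200x100 (h x w) crop.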
def _mirror_fold_box_for_letterbox(orig_hw, resized_hw, ratio, pad):
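    # Map the mirror-fold car box through the letterbox transform: prefer a box defined for the
    # original (pre-resize) shape, scaled by (resized / original) * ratio plus the padding offset;
    # otherwise fall back to a box defined for the already-resized shape.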
h0, w0 = orig_hw
h, w = resized_hw
car_box = get_car_box_for_shape(w0, h0)
if car_box is not None:
scale_w = (w / w0) * ratio[0]
scale_h = (h / h0) * ratio[1]
x1, y1, x2, y2 = car_box
return (
int(x1 * scale_w + pad[0]),
int(y1 * scale_h + pad[1]),
int(x2 * scale_w + pad[0]),
int(y2 * scale_h + pad[1]),
)
car_box = get_car_box_for_shape(w, h)
if car_box is None:
return None
x1, y1, x2, y2 = car_box
return (
int(x1 * ratio[0] + pad[0]),
int(y1 * ratio[1] + pad[1]),
int(x2 * ratio[0] + pad[0]),
int(y2 * ratio[1] + pad[1]),
)
def _apply_mirror_fold_det(img, labels, car_box):
h, w = img.shape[:2]
pink_mask = build_pink_mask(w, h, car_box)
if pink_mask is None:
return img, labels
img[pink_mask] = MIRROR_FOLD_PINK_COLOR_BGR
if labels is None:
return img, labels
labels = labels if isinstance(labels, np.ndarray) else np.array(labels)
if labels.size == 0:
return img, labels
    # Decide by the four corner points first: drop a label if any valid corner falls inside the pink region;
    # if no corners are usable, fall back to dropping on any bbox intersection with the pink region.
keep = np.ones((labels.shape[0],), dtype=bool)
for i, row in enumerate(labels):
x1, y1, x2, y2 = [int(v) for v in row[1:5]]
x1 = max(0, min(x1, w - 1))
y1 = max(0, min(y1, h - 1))
x2 = max(0, min(x2, w))
y2 = max(0, min(y2, h))
if x2 <= x1 or y2 <= y1:
keep[i] = False
continue
        # labels[:, 5:13] = (x1, y1, x2, y2, x3, y3, x4, y4) corner points; values < 0 are invalid
lmk = row[5:13].reshape(4, 2)
valid = (lmk[:, 0] >= 0) & (lmk[:, 1] >= 0)
if valid.any():
xs = np.clip(np.round(lmk[valid, 0]).astype(int), 0, w - 1)
ys = np.clip(np.round(lmk[valid, 1]).astype(int), 0, h - 1)
if pink_mask[ys, xs].any():
keep[i] = False
continue
else:
if pink_mask[y1:y2, x1:x2].any():
keep[i] = False
return img, labels[keep]
def _draw_det_labels_xyxy(img, labels):
if labels is None:
return img
labels = labels if isinstance(labels, np.ndarray) else np.array(labels)
if labels.size == 0:
return img
out = img.copy()
for row in labels:
cls_id = int(row[0])
x1, y1, x2, y2 = [int(v) for v in row[1:5]]
cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(out, str(cls_id), (x1, max(0, y1 - 4)),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
lmk = row[5:13].reshape(4, 2)
valid = (lmk[:, 0] >= 0) & (lmk[:, 1] >= 0)
if valid.any():
pts = np.round(lmk).astype(int)
for j in range(4):
if valid[j]:
cv2.circle(out, (pts[j, 0], pts[j, 1]), 3, (0, 255, 255), -1)
            # connect the valid points in annotation order to show the four slot corners
ordered = [j for j in range(4) if valid[j]]
if len(ordered) >= 2:
for j in range(len(ordered)):
a = ordered[j]
b = ordered[(j + 1) % len(ordered)]
cv2.line(out, (pts[a, 0], pts[a, 1]), (pts[b, 0], pts[b, 1]), (255, 255, 0), 1)
return out
class LoadImages: # for inference
    def __init__(self, path, img_size=640, stride=32):
p = str(Path(path).absolute()) # os-agnostic absolute path
if '*' in p:
files = sorted(glob.glob(p, recursive=True)) # glob
elif os.path.isdir(p):
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
elif os.path.isfile(p):
files = [p] # files
else:
raise Exception(f'ERROR: {p} does not exist')
images = [x for x in files if x.split('.')[-1].lower() in img_formats]
videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
ni, nv = len(images), len(videos)
self.img_size = img_size
self.stride = stride
self.files = images + videos
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'image'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nf > 0, f'No images or videos found in {p}. ' \
f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'
    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
            # Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nf: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='')
else:
            # Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'Image Not Found ' + path
print(f'image {self.count}/{self.nf} {path}: ', end='')
        shape = img0.shape[:2]  # hw
        if shape[0] > 1088 and shape[1] > 960:
            centerPoint = (shape[1] // 2, shape[0] // 2)
            box = (centerPoint[0] - 480, centerPoint[1] - 544, centerPoint[0] + 480, centerPoint[1] + 544)
            img0 = cv2_crop(img0, box)
            print("crop image to 1088 960")
        # disabled legacy preprocessing, superseded by the letterbox call below
        # (note the resize started from img0 again, discarding the added border):
        # img = cv2.copyMakeBorder(img0, 2, 2, 8, 8, cv2.BORDER_CONSTANT, value=(0, 0, 0))  # add border
        # img = cv2.resize(img0, (544, 480), interpolation=cv2.INTER_LINEAR)

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]
        # Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
    def __len__(self):
return self.nf # number of files
class LoadWebcam: # for inference
    def __init__(self, pipe='0', img_size=640, stride=32):
self.img_size = img_size
self.stride = stride
if pipe.isnumeric():
pipe = eval(pipe) # local camera
        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera
self.pipe = pipe
self.cap = cv2.VideoCapture(pipe) # video capture object
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
self.count += 1
if cv2.waitKey(1) == ord('q'): # q to quit
self.cap.release()
cv2.destroyAllWindows()
raise StopIteration
        # Read frame
if self.pipe == 0: # local camera
ret_val, img0 = self.cap.read()
img0 = cv2.flip(img0, 1) # flip left-right
else: # IP camera
n = 0
while True:
n += 1
self.cap.grab()
if n % 30 == 0: # skip frames
ret_val, img0 = self.cap.retrieve()
if ret_val:
break
assert ret_val, f'Camera Error {self.pipe}'
img_path = 'webcam.jpg'
print(f'webcam {self.count}: ', end='')
        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
return img_path, img, img0, None
    def __len__(self):
return 0
class LoadStreams: # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640, stride=32):
self.mode = 'stream'
self.img_size = img_size
self.stride = stride
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = [clean_str(x) for x in sources] # clean source names for later
for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
print(f'{i + 1}/{n}: {s}... ', end='')
url = eval(s) if s.isnumeric() else s
if 'youtube.com/' in url or 'youtu.be/' in url: # if source is YouTube video
check_requirements(('pafy', 'youtube_dl'))
import pafy
url = pafy.new(url).getbest(preftype="mp4").url
cap = cv2.VideoCapture(url)
assert cap.isOpened(), f'Failed to open {s}'
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(f' success ({w}x{h} at {self.fps:.2f} FPS).')
thread.start()
print('') # newline
        # check for common shapes
s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
if n == 4: # read every 4th frame
success, im = cap.retrieve()
self.imgs[index] = im if success else self.imgs[index] * 0
n = 0
time.sleep(1 / self.fps) # wait time
    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
        # Letterbox
        img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
    def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
def img2label_paths(img_paths):
    # Define label paths as a function of image paths
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]
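# e.g. img2label_paths(['data/images/train/0001.jpg']) -> ['data/labels/train/0001.txt']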
def gettteodlabels(img_paths):
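    # Map image paths to this dataset's label layout: /image/ -> /yolo/ and '.jpg' -> '_gt.txt',
    # e.g. '/data/image/0001.jpg' -> '/data/yolo/0001_gt.txt'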
labels=[]
for img in img_paths:
label = img.replace("/image/","/yolo/")
label = label.replace(".jpg","_gt.txt")
labels.append(label)
return labels
class LoadImagesAndLabels(Dataset): # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
self.img_size = img_size
self.augment = augment
self.hyp = hyp
self.image_weights = image_weights
        self.rect = False if image_weights else rect  # rect inference is disabled when image_weights is enabled
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training; rect disables mosaic)
self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride
self.path = path
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = Path(p) # os-agnostic
if p.is_dir(): # dir
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # f = list(p.rglob('**/*.*'))  # pathlib
elif p.is_file(): # file
with open(p, 'r') as t:
t = t.read().strip().splitlines()
parent = str(p.parent) + os.sep
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
                    # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
else:
raise Exception(f'{prefix}{p} does not exist')
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats])  # pathlib
assert self.img_files, f'{prefix}No images found'
except Exception as e:
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')
        # Check cache
self.label_files = gettteodlabels(self.img_files) # labels
#labels11 = gettteodlabels(self.img_files)
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
if cache_path.is_file():
cache, exists = torch.load(cache_path), True # load
if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
else:
cache, exists = self.cache_labels(cache_path, prefix), False # cache
        # Display cache
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total
if exists:
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
        # Read cache
cache.pop('hash') # remove hash
cache.pop('version') # remove version
labels, shapes, self.segments = zip(*cache.values())
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
self.img_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update
if single_cls:
for x in self.labels:
x[:, 0] = 0
n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(np.int32) # batch index
nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image
self.n = n
self.indices = range(n)
        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
s = self.shapes # wh
ar = s[:, 1] / s[:, 0] # aspect ratio
irect = ar.argsort()
self.img_files = [self.img_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect]
self.shapes = s[irect] # wh
ar = ar[irect]
            # Set training image shapes
shapes = [[1, 1]] * nb
for i in range(nb):
ari = ar[bi == i]
mini, maxi = ari.min(), ari.max()
if maxi < 1:
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int32) * stride
        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
self.imgs = [None] * n
if cache_images:
gb = 0 # Gigabytes of cached images
self.img_hw0, self.img_hw = [None] * n, [None] * n
results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes
pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
pbar.close()
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
x = {} # dict
nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, duplicate
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
for i, (im_file, lb_file) in enumerate(pbar):
try:
                # verify images
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
segments = [] # instance segments
assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
assert im.format.lower() in img_formats, f'invalid image format {im.format}'
                # verify labels
if os.path.isfile(lb_file):
nf += 1 # label found
with open(lb_file, 'r') as f:
l = [x.split() for x in f.read().strip().splitlines()]
if any([len(x) > 8 for x in l]): # is segment
classes = np.array([x[0] for x in l], dtype=np.float32)
segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)
l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
l = np.array(l, dtype=np.float32)
if len(l):
assert l.shape[1] == 5, 'labels require 5 columns each'
assert (l >= 0).all(), 'negative labels'
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
else:
ne += 1 # label empty
l = np.zeros((0, 5), dtype=np.float32)
else:
nm += 1 # label missing
l = np.zeros((0, 5), dtype=np.float32)
x[im_file] = [l, shape, segments]
except Exception as e:
nc += 1
print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')
pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \
f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
pbar.close()
if nf == 0:
print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, i + 1
x['version'] = 0.1 # cache version
torch.save(x, path) # save for next time
logging.info(f'{prefix}New cache created: {path}')
return x
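    # The saved cache maps image path -> [labels (n, 5) float32, (width, height), segments], plus
    # bookkeeping keys: 'hash', 'results' = (nf, nm, ne, nc, total) and 'version'.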
    def __len__(self):
        return len(self.img_files)

    def __iter__(self):
        self.count = -1
        print('ran dataset iter')
        # self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
        return self

    def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic']
if mosaic:
            # Load mosaic
img, labels = load_mosaic(self, index)
shapes = None
            # MixUp https://arxiv.org/pdf/1710.09412.pdf (applied only when mosaic was used)
if random.random() < hyp['mixup']:
img2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
img = (img * r + img2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
            # legacy label handling, superseded by the yolov5face block below:
            # labels = self.labels[index].copy()
            # if labels.size:  # normalized xywh to pixel xyxy format
            #     labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

            # yolov5face label handling: bbox plus landmarks
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
#labels[:, 5] = ratio[0] * w * x[:, 5] + pad[0] # pad width
labels[:, 5] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 5] + pad[0]) + (
np.array(x[:, 5] > 0, dtype=np.int32) - 1)
labels[:, 6] = np.array(x[:, 6] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 6] + pad[1]) + (
np.array(x[:, 6] > 0, dtype=np.int32) - 1)
labels[:, 7] = np.array(x[:, 7] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 7] + pad[0]) + (
np.array(x[:, 7] > 0, dtype=np.int32) - 1)
labels[:, 8] = np.array(x[:, 8] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 8] + pad[1]) + (
np.array(x[:, 8] > 0, dtype=np.int32) - 1)
labels[:, 9] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 9] + pad[0]) + (
np.array(x[:, 9] > 0, dtype=np.int32) - 1)
labels[:, 10] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 10] + pad[1]) + (
np.array(x[:, 10] > 0, dtype=np.int32) - 1)
labels[:, 11] = np.array(x[:, 11] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 11] + pad[0]) + (
np.array(x[:, 11] > 0, dtype=np.int32) - 1)
labels[:, 12] = np.array(x[:, 12] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 12] + pad[1]) + (
np.array(x[:, 12] > 0, dtype=np.int32) - 1)
labels[:, 13] = np.array(x[:, 13] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 13] + pad[0]) + (
np.array(x[:, 13] > 0, dtype=np.int32) - 1)
labels[:, 14] = np.array(x[:, 14] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 14] + pad[1]) + (
np.array(x[:, 14] > 0, dtype=np.int32) - 1)
if self.augment:
            # Augment imagespace
            if not mosaic:  # random perspective only when mosaic was not applied
img, labels = random_perspective(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'])
            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
if random.random() < 0.9:
labels = cutout(img, labels)
nL = len(labels) # number of labels
if nL:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
labels[:, [5, 7, 9, 11, 13]] /= img.shape[1] # normalized landmark x 0-1
labels[:, [5, 7, 9, 11, 13]] = np.where(labels[:, [5, 7, 9, 11, 13]] < 0, -1, labels[:, [5, 7, 9, 11, 13]])
labels[:, [6, 8, 10, 12, 14]] /= img.shape[0] # normalized landmark y 0-1
labels[:, [6, 8, 10, 12, 14]] = np.where(labels[:, [6, 8, 10, 12, 14]] < 0, -1, labels[:, [6, 8, 10, 12, 14]])
if self.augment:
            # flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
labels[:, 6] = np.where(labels[:,6] < 0, -1, 1 - labels[:, 6])
labels[:, 8] = np.where(labels[:, 8] < 0, -1, 1 - labels[:, 8])
labels[:, 10] = np.where(labels[:, 10] < 0, -1, 1 - labels[:, 10])
labels[:, 12] = np.where(labels[:, 12] < 0, -1, 1 - labels[:, 12])
labels[:, 14] = np.where(labels[:, 14] < 0, -1, 1 - labels[:, 14])
            # flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
labels[:, 5] = np.where(labels[:, 5] < 0, -1, 1 - labels[:, 5])
labels[:, 7] = np.where(labels[:, 7] < 0, -1, 1 - labels[:, 7])
labels[:, 9] = np.where(labels[:, 9] < 0, -1, 1 - labels[:, 9])
labels[:, 11] = np.where(labels[:, 11] < 0, -1, 1 - labels[:, 11])
labels[:, 13] = np.where(labels[:, 13] < 0, -1, 1 - labels[:, 13])
                    # When mirroring left-right, the left/right eyes and left/right mouth corners cannot be
                    # told apart, so swap them to keep the landmark semantics consistent for the network
eye_left = np.copy(labels[:, [5, 6]])
mouth_left = np.copy(labels[:, [11, 12]])
labels[:, [5, 6]] = labels[:, [7, 8]]
labels[:, [7, 8]] = eye_left
labels[:, [11, 12]] = labels[:, [13, 14]]
labels[:, [13, 14]] = mouth_left
labels_out = torch.zeros((nL, 6))
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
        # Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
@staticmethod
def collate_fn(batch):
img, label, path, shapes = zip(*batch) # transposed
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
@staticmethod
def collate_fn4(batch):
img, label, path, shapes = zip(*batch) # transposed
n = len(shapes) // 4
img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
s = torch.tensor([[1, 1, .5, .5, .5, .5]]) # scale
for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
i *= 4
if random.random() < 0.5:
im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
0].type(img[i].type())
l = label[i]
else:
im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
img4.append(im)
label4.append(l)
for i, l in enumerate(label4):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
class LoadImagesAndLabels_v5face(Dataset): # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
self.img_size = img_size
self.augment = augment
self.hyp = hyp
self.image_weights = image_weights
        self.rect = False if image_weights else rect  # rect inference is disabled when image_weights is enabled
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training; rect disables mosaic)
self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride
self.path = path
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = Path(p) # os-agnostic
if p.is_dir(): # dir
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # f = list(p.rglob('**/*.*'))  # pathlib
elif p.is_file(): # file
with open(p, 'r') as t:
t = t.read().strip().splitlines()
parent = str(p.parent) + os.sep
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
                    # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
else:
raise Exception(f'{prefix}{p} does not exist')
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats])  # pathlib
assert self.img_files, f'{prefix}No images found'
except Exception as e:
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')
        # Check cache
self.label_files = gettteodlabels(self.img_files) # labels
#labels11 = gettteodlabels(self.img_files)
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
if cache_path.is_file():
cache, exists = torch.load(cache_path), True # load
if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
else:
cache, exists = self.cache_labels(cache_path, prefix), False # cache
        # Display cache
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total
if exists:
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
        # Read cache
cache.pop('hash') # remove hash
cache.pop('version') # remove version
labels, shapes, self.segments = zip(*cache.values())
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
self.img_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update
if single_cls:
for x in self.labels:
x[:, 0] = 0
n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(np.int32) # batch index
nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image
self.n = n
self.indices = range(n)
        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
s = self.shapes # wh
ar = s[:, 1] / s[:, 0] # aspect ratio
irect = ar.argsort()
self.img_files = [self.img_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect]
self.shapes = s[irect] # wh
ar = ar[irect]
            # Set training image shapes
shapes = [[1, 1]] * nb
for i in range(nb):
ari = ar[bi == i]
mini, maxi = ari.min(), ari.max()
if maxi < 1:
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int32) * stride
        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
self.imgs = [None] * n
if cache_images:
gb = 0 # Gigabytes of cached images
self.img_hw0, self.img_hw = [None] * n, [None] * n
results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes
pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
pbar.close()
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
x = {} # dict
nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, duplicate
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
for i, (im_file, lb_file) in enumerate(pbar):
try:
                # verify images
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
segments = [] # instance segments
assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
assert im.format.lower() in img_formats, f'invalid image format {im.format}'
                # verify labels
if os.path.isfile(lb_file):
nf += 1 # label found
with open(lb_file, 'r') as f:
l = [x.split() for x in f.read().strip().splitlines()]
if any([len(x) > 25 for x in l]): # is segment
classes = np.array([x[0] for x in l], dtype=np.float32)
segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)
l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
l = np.array(l, dtype=np.float32)
if len(l):
                        assert l.shape[1] == 19, 'labels require 19 columns each'
assert (l >= 0).all(), 'negative labels'
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
else:
ne += 1 # label empty
l = np.zeros((0, 19), dtype=np.float32)
else:
nm += 1 # label missing
l = np.zeros((0, 19), dtype=np.float32)
x[im_file] = [l, shape, segments]
except Exception as e:
nc += 1
print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')
pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \
f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
pbar.close()
if nf == 0:
print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, i + 1
x['version'] = 0.1 # cache version
torch.save(x, path) # save for next time
logging.info(f'{prefix}New cache created: {path}')
return x
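    # Face/slot variant of the cache: label rows are (n, 19) float32, apparently laid out as
    # cls, xywh box, four landmark (x, y) pairs in columns 5-12, and attribute values in 13-18.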
    def __len__(self):
        return len(self.img_files)

    def __iter__(self):
        self.count = -1
        print('ran dataset iter')
        # self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
        return self

    def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic']
h0 = w0 = h = w = None
ratio = (1.0, 1.0)
pad = (0.0, 0.0)
if mosaic:
            # Load mosaic
img, labels = load_mosaic_face(self, index)
shapes = None
h, w = img.shape[:2]
            # MixUp https://arxiv.org/pdf/1710.09412.pdf
if random.random() < hyp['mixup']:
img2, labels2 = load_mosaic_face(self, random.randint(0, self.n - 1))
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
img = (img * r + img2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            # shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # rect shapes disabled here
            shape = self.img_size  # final letterboxed shape
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
            # Load labels
labels = []
x = self.labels[index]
if x.size > 0:
                # Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
#labels[:, 5] = ratio[0] * w * x[:, 5] + pad[0] # pad width
labels[:, 5] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 5] + pad[0]) + (
np.array(x[:, 5] > 0, dtype=np.int32) - 1)
labels[:, 6] = np.array(x[:, 6] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 6] + pad[1]) + (
np.array(x[:, 6] > 0, dtype=np.int32) - 1)
labels[:, 7] = np.array(x[:, 7] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 7] + pad[0]) + (
np.array(x[:, 7] > 0, dtype=np.int32) - 1)
labels[:, 8] = np.array(x[:, 8] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 8] + pad[1]) + (
np.array(x[:, 8] > 0, dtype=np.int32) - 1)
                labels[:, 9] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 9] + pad[0]) + (
                    np.array(x[:, 9] > 0, dtype=np.int32) - 1)
                # disabled debug block: it rebound h (the image height) to an array and
                # corrupted every landmark computed after it
                # z = np.array(x[:, 5] > 0, dtype=np.int32)
                # zh = z * (ratio[0] * w * x[:, 9] + pad[0]) + (np.array(x[:, 9] > 0, dtype=np.int32) - 1)
labels[:, 10] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 10] + pad[1]) + (
np.array(x[:, 10] > 0, dtype=np.int32) - 1)
labels[:, 11] = np.array(x[:, 11] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 11] + pad[0]) + (
np.array(x[:, 11] > 0, dtype=np.int32) - 1)
labels[:, 12] = np.array(x[:, 12] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 12] + pad[1]) + (
np.array(x[:, 12] > 0, dtype=np.int32) - 1)
                # columns 13-18 carry per-target attribute values, not landmark coordinates, so the
                # landmark-style transform for columns 13-14 is kept disabled (it was overwritten below anyway):
                # labels[:, 13] = np.array(x[:, 13] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 13] + pad[0]) + (
                #     np.array(x[:, 13] > 0, dtype=np.int32) - 1)
                # labels[:, 14] = np.array(x[:, 14] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 14] + pad[1]) + (
                #     np.array(x[:, 14] > 0, dtype=np.int32) - 1)
                labels[:, 13] = np.array(x[:, 13], dtype=np.int32)
                labels[:, 14] = np.array(x[:, 14], dtype=np.int32)
                labels[:, 15] = np.array(x[:, 15], dtype=np.int32)
                labels[:, 16] = np.array(x[:, 16], dtype=np.int32)
                labels[:, 17] = np.array(x[:, 17], dtype=np.int32)
                labels[:, 18] = np.array(x[:, 18], dtype=np.int32)
        # MixUp https://arxiv.org/pdf/1710.09412.pdf
if self.augment:
if random.random() < hyp['mixup']:
                index1 = random.randint(0, self.n - 1)
img2, labels2 = load_nomer_image(self,index1)
if len(labels2)!=0 and len(labels)!=0:
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
img = (img * r + img2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
if (self.augment or MIRROR_FOLD_APPLY_TO_VAL) and should_apply_mirror_fold():
if mosaic:
car_box = get_car_box_with_fallback(w, h, img)
else:
car_box = _mirror_fold_box_for_letterbox((h0, w0), (h, w), ratio, pad)
if car_box is None:
car_box = get_car_box_with_fallback(img.shape[1], img.shape[0], img)
if car_box is not None:
img, labels = _apply_mirror_fold_det(img, labels, car_box)
        if self.augment:
            # Augment imagespace
            if not mosaic:
img, labels = random_perspective_facev5(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'])
            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
if random.random() < 0.9:
labels = cutout(img, labels)
debug_path = get_debug_save_path("det", self.img_files[index], "jpg") if (
self.augment or (MIRROR_FOLD_APPLY_TO_VAL and MIRROR_FOLD_DEBUG_APPLY_TO_VAL)
) else None
if debug_path:
debug_img = _draw_det_labels_xyxy(img, labels)
cv2.imwrite(debug_path, debug_img)
nL = len(labels) # number of labels
if nL:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
labels[:, [5, 7, 9, 11]] /= img.shape[1] # normalized landmark x 0-1
labels[:, [5, 7, 9, 11]] = np.where(labels[:, [5, 7, 9, 11]] < 0, -1, labels[:, [5, 7, 9, 11]])
labels[:, [6, 8, 10, 12]] /= img.shape[0] # normalized landmark y 0-1
labels[:, [6, 8, 10, 12]] = np.where(labels[:, [6, 8, 10, 12]] < 0, -1, labels[:, [6, 8, 10, 12]])
if self.augment:
# flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
                    labels[:, 6] = np.where(labels[:, 6] < 0, -1, 1 - labels[:, 6])
                    labels[:, 8] = np.where(labels[:, 8] < 0, -1, 1 - labels[:, 8])
                    labels[:, 10] = np.where(labels[:, 10] < 0, -1, 1 - labels[:, 10])
                    labels[:, 12] = np.where(labels[:, 12] < 0, -1, 1 - labels[:, 12])
                    # column 14 is an attribute in this 19-column layout, so it is not flipped
                # flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
labels[:, 5] = np.where(labels[:, 5] < 0, -1, 1 - labels[:, 5])
labels[:, 7] = np.where(labels[:, 7] < 0, -1, 1 - labels[:, 7])
labels[:, 9] = np.where(labels[:, 9] < 0, -1, 1 - labels[:, 9])
labels[:, 11] = np.where(labels[:, 11] < 0, -1, 1 - labels[:, 11])
                    # labels[:, 13] = np.where(labels[:, 13] < 0, -1, 1 - labels[:, 13])  # column 13 is an attribute
                    # When mirroring left-right, the paired points become indistinguishable, so swap them to
                    # keep the point semantics consistent for the network. The four points live in columns
                    # 5-12; columns 13-14 are attributes and must not be swapped.
                    point12 = np.copy(labels[:, [5, 6]])
                    point34 = np.copy(labels[:, [9, 10]])
                    labels[:, [5, 6]] = labels[:, [7, 8]]
                    labels[:, [7, 8]] = point12
                    labels[:, [9, 10]] = labels[:, [11, 12]]
                    labels[:, [11, 12]] = point34
        labels_out = torch.zeros((nL, 20))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)
            # showlabels(img, labels[:, 1:5], labels[:, 5:13])  # debug dump; writes a jpg per item

        # Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
#print(index, ' --- labels_out: ', labels_out)
#if nL:
#print( ' : landmarks : ', torch.max(labels_out[:, 5:15]), ' --- ', torch.min(labels_out[:, 5:15]))
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
@staticmethod
def collate_fn(batch):
img, label, path, shapes = zip(*batch) # transposed
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
@staticmethod
def collate_fn4(batch):
img, label, path, shapes = zip(*batch) # transposed
n = len(shapes) // 4
img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
s = torch.tensor([[1, 1, .5, .5, .5, .5]]) # scale
for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
i *= 4
if random.random() < 0.5:
im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
0].type(img[i].type())
l = label[i]
else:
im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
img4.append(im)
label4.append(l)
for i, l in enumerate(label4):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
img = self.imgs[index]
if img is None: # not cached
path = self.img_files[index]
img = cv2.imread(path) # BGR
assert img is not None, 'Image Not Found ' + path
h0, w0 = img.shape[:2] # orig hw
r = self.img_size / max(h0, w0) # resize image to img_size
if r != 1: # always resize down, only resize up if training with augmentation
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
else:
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
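# e.g. with img_size=640, a 1280x960 (w x h) source returns (img, (960, 1280), (480, 640))
# as (image, hw_original, hw_resized).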
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
dtype = img.dtype # uint8
x = np.arange(0, 256, dtype=np.int16)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
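# The HSV gains are applied through 256-entry lookup tables: hue wraps modulo 180 (OpenCV's
# uint8 hue range), saturation and value clip to [0, 255], and the result is written back
# to the input image in place via the dst= argument.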
def hist_equalize(img, clahe=True, bgr=False):
    # Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
yuv[:, :, 0] = c.apply(yuv[:, :, 0])
else:
yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
def load_mosaic(self, index):
    # loads images in a 4-mosaic
labels4, segments4 = [], []
s = self.img_size
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
        # Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
segments4.extend(segments)
    # Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
    # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
img4, labels4 = random_perspective(img4, labels4, segments4,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove
return img4, labels4
def load_mosaic9(self, index):
    # loads images in a 9-mosaic
labels9, segments9 = [], []
s = self.img_size
indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img9
if i == 0: # center
img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
h0, w0 = h, w
c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
elif i == 1: # top
c = s, s - h, s + w, s
elif i == 2: # top right
c = s + wp, s - h, s + wp + w, s
elif i == 3: # right
c = s + w0, s, s + w0 + w, s + h
elif i == 4: # bottom right
c = s + w0, s + hp, s + w0 + w, s + hp + h
elif i == 5: # bottom
c = s + w0 - w, s + h0, s + w0, s + h0 + h
elif i == 6: # bottom left
c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
elif i == 7: # left
c = s - w, s + h0 - h, s, s + h0
elif i == 8: # top left
c = s - w, s + h0 - hp - h, s, s + h0 - hp
padx, pady = c[:2]
x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords
        # Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
labels9.append(labels)
segments9.extend(segments)
        # Image
img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
hp, wp = h, w # height, width previous
    # Offset
yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y
img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
    # Concat/clip labels
labels9 = np.concatenate(labels9, 0)
labels9[:, [1, 3]] -= xc
labels9[:, [2, 4]] -= yc
c = np.array([xc, yc]) # centers
segments9 = [x - c for x in segments9]
for x in (labels9[:, 1:], *segments9):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
    # img9, labels9 = replicate(img9, labels9)  # replicate

    # Augment
img9, labels9 = random_perspective(img9, labels9, segments9,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove
return img9, labels9
def replicate(img, labels):
    # Replicate labels
h, w = img.shape[:2]
boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
x1b, y1b, x2b, y2b = boxes[i]
bh, bw = y2b - y1b, x2b - x1b
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
return img, labels
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
    # Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
    # Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
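# e.g. letterboxing a 720x1280 frame to 640 with the default auto=True pads only up to the
# next stride multiple:
#   im = np.zeros((720, 1280, 3), dtype=np.uint8)
#   out, ratio, (dw, dh) = letterbox(im, 640)
#   # out.shape == (384, 640, 3), ratio == (0.5, 0.5), (dw, dh) == (0.0, 12.0)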
def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
C = np.eye(3)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
    # Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
    # Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
    # Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
if perspective:
img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(img[:, :, ::-1])  # base
    # ax[1].imshow(img2[:, :, ::-1])  # warped

    # Transform label coordinates
n = len(targets)
if n:
use_segments = any(x.any() for x in segments)
new = np.zeros((n, 4))
if use_segments: # warp segments
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
                # clip
                new[i] = segment2box(xy, width, height)
else: # warp boxes
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
            # create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i]
targets[:, 1:5] = new[i]
return img, targets
def random_perspective_facev5(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
C = np.eye(3)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
    # Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
    # Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
    # Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
if perspective:
img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(img[:, :, ::-1])  # base
    # ax[1].imshow(img2[:, :, ::-1])  # warped

    # Transform label coordinates
n = len(targets)
if n:
        # warp points
        # xy = np.ones((n * 4, 3))
        xy = np.ones((n * 8, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2, 5, 6, 7, 8, 9, 10, 11, 12]].reshape(n * 8, 2)  # box corners x1y1, x2y2, x1y2, x2y1, then the 4 landmark points
xy = xy @ M.T # transform
if perspective:
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 16) # rescale
else: # affine
xy = xy[:, :2].reshape(n, 16)
        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        landmarks = xy[:, [8, 9, 10, 11, 12, 13, 14, 15]]
        # mask = np.array(targets[:, 5:] > 0, dtype=np.int32)  # disabled: the targets carry extra attribute values after the landmarks
        mask = np.array(targets[:, 5:13] > 0, dtype=np.int32)
        landmarks = landmarks * mask
        landmarks = landmarks + mask - 1
landmarks = np.where(landmarks < 0, -1, landmarks)
landmarks[:, [0, 2, 4, 6]] = np.where(landmarks[:, [0, 2, 4, 6]] > width, -1, landmarks[:, [0, 2, 4, 6]])
landmarks[:, [1, 3, 5, 7]] = np.where(landmarks[:, [1, 3, 5, 7]] > height, -1, landmarks[:, [1, 3, 5, 7]])
landmarks[:, 0] = np.where(landmarks[:, 1] == -1, -1, landmarks[:, 0])
landmarks[:, 1] = np.where(landmarks[:, 0] == -1, -1, landmarks[:, 1])
landmarks[:, 2] = np.where(landmarks[:, 3] == -1, -1, landmarks[:, 2])
landmarks[:, 3] = np.where(landmarks[:, 2] == -1, -1, landmarks[:, 3])
landmarks[:, 4] = np.where(landmarks[:, 5] == -1, -1, landmarks[:, 4])
landmarks[:, 5] = np.where(landmarks[:, 4] == -1, -1, landmarks[:, 5])
landmarks[:, 6] = np.where(landmarks[:, 7] == -1, -1, landmarks[:, 6])
landmarks[:, 7] = np.where(landmarks[:, 6] == -1, -1, landmarks[:, 7])
# landmarks[:, 8] = np.where(landmarks[:, 9] == -1, -1, landmarks[:, 8])  # leftover from the 5-landmark variant; out of range for the 8-column landmarks here
# landmarks[:, 9] = np.where(landmarks[:, 8] == -1, -1, landmarks[:, 9])
targets[:,5:13] = landmarks
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction of bounding boxes
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
targets = targets[i]
targets[:, 1:5] = xy[i]
return img, targets
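For intuition on the angle-based reduction above: reduction = max(|sin a|, |cos a|) ** 0.5 is 1 at a = 0 and smallest at 45 degrees, shrinking boxes to offset the inflation of axis-aligned boxes around rotated content. A quick illustrative check:
import math
for a in (0, 15, 45):
    r = a * math.pi / 180
    print(a, round(max(abs(math.sin(r)), abs(math.cos(r))) ** 0.5, 3))  # 0 -> 1.0, 15 -> 0.983, 45 -> 0.841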
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
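A hedged usage sketch for box_candidates as defined above (toy numbers): boxes are passed as 4xn arrays, and a candidate survives only if it stays wider and taller than wh_thr, keeps more than area_thr of its pre-augment area, and keeps an aspect ratio under ar_thr.
import numpy as np
box1 = np.array([[0.], [0.], [100.], [100.]])  # before augment, shape (4, 1)
box2 = np.array([[0.], [0.], [20.], [20.]])    # after augment: only 4% of the original area
print(box_candidates(box1, box2))              # [False] -> rejected by area_thr=0.1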
def cutout(image, labels):
Applies image cutout augmentation https://arxiv.org/abs/1708.04552
h, w = image.shape[:2]
def bbox_ioa(box1, box2):
Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
box2 = box2.transpose()
Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
Intersection area
inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
box2 area
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
Intersection over box2 area
return inter_area / box2_area
create random masks
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
for s in scales:
mask_h = random.randint(1, int(h * s))
mask_w = random.randint(1, int(w * s))
box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
apply random color mask
image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
return unobscured labels
if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels
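A standalone numeric check of the intersection-over-area rule cutout relies on, reimplemented here so it runs on its own (boxes are plain x1y1x2y2 without the class column): a mask covering half of a label gives IoA 0.5, below the 0.60 threshold, so the label survives.
import numpy as np

def ioa(box1, box2):  # intersection over box2 area; box1 is (4,), box2 is (n, 4)
    box2 = box2.transpose()
    inter = (np.minimum(box1[2], box2[2]) - np.maximum(box1[0], box2[0])).clip(0) * \
            (np.minimum(box1[3], box2[3]) - np.maximum(box1[1], box2[1])).clip(0)
    return inter / ((box2[2] - box2[0]) * (box2[3] - box2[1]) + 1e-16)

mask = np.array([0, 0, 50, 100], dtype=np.float32)       # cutout region
labels = np.array([[0, 0, 100, 100]], dtype=np.float32)  # one label box
print(ioa(mask, labels))                                 # [0.5] -> kept (< 0.60)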
def create_folder(path='./new'):
Create folder
if os.path.exists(path):
shutil.rmtree(path) # delete output folder
os.makedirs(path) # make new output folder
def flatten_recursive(path='../coco128'):
Flatten a recursive directory by bringing all files to top level
new_path = Path(path + '_flat')
create_folder(new_path)
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
shutil.copyfile(file, new_path / Path(file).name)
def extract_boxes(path='../coco128/'): # from utils.datasets import *; extract_boxes('../coco128')
Convert detection dataset into classification dataset, with one directory per class
path = Path(path) # images dir
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
files = list(path.rglob('*.*'))
n = len(files) # number of files
for im_file in tqdm(files, total=n):
if im_file.suffix[1:] in img_formats:
image
im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB
h, w = im.shape[:2]
labels
lb_file = Path(img2label_paths([str(im_file)])[0])
if Path(lb_file).exists():
with open(lb_file, 'r') as f:
lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels
for j, x in enumerate(lb):
c = int(x[0]) # class
f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
if not f.parent.is_dir():
f.parent.mkdir(parents=True)
b = x[1:] * [w, h, w, h] # box
b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int32)
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
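The crop geometry in extract_boxes, traced with toy numbers: the normalized xywh label is scaled to pixels, squared to its longer side, padded by 20% plus 3 px, then converted to xyxy corners and clipped.
import numpy as np
w, h = 640, 480
x = np.array([0.5, 0.5, 0.1, 0.2])           # normalized xc, yc, w, h
b = x * [w, h, w, h]                         # -> [320. 240.  64.  96.]
b[2:] = b[2:].max()                          # square: both sides become 96
b[2:] = b[2:] * 1.2 + 3                      # pad: 118.2
print(b[:2] - b[2:] / 2, b[:2] + b[2:] / 2)  # corners: [260.9 180.9] [379.1 299.1]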
def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0), annotated_only=False):
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.datasets import *; autosplit('../coco128')
Arguments
path: Path to images directory
weights: Train, val, test weights (list)
annotated_only: Only use images with an annotated txt file
"""
path = Path(path) # images dir
files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], []) # image files only
n = len(files) # number of files
indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files
[(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing
print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
for i, img in tqdm(zip(indices, files), total=n):
if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
with open(path / txt[i], 'a') as f:
f.write(str(img) + '\n')  # add image to txt file
def load_mosaic_face(self, index):  # loads images in a mosaic
labels4 = []
s = self.img_size
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(3)] # 3 additional image indices
for i, index in enumerate(indices):
Load image
img, _, (h, w) = load_image(self, index)
place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
Labels
x = self.labels[index]
labels = x.copy()
if x.size > 0: # Normalized xywh to pixel xyxy format
#box, x1,y1,x2,y2
labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
#10 landmarks
labels[:, 5] = np.array(x[:, 5] > 0, dtype=np.int32) * (w * x[:, 5] + padw) + (np.array(x[:, 5] > 0, dtype=np.int32) - 1)
labels[:, 6] = np.array(x[:, 6] > 0, dtype=np.int32) * (h * x[:, 6] + padh) + (np.array(x[:, 6] > 0, dtype=np.int32) - 1)
labels[:, 7] = np.array(x[:, 7] > 0, dtype=np.int32) * (w * x[:, 7] + padw) + (np.array(x[:, 7] > 0, dtype=np.int32) - 1)
labels[:, 8] = np.array(x[:, 8] > 0, dtype=np.int32) * (h * x[:, 8] + padh) + (np.array(x[:, 8] > 0, dtype=np.int32) - 1)
labels[:, 9] = np.array(x[:, 9] > 0, dtype=np.int32) * (w * x[:, 9] + padw) + (np.array(x[:, 9] > 0, dtype=np.int32) - 1)
labels[:, 10] = np.array(x[:, 10] > 0, dtype=np.int32) * (h * x[:, 10] + padh) + (np.array(x[:, 10] > 0, dtype=np.int32) - 1)
labels[:, 11] = np.array(x[:, 11] > 0, dtype=np.int32) * (w * x[:, 11] + padw) + (np.array(x[:, 11] > 0, dtype=np.int32) - 1)
labels[:, 12] = np.array(x[:, 12] > 0, dtype=np.int32) * (h * x[:, 12] + padh) + (np.array(x[:, 12] > 0, dtype=np.int32) - 1)
labels[:, 13] = np.array(x[:, 13] > 0, dtype=np.int32) * (w * x[:, 13] + padw) + (np.array(x[:, 13] > 0, dtype=np.int32) - 1)
labels[:, 14] = np.array(x[:, 14] > 0, dtype=np.int32) * (h * x[:, 14] + padh) + (np.array(x[:, 14] > 0, dtype=np.int32) - 1)
labels4.append(labels)
Concat/clip labels
if len(labels4):
labels4 = np.concatenate(labels4, 0)
np.clip(labels4[:, 1:5], 0, 2 * s, out=labels4[:, 1:5]) # use with random_perspective
img4, labels4 = replicate(img4, labels4) # replicate
#landmarks
labels4[:, 5:] = np.where(labels4[:, 5:] < 0, -1, labels4[:, 5:])
labels4[:, 5:] = np.where(labels4[:, 5:] > 2 * s, -1, labels4[:, 5:])
labels4[:, 5] = np.where(labels4[:, 6] == -1, -1, labels4[:, 5])
labels4[:, 6] = np.where(labels4[:, 5] == -1, -1, labels4[:, 6])
labels4[:, 7] = np.where(labels4[:, 8] == -1, -1, labels4[:, 7])
labels4[:, 8] = np.where(labels4[:, 7] == -1, -1, labels4[:, 8])
labels4[:, 9] = np.where(labels4[:, 10] == -1, -1, labels4[:, 9])
labels4[:, 10] = np.where(labels4[:, 9] == -1, -1, labels4[:, 10])
labels4[:, 11] = np.where(labels4[:, 12] == -1, -1, labels4[:, 11])
labels4[:, 12] = np.where(labels4[:, 11] == -1, -1, labels4[:, 12])
labels4[:, 13] = np.where(labels4[:, 14] == -1, -1, labels4[:, 13])
labels4[:, 14] = np.where(labels4[:, 13] == -1, -1, labels4[:, 14])
Augment
img4, labels4 = random_perspective(img4, labels4,  # note: this generic warp transforms boxes only; the landmark columns (5:15) pass through unwarped
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove
return img4, labels4
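The landmark visibility trick used above, isolated with toy values: with mask = (coord > 0), the expression new * mask + mask - 1 keeps the transformed coordinate wherever the source landmark was visible and pins invisible landmarks to the -1 sentinel.
import numpy as np
src = np.array([120., -1., 300.])         # original landmark coords; -1 means not visible
new = np.array([135., 40., 310.])         # the same coords after shifting into the mosaic
mask = np.array(src > 0, dtype=np.int32)  # [1, 0, 1]
print(new * mask + mask - 1)              # [135.  -1. 310.]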
def load_nomer_image(self, index):
img, (h0, w0), (h, w) = load_image(self, index)
shape = self.img_size
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
Load labels
labels = []
x = self.labels[index]
if x.size > 0:
Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
#labels[:, 5] = ratio[0] * w * x[:, 5] + pad[0] # pad width
labels[:, 5] = np.array(x[:, 5] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 5] + pad[0]) + (
np.array(x[:, 5] > 0, dtype=np.int32) - 1)
labels[:, 6] = np.array(x[:, 6] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 6] + pad[1]) + (
np.array(x[:, 6] > 0, dtype=np.int32) - 1)
labels[:, 7] = np.array(x[:, 7] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 7] + pad[0]) + (
np.array(x[:, 7] > 0, dtype=np.int32) - 1)
labels[:, 8] = np.array(x[:, 8] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 8] + pad[1]) + (
np.array(x[:, 8] > 0, dtype=np.int32) - 1)
labels[:, 9] = np.array(x[:, 9] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 9] + pad[0]) + (
np.array(x[:, 9] > 0, dtype=np.int32) - 1)
labels[:, 10] = np.array(x[:, 10] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 10] + pad[1]) + (
np.array(x[:, 10] > 0, dtype=np.int32) - 1)
labels[:, 11] = np.array(x[:, 11] > 0, dtype=np.int32) * (ratio[0] * w * x[:, 11] + pad[0]) + (
np.array(x[:, 11] > 0, dtype=np.int32) - 1)
labels[:, 12] = np.array(x[:, 12] > 0, dtype=np.int32) * (ratio[1] * h * x[:, 12] + pad[1]) + (
np.array(x[:, 12] > 0, dtype=np.int32) - 1)
return img, labels
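A minimal numeric sketch of the label mapping in load_nomer_image, assuming letterbox returned ratio = (0.5, 0.5) and pad = (0, 220) for a 1280x400 image placed on a 640x640 canvas (illustrative numbers only):
import numpy as np
ratio, pad = (0.5, 0.5), (0., 220.)
w0, h0 = 1280, 400
x = np.array([[0, 0.5, 0.5, 0.2, 0.4]])  # cls, xc, yc, w, h (normalized)
x1 = ratio[0] * w0 * (x[:, 1] - x[:, 3] / 2) + pad[0]
y1 = ratio[1] * h0 * (x[:, 2] - x[:, 4] / 2) + pad[1]
x2 = ratio[0] * w0 * (x[:, 1] + x[:, 3] / 2) + pad[0]
y2 = ratio[1] * h0 * (x[:, 2] + x[:, 4] / 2) + pad[1]
print(x1, y1, x2, y2)                    # [256.] [280.] [384.] [360.]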