项目参考AAAI Association for the Advancement of Artificial Intelligence
因此,本研究提出了一种新的掌纹ROI区域分割系统,该系统融合了分布移位卷积(DSConv)和YOLO(You Only Look Once)算法。DSConv是一种新型的卷积操作,它可以在保持计算效率的同时,提高模型对细节信息的感知能力。而YOLO算法是一种实时目标检测算法,它可以快速准确地定位和识别图像中的目标。
(2)打开eiseg并选择“Open Dir”来选择你的图片目录。
import contextlib
import json
import cv2
import pandas as pd
from PIL import Image
from collections import defaultdict
from utils import *
# Convert INFOLKS JSON file into YOLO-format labels ----------------------------
def convert_infolks_json(name, files, img_path):
    """Convert Infolks JSON annotations into YOLO-format label files.

    Args:
        name: Base name (without extension) of the generated ``.txt``,
            ``.names`` and ``.data`` files inside the output directory.
        files: Glob pattern selecting the annotation JSON files.
        img_path: Prefix prepended to every JSON stem when globbing for the
            matching image (``img_path + stem + '.*'``).

    Raises:
        IndexError: If no image matches the stem of an annotation file.
    """
    # Create folders
    # NOTE(review): make_dirs() lives outside this file; it is assumed to
    # return the output directory that already contains a 'labels' folder.
    path = make_dirs()

    # Import json
    data = []
    for json_path in glob.glob(files):
        with open(json_path) as f:
            jdata = json.load(f)
        jdata['json_file'] = json_path
        data.append(jdata)  # the loops below iterate over this list

    # Write images and shapes
    name = path + os.sep + name
    file_name, wh, cat = [], [], []
    for x in tqdm(data, desc='Files and Shapes'):
        f = glob.glob(img_path + Path(x['json_file']).stem + '.*')[0]
        file_name.append(f)  # read back by index when the labels are written
        with Image.open(f) as im:  # close the handle instead of leaking it
            wh.append(exif_size(im))  # (width, height)
        cat.extend(a['classTitle'].lower() for a in x['output']['objects'])  # categories

        # filename
        with open(name + '.txt', 'a') as file:
            file.write('%s\n' % f)

    # Write *.names file
    names = sorted(np.unique(cat))
    with open(name + '.names', 'a') as file:
        file.writelines('%s\n' % a for a in names)

    # Write labels file
    for i, x in enumerate(tqdm(data, desc='Annotations')):
        label_name = Path(file_name[i]).stem + '.txt'

        with open(path + '/labels/' + label_name, 'a') as file:
            for a in x['output']['objects']:
                category_id = names.index(a['classTitle'].lower())

                # The INFOLKS bounding box format is [x-min, y-min, x-max, y-max]
                box = np.array(a['points']['exterior'], dtype=np.float32).ravel()
                box[[0, 2]] /= wh[i][0]  # normalize x by width
                box[[1, 3]] /= wh[i][1]  # normalize y by height
                box = [box[[0, 2]].mean(), box[[1, 3]].mean(), box[2] - box[0], box[3] - box[1]]  # xywh
                if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))

    # Split data into train, test, and validate files
    split_files(name, file_name)
    write_data_data(name + '.data', nc=len(names))
    print(f'Done. Output saved to {os.getcwd() + os.sep + path}')
# Convert vott JSON file into YOLO-format labels -------------------------------
def convert_vott_json(name, files, img_path):
    """Convert VoTT JSON annotations into YOLO-format label files.

    Args:
        name: Base name (without extension) of the generated ``.txt`` and
            ``.names`` files inside the output directory.
        files: Glob pattern selecting the annotation JSON files.
        img_path: Prefix prepended to ``asset.name`` (plus ``'.jpg'``) when
            looking up the image of each annotation file.
    """
    # Create folders
    # NOTE(review): make_dirs() lives outside this file; it is assumed to
    # return the output directory that already contains a 'labels' folder.
    path = make_dirs()
    name = path + os.sep + name

    # Import json
    data = []
    for json_path in glob.glob(files):
        with open(json_path) as f:
            jdata = json.load(f)
        jdata['json_file'] = json_path
        data.append(jdata)  # the loops below iterate over this list

    # Get all categories
    file_name, cat = [], []
    for x in tqdm(data, desc='Files and Shapes'):
        # Best effort: files without usable 'regions'/'tags' add no category
        with contextlib.suppress(Exception):
            cat.extend(a['tags'][0] for a in x['regions'])  # categories

    # Write *.names file
    names = sorted(set(cat))
    with open(name + '.names', 'a') as file:
        file.writelines('%s\n' % a for a in names)

    # Write labels file
    n1, n2 = 0, 0
    missing_images = []
    for x in tqdm(data, desc='Annotations'):
        matches = glob.glob(img_path + x['asset']['name'] + '.jpg')
        if not matches:
            missing_images.append(x['asset']['name'])
            continue

        f = matches[0]
        file_name.append(f)  # consumed by split_files() below
        with Image.open(f) as im:  # close the handle instead of leaking it
            wh = exif_size(im)  # (width, height)
        n1 += 1
        if not (wh[0] > 0 and wh[1] > 0):
            continue
        n2 += 1

        # append filename to list
        with open(name + '.txt', 'a') as file:
            file.write('%s\n' % f)

        # write labelsfile
        label_name = Path(f).stem + '.txt'
        with open(path + '/labels/' + label_name, 'a') as file:
            for a in x['regions']:
                category_id = names.index(a['tags'][0])

                # The VoTT bounding box is read as [left, top, width, height]
                box = a['boundingBox']
                # float dtype so the in-place division also works for integer pixel values
                box = np.array([box['left'], box['top'], box['width'], box['height']], dtype=np.float64).ravel()
                box[[0, 2]] /= wh[0]  # normalize x by width
                box[[1, 3]] /= wh[1]  # normalize y by height
                box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2], box[3]]  # xywh
                if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))

    # len(data) instead of the last loop index: correct count, and defined for empty input
    print('Attempted %g json imports, found %g images, imported %g annotations successfully' % (len(data), n1, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Split data into train, test, and validate files
    split_files(name, file_name)
    print(f'Done. Output saved to {os.getcwd() + os.sep + path}')
# Convert ath JSON file into YOLO-format labels --------------------------------
def convert_ath_json(json_dir):  # dir contains json annotations and images
    """Convert VIA-style ("ath") JSON annotations into a single-class YOLO dataset.

    Walks ``json_dir`` for ``*.json`` files, writes one label file per
    annotated image, copies (and if needed downsizes) the image into the
    output directory and finally writes ``data.names``, ``data.txt`` and
    ``data.data``.

    Args:
        json_dir: Directory tree holding the JSON annotations; each image is
            looked up next to the JSON file that references it.
    """
    # Create folders
    # NOTE(review): make_dirs() lives outside this file; its return value is
    # concatenated with 'labels/' and 'images/' below, so it presumably ends
    # with a path separator - confirm.
    out_dir = make_dirs()  # output directory

    jsons = []
    for dirpath, _dirnames, filenames in os.walk(json_dir):
        jsons.extend(os.path.join(dirpath, fname) for fname in filenames if fname.lower().endswith('.json'))

    # Import json
    n1, n2, n3 = 0, 0, 0
    missing_images = []
    for json_file in sorted(jsons):
        with open(json_file) as f:
            data = json.load(f)

        # Write labels file
        for x in tqdm(data['_via_img_metadata'].values(), desc=f'Processing {json_file}'):
            image_file = str(Path(json_file).parent / x['filename'])
            matches = glob.glob(image_file)  # image file
            if not matches:
                missing_images.append(image_file)
                continue

            f = matches[0]
            with Image.open(f) as im:  # close the handle instead of leaking it
                wh = exif_size(im)  # (width, height)
            n1 += 1  # all images
            if not (wh[0] > 0 and wh[1] > 0):
                continue

            label_file = out_dir + 'labels/' + Path(f).stem + '.txt'
            nlabels = 0
            try:
                with open(label_file, 'a') as file:  # write labelsfile
                    category_id = 0  # single-class: class attributes in the JSON are ignored

                    for a in x['regions']:
                        # shape_attributes is read as [x, y, width, height] (top-left corner + size)
                        box = a['shape_attributes']
                        box = np.array([box['x'], box['y'], box['width'], box['height']],
                                       dtype=np.float32).ravel()
                        box[[0, 2]] /= wh[0]  # normalize x by width
                        box[[1, 3]] /= wh[1]  # normalize y by height
                        box = [box[0] + box[2] / 2, box[1] + box[3] / 2, box[2],
                               box[3]]  # xywh (left-top to center x-y)

                        if box[2] > 0. and box[3] > 0.:  # if w > 0 and h > 0
                            file.write('%g %.6f %.6f %.6f %.6f\n' % (category_id, *box))
                            n3 += 1
                            nlabels += 1

                if nlabels == 0:  # remove non-labelled images from dataset
                    # os.remove instead of a shell 'rm': portable and safe for paths with spaces
                    with contextlib.suppress(OSError):
                        os.remove(label_file)
                    continue  # next file

                # write image
                img_size = 4096  # resize to maximum
                img = cv2.imread(f)  # BGR
                assert img is not None, 'Image Not Found ' + f
                r = img_size / max(img.shape)  # size ratio
                if r < 1:  # downsize if necessary
                    h, w, _ = img.shape
                    img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_AREA)

                ifile = out_dir + 'images/' + Path(f).name
                if cv2.imwrite(ifile, img):  # if success append image to list
                    with open(out_dir + 'data.txt', 'a') as file:
                        file.write('%s\n' % ifile)
                    n2 += 1  # correct images

            except Exception:
                # Best effort per image: drop the partial label file and keep going
                with contextlib.suppress(OSError):
                    os.remove(label_file)
                print(f'problem with {f}')

    nm = len(missing_images)  # number missing
    print('\nFound %g JSONs with %g labels over %g images. Found %g images, labelled %g images successfully' %
          (len(jsons), n3, n1, n1 - nm, n2))
    if len(missing_images):
        print('WARNING, missing images:', missing_images)

    # Write *.names file
    names = ['knife']  # preserves sort order
    with open(out_dir + 'data.names', 'w') as f:
        f.writelines('%s\n' % a for a in names)

    # Split data into train, test, and validate files
    split_rows_simple(out_dir + 'data.txt')
    write_data_data(out_dir + 'data.data', nc=1)
    print(f'Done. Output saved to {Path(out_dir).absolute()}')
def convert_coco_json(json_dir='../coco/annotations/', use_segments=False, cls91to80=False):
    """Convert COCO ``instances_*.json`` annotations into YOLO-format label files.

    One ``<image stem>.txt`` file is appended to per image, inside
    ``<output>/labels/<json stem without 'instances_'>/``.

    Args:
        json_dir: Directory containing the COCO ``*.json`` annotation files.
        use_segments: Write normalized polygon coordinates instead of boxes.
        cls91to80: Map ``category_id`` through ``coco91_to_coco80_class()``
            instead of only subtracting one.
    """
    save_dir = make_dirs()  # output directory
    coco80 = coco91_to_coco80_class()

    # Import json
    for json_file in sorted(Path(json_dir).resolve().glob('*.json')):
        fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '')  # folder name
        fn.mkdir(parents=True, exist_ok=True)  # the label files are opened inside this folder
        with open(json_file) as f:
            data = json.load(f)

        # Create image dict, keyed by the raw id: a '%g'-formatted key keeps
        # only 6 significant digits and would merge distinct large ids
        images = {x['id']: x for x in data['images']}
        # Create image-annotations dict
        imgToAnns = defaultdict(list)
        for ann in data['annotations']:
            imgToAnns[ann['image_id']].append(ann)

        # Write labels file
        for img_id, anns in tqdm(imgToAnns.items(), desc=f'Annotations {json_file}'):
            img = images[img_id]
            h, w, f = img['height'], img['width'], img['file_name']

            bboxes = []
            segments = []
            for ann in anns:
                if ann['iscrowd']:
                    continue  # crowd regions are not exported
                # The COCO box format is [top left x, top left y, width, height]
                box = np.array(ann['bbox'], dtype=np.float64)
                box[:2] += box[2:] / 2  # xy top-left corner to center
                box[[0, 2]] /= w  # normalize x
                box[[1, 3]] /= h  # normalize y
                if box[2] <= 0 or box[3] <= 0:  # skip if w <= 0 or h <= 0
                    continue

                cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1  # class
                box = [cls] + box.tolist()
                if box not in bboxes:
                    bboxes.append(box)
                # Segments
                if use_segments:
                    if len(ann['segmentation']) > 1:
                        s = merge_multi_segment(ann['segmentation'])
                        s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
                    else:
                        s = [j for i in ann['segmentation'] for j in i]  # all segments concatenated
                        s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                    s = [cls] + s
                    if s not in segments:
                        segments.append(s)

            # Write: iterate the list that is actually emitted, because boxes and
            # segments are de-duplicated independently and may differ in length
            rows = segments if use_segments else bboxes  # cls, box or segments
            with open((fn / f).with_suffix('.txt'), 'a') as file:
                for row in rows:
                    file.write(('%g ' * len(row)).rstrip() % tuple(row) + '\n')
def min_index(arr1, arr2):
    """Find a pair of indexes with the shortest distance.

    Args:
        arr1: (N, 2).
        arr2: (M, 2).
    Return:
        a pair of indexes(tuple): ``(i, j)`` such that ``arr1[i]`` and
        ``arr2[j]`` have the smallest squared Euclidean distance.
    """
    # Broadcast to an (N, M) matrix of squared distances between all pairs
    dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
    return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
def merge_multi_segment(segments):
    """Merge multi segments to one list.

    Find the coordinates with min distance between each segment,
    then connect these coordinates with one thin line to merge all
    segments into one.

    Args:
        segments(List(List)): original segmentations in coco's json file.
            like [segmentation1, segmentation2,...],
            each segmentation is a list of coordinates.

    Return:
        list of (K, 2) arrays that, concatenated in order, form one polygon.
    """
    s = []
    segments = [np.array(i).reshape(-1, 2) for i in segments]
    idx_list = [[] for _ in range(len(segments))]

    # record the indexes with min distance between each segment
    for i in range(1, len(segments)):
        idx1, idx2 = min_index(segments[i - 1], segments[i])
        idx_list[i - 1].append(idx1)
        idx_list[i].append(idx2)  # entry point of the next segment

    # use two round to connect all the segments
    for k in range(2):
        # forward connection
        if k == 0:
            for i, idx in enumerate(idx_list):
                # middle segments have two indexes
                # reverse the index of middle segments
                if len(idx) == 2 and idx[0] > idx[1]:
                    idx = idx[::-1]
                    segments[i] = segments[i][::-1, :]

                segments[i] = np.roll(segments[i], -idx[0], axis=0)
                segments[i] = np.concatenate([segments[i], segments[i][:1]])
                # deal with the first segment and the last one
                if i in [0, len(idx_list) - 1]:
                    s.append(segments[i])
                else:
                    idx = [0, idx[1] - idx[0]]
                    s.append(segments[i][idx[0]:idx[1] + 1])

        else:
            # backward connection: add the remaining part of each middle segment
            for i in range(len(idx_list) - 1, -1, -1):
                if i not in [0, len(idx_list) - 1]:
                    idx = idx_list[i]
                    nidx = abs(idx[1] - idx[0])
                    s.append(segments[i][nidx:])
    return s
def delete_dsstore(path='../datasets'):
    """Delete Apple ``.DS_Store`` files found anywhere below *path*.

    Args:
        path: Root directory that is searched recursively.
    """
    from pathlib import Path

    # '[Ss]' matches the real macOS spelling '.DS_Store' as well as the
    # lower-case '.DS_store' this helper originally looked for.
    files = list(Path(path).rglob('.DS_[Ss]tore'))
    for f in files:
        if f.is_file():
            f.unlink()
if __name__ == '__main__':
source = 'COCO'
if source == 'COCO':
convert_coco_json('./annotations', # directory with *.json
elif source == 'infolks': # Infolks https://infolks.info/
elif source == 'vott': # VoTT https://github.com/microsoft/VoTT
img_path='../../Downloads/athena_day/20190715/') # images folder
elif source == 'ath': # ath format
convert_ath_json(json_dir='../../Downloads/athena/') # images folder
# zip results
# os.system('zip -r ../coco.zip ../coco')
| |-----train
| |-----valid
| |-----test
| |-----train
| |-----valid
| |-----test
Epoch gpu_mem box obj cls labels img_size
1/200 20.8G 0.01576 0.01955 0.007536 22 1280: 100%|██████████| 849/849 [14:42<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:14<00:00, 2.87it/s]
all 3395 17314 0.994 0.957 0.0957 0.0843
Epoch gpu_mem box obj cls labels img_size
2/200 20.8G 0.01578 0.01923 0.007006 22 1280: 100%|██████████| 849/849 [14:44<00:00, 1.04s/it]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|██████████| 213/213 [01:12<00:00, 2.95it/s]
all 3395 17314 0.996 0.956 0.0957 0.0845
Epoch gpu_mem box obj cls labels img_size
3/200 20.8G 0.01561 0.0191 0.006895 27 1280: 100%|██████████| 849/849 [10:56<00:00, 1.29it/s]
Class Images Labels P R mAP@.5 mAP@.5:.95: 100%|███████ | 187/213 [00:52<00:00, 4.04it/s]
all 3395 17314 0.996 0.957 0.0957 0.0845
5.1 export.py
def export_formats():
    """Return the table of YOLOv5 export formats.

    Returns:
        pandas.DataFrame with the columns ``Format``, ``Argument``,
        ``Suffix``, ``CPU`` and ``GPU``; one row per export format.
    """
    # YOLOv5 export formats
    x = [
        ['PyTorch', '-', '.pt', True, True],
        ['TorchScript', 'torchscript', '.torchscript', True, True],
        ['ONNX', 'onnx', '.onnx', True, True],
        ['OpenVINO', 'openvino', '_openvino_model', True, False],
        ['TensorRT', 'engine', '.engine', False, True],
        ['CoreML', 'coreml', '.mlmodel', True, False],
        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
        ['PaddlePaddle', 'paddle', '_paddle_model', True, True],
    ]
    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
def try_export(inner_func):
    """YOLOv5 export decorator, i.e. ``@try_export``.

    Wraps an export function so that a failure is logged instead of raised.

    Args:
        inner_func: Export function returning ``(file, model)``; it must
            declare a ``prefix`` parameter with a default value, which is
            used to tag the log messages.

    Returns:
        Wrapper returning ``inner_func``'s ``(file, model)`` on success and
        ``(None, None)`` when ``inner_func`` raises.
    """
    inner_args = get_default_args(inner_func)

    def outer_func(*args, **kwargs):
        prefix = inner_args['prefix']
        try:
            with Profile() as dt:  # times the export
                f, model = inner_func(*args, **kwargs)
            LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)')
            return f, model
        except Exception as e:
            # Deliberate best effort: one failing format must not abort the others
            LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
            return None, None

    return outer_func
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    """YOLOv5 TorchScript model export.

    Args:
        model: Model to trace; ``model.stride`` and ``model.names`` are
            stored as metadata.
        im: Example input used for tracing.
        file: Path of the source weights; the output uses the
            ``.torchscript`` suffix.
        optimize: Save a mobile-optimized lite-interpreter model instead of
            the plain traced module.
        prefix: Log message prefix.

    Returns:
        ``(f, None)`` where ``f`` is the path of the saved file.
    """
    LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
    f = file.with_suffix('.torchscript')

    ts = torch.jit.trace(model, im, strict=False)
    d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
    extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
    if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
        optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
    else:
        # Only one of the two writers may run, otherwise the plain save
        # overwrites the optimized file
        ts.save(str(f), _extra_files=extra_files)
    return f, None
def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
    """YOLOv5 ONNX export.

    Args:
        model: Model to export; ``model.stride`` and ``model.names`` are
            written into the ONNX metadata.
        im: Example input tensor.
        file: Path of the source weights; the output uses the ``.onnx`` suffix.
        opset: ONNX opset version passed to the exporter.
        dynamic: Export with dynamic batch/height/width axes (runs on CPU).
        simplify: Additionally run onnx-simplifier on the exported file.
        prefix: Log message prefix.

    Returns:
        ``(f, None)`` where ``f`` is the path of the saved file.
    """
    import onnx

    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
    f = file.with_suffix('.onnx')

    output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
    if dynamic:
        dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # shape(1,3,640,640)
        if isinstance(model, SegmentationModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
            dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'}  # shape(1,32,160,160)
        elif isinstance(model, DetectionModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)

    torch.onnx.export(
        model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
        im.cpu() if dynamic else im,
        f,
        verbose=False,
        opset_version=opset,
        do_constant_folding=True,  # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
        input_names=['images'],  # must match the key used in the dynamic-axes dict above
        output_names=output_names,
        dynamic_axes=dynamic or None)

    # Checks
    model_onnx = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model

    # Metadata
    d = {'stride': int(max(model.stride)), 'names': model.names}
    for k, v in d.items():
        meta = model_onnx.metadata_props.add()
        meta.key, meta.value = k, str(v)
    onnx.save(model_onnx, f)

    # Simplify
    if simplify:
        try:
            cuda = torch.cuda.is_available()
            check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
            import onnxsim

            LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
            # NOTE(review): confirm that the installed onnx-simplifier accepts a
            # file path and the 'check' keyword here.
            model_simp, check = onnxsim.simplify(f, check=True)
            assert check, 'assert check failed'
            onnx.save(model_simp, f)
        except Exception as e:
            # Simplification is optional: keep the unsimplified export on failure
            LOGGER.info(f'{prefix} simplifier failure ❌ {e}')
    return f, None
export.py是一个用于将YOLOv5 PyTorch模型导出为其他格式的程序文件。该文件提供了多种导出格式选项,包括TorchScript、ONNX、OpenVINO、TensorRT、CoreML、TensorFlow SavedModel、TensorFlow GraphDef、TensorFlow Lite、TensorFlow Edge TPU、TensorFlow.js和PaddlePaddle。用户可以根据需要选择要导出的格式,并使用相应的命令行参数运行export.py文件来进行导出。
5.2 ui.py
class YOLOv5Detector:
def __init__(self, weights='./best.pt', data=ROOT / 'data/coco128.yaml', device='', half=False, dnn=False):
self.model, self.stride, self.names, self.pt = self.load_model(weights, data, device, half, dnn)
def load_model(self, weights, data, device, half, dnn):
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
return model, stride, names, pt
def run(self, img, imgsz=(640, 640), conf_thres=0.25, iou_thres=0.45, max_det=1000, device='', classes=None,
agnostic_nms=False, augment=False, half=False, retina_masks=True):
imgsz = check_img_size(imgsz, s=self.stride) # check image size
self.model.warmup(imgsz=(1 if self.pt else 1, 3, *imgsz)) # warmup
cal_detect = []
device = select_device(device)
names = self.model.module.names if hasattr(self.model, 'module') else self.model.names # get class names
# Set Dataloader
im = letterbox(img, imgsz, self.stride, self.pt)[0]
# Convert
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im)
im = torch.from_numpy(im).to(device)
im = im.half() if half else im.float() # uint8 to fp
5.3 val.py
class YOLOv5Validator:
def __init__(self, weights, data, batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task='val', device='', workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=ROOT / 'runs/val', name='exp', exist_ok=False, half=True, dnn=False, model=None, dataloader=None, save_dir=Path(''), plots=True, callbacks=Callbacks(), compute_loss=None):
self.weights = weights
self.data = data
self.batch_size = batch_size
self.imgsz = imgsz
self.conf_thres = conf_thres
self.iou_thres = iou_thres
self.max_det = max_det
self.task = task
self.device = device
self.workers = workers
self.single_cls = single_cls
self.augment = augment
self.verbose = verbose
self.save_txt = save_txt
self.save_hybrid = save_hybrid
self.save_conf = save_conf
self.save_json = save_json
self.project = project
self.name = name
self.exist_ok = exist_ok
self.half = half
self.dnn = dnn
self.model = model
self.dataloader = dataloader
self.save_dir = save_dir
self.plots = plots
self.callbacks = callbacks
self.compute_loss = compute_loss
def run(self):
# Initialize/load model and set device
training = self.model is not None
if training: # called by train.py
device, pt, jit, engine = next(self.model.parameters()).device, True, False, False # get model device, PyTorch model
half &= device.type != 'cpu' # half precision only supported on CUDA
self.model.half() if half else self.model.float()
else: # called directly
device = select_device(self.device, batch_size=self.batch_size)
# Directories
self.save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run
(self.save_dir / 'labels' if self.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
self.model = DetectMultiBackend(self.weights, device=device, dnn=self.dnn, data=self.data, fp16=self.half)
stride, pt, jit, engine = self.model.stride, self.model.pt, self.model.jit, self.model.engine
self.imgsz = check_img_size(self.imgsz, s=stride) # check image size
self.half = self.model.fp16 # FP16 supported on limited backends with CUDA
if engine:
self.batch_size = self.model.batch_size
device = self.model.device
if not (pt or jit):
self.batch_size = 1 # export.py models default to batch-size 1
LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{self.imgsz},{self.imgsz}) for non-PyTorch models')
# Data
self.data = check_dataset(self.data) # check
# Configure
cuda = device.type != 'cpu'
is_coco = isinstance(self.data.get('val'), str) and self.data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset
nc = 1 if self.single_cls else int(self.data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
# Dataloader
if not training:
if pt and not self.single_cls: # check --weights are trained on --data
ncm = self.model.model.nc
assert ncm == nc, f'{self.weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \
f'classes). Pass correct combination of --weights and --data that are trained together.'
self.model.warmup(imgsz=(1 if pt else self.batch_size, 3, self.imgsz, self.imgsz)) # warmup
pad, rect = (0.0, False) if self.task == 'speed' else (0.5, pt) # square inference for benchmarks
self.task = self.task if self.task in ('train', 'val', 'test') else 'val' # path to train/val/test images
self.dataloader = create_dataloader(self.data[self.task],
prefix=colorstr(f'{self.task}: '))[0]
seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
names = self.model.names if hasattr(self.model, 'names') else self.model.module.names # get class names
if isinstance(names, (list, tuple)): # old format
names = dict(enumerate(names))
class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95')
tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
5.4 yolov5-DSConv.py
class DSConv(_ConvNd):
    """2-D convolution layer carrying the extra DSConv tensors.

    Besides the regular ``weight``/``bias`` of ``_ConvNd`` the layer holds
    ``intweight`` and ``alpha`` (and optionally ``KDSb``, ``CDSw``, ``CDSb``).
    These are created with ``torch.Tensor(...)``, i.e. as plain,
    uninitialized tensors rather than registered parameters or buffers.
    ``forward`` currently convolves with ``self.weight`` only; the call to
    ``get_weight_res`` is commented out.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=None, dilation=1, groups=1, padding_mode='zeros', bias=False, block_size=32, KDSBias=False, CDS=False):
        padding = _pair(autopad(kernel_size, padding, dilation))
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        dilation = _pair(dilation)

        # Number of channel blocks that share one alpha value
        blck_numb = math.ceil(((in_channels)/(block_size*groups)))
        super(DSConv, self).__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode)

        # KDS weight From Paper
        self.intweight = torch.Tensor(out_channels, in_channels, *kernel_size)
        self.alpha = torch.Tensor(out_channels, blck_numb, *kernel_size)

        # KDS bias From Paper
        self.KDSBias = KDSBias
        self.CDS = CDS

        if KDSBias:
            self.KDSb = torch.Tensor(out_channels, blck_numb, *kernel_size)
        if CDS:
            self.CDSw = torch.Tensor(out_channels)
            self.CDSb = torch.Tensor(out_channels)

    def get_weight_res(self):
        """Return ``alpha`` expanded block-wise to the weight shape times ``weight`` (plus the expanded ``KDSb`` when enabled)."""
        # Include expansion of alpha and multiplication with weights to include in the convolution layer here
        alpha_res = torch.zeros(self.weight.shape).to(self.alpha.device)

        # Include KDSBias
        if self.KDSBias:
            KDSBias_res = torch.zeros(self.weight.shape).to(self.alpha.device)

        # Handy definitions:
        nmb_blocks = self.alpha.shape[1]
        total_depth = self.weight.shape[1]
        bs = total_depth//nmb_blocks
        llb = total_depth-(nmb_blocks-1)*bs  # the last block absorbs the remainder

        # Casting the Alpha values as same tensor shape as weight
        for i in range(nmb_blocks):
            length_blk = llb if i==nmb_blocks-1 else bs

            shp = self.alpha.shape  # Notice this is the same shape for the bias as well
            to_repeat=self.alpha[:, i, ...].view(shp[0],1,shp[2],shp[3]).clone()
            repeated = to_repeat.expand(shp[0], length_blk, shp[2], shp[3]).clone()
            alpha_res[:, i*bs:(i*bs+length_blk), ...] = repeated.clone()

            if self.KDSBias:
                to_repeat = self.KDSb[:, i, ...].view(shp[0], 1, shp[2], shp[3]).clone()
                repeated = to_repeat.expand(shp[0], length_blk, shp[2], shp[3]).clone()
                KDSBias_res[:, i*bs:(i*bs+length_blk), ...] = repeated.clone()

        if self.CDS:
            # NOTE(review): the expanded CDS weight is computed but not applied
            # to the result below - confirm whether that is intended.
            to_repeat = self.CDSw.view(-1, 1, 1, 1)
            repeated = to_repeat.expand_as(self.weight)

        # Element-wise multiplication of alpha and weight
        weight_res = torch.mul(alpha_res, self.weight)
        if self.KDSBias:
            weight_res = torch.add(weight_res, KDSBias_res)
        return weight_res

    def forward(self, input):
        """Apply a regular 2-D convolution of ``input`` with ``self.weight``."""
        # Get resulting weight
        #weight_res = self.get_weight_res()

        # Returning convolution
        return F.conv2d(input, self.weight, self.bias,
                        self.stride, self.padding, self.dilation,
                        self.groups)
class DSConv2D(nn.Module):
def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, d=1, act=True):
self.conv = DSConv(inc, ouc, k, s, p, g, d)
class Bottleneck_DSConv(nn.Module):
    """Standard bottleneck built from two DSConv2D layers (1x1 then 3x3)."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        # nn.Module must be initialized before sub-modules can be assigned
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = DSConv2D(c1, c_, 1, 1)
        self.cv2 = DSConv2D(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2  # residual only when the shapes match

    def forward(self, x):
        """Return ``cv2(cv1(x))``, plus ``x`` when the shortcut is active."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class C3_DSConv(nn.Module):
# C3 module with dsconv
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
c_ = int(c2 * e)
self.m = nn.Sequential(*(Bottleneck_DSConv(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
这个程序文件实现了DSConv(Distribution Shifting Convolution,分布移位卷积)模块。DSConv将卷积核分解为不可训练的整数量化张量和可训练的分布移位器,从而在降低存储与计算开销的同时尽量保持模型的性能。该文件定义了DSConv类和DSConv2D类,以及使用DSConv的Bottleneck_DSConv和C3_DSConv模块。
5.5 classify\predict.py
class YOLOv5Classifier:
def __init__(self, weights, source, data, imgsz, device, view_img, save_txt, nosave, augment, visualize, update,
project, name, exist_ok, half, dnn, vid_stride):
self.weights = weights
self.source = source
self.data = data
self.imgsz = imgsz
self.device = device
self.view_img = view_img
self.save_txt = save_txt
self.nosave = nosave
self.augment = augment
self.visualize = visualize
self.update = update
self.project = project
self.name = name
self.exist_ok = exist_ok
self.half = half
self.dnn = dnn
self.vid_stride = vid_stride
def run(self):
source = str(self.source)
save_img = not self.nosave and not source.endswith('.txt') # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
screenshot = source.lower().startswith('screen')
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(self.project) / self.name, exist_ok=self.exist_ok) # increment run
(save_dir / 'labels' if self.save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(self.device)
model = DetectMultiBackend(self.weights, device=device, dnn=self.dnn, data=self.data, fp16=self.half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(self.imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
if webcam:
view_img = check_imshow(warn=True)
dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]),
bs = len(dataset)
elif screenshot:
dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]),
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
for path, im, im0s, vid_cap, s in dataset:
with dt[0]:
im = torch.Tensor(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
results = model(im)
# Post-process
with dt[2]:
pred = F.softmax(results, dim=1) # probabilities
# Process predictions
for i, prob in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f'{i}: '
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
s += '%gx%g ' % im.shape[2:] # print string
annotator = Annotator(im0, example=str(names), pil=True)
# Print results
top5i = prob.argsort(0, descending=True)[:5].tolist() # top 5 indices
s += f"{', '.join(f'{names[j]} {prob[j]:.2f}' for j in top5i)}, "
# Write results
text = '\n'.join(f'{prob[j]:.2f} {names[j]
:是否使用OpenCV DNN进行ONNX推断。--vid-stride
文件路径 | 功能概述 |
export.py | 将YOLOv5模型导出为其他格式的文件 |
ui.py | 创建图形用户界面(GUI)应用程序 |
val.py | 在检测数据集上验证训练好的YOLOv5模型 |
yolov5-DSConv.py | 实现了DSConv的模块 |
classify/predict.py | 使用YOLOv5模型进行图像分类推断 |
classify/train.py | 训练一个基于YOLOv5的分类器模型 |
classify/val.py | 在验证数据集上验证训练好的分类器模型 |
models/common.py | 包含了一些通用的模型函数和类 |
models/experimental.py | 包含了一些实验性的模型函数和类 |
models/tf.py | 包含了一些与TensorFlow相关的模型函数和类 |
models/yolo.py | 包含了YOLOv5模型的定义和相关函数 |
models/__init__.py | 模型模块的初始化文件 |
segment/predict.py | 使用分割模型进行图像分割推断 |
segment/train.py | 训练一个图像分割模型 |
segment/val.py | 在验证数据集上验证训练好的图像分割模型 |
utils/activations.py | 包含了一些激活函数的定义 |
utils/augmentations.py | 包含了一些数据增强的函数和类 |
utils/autoanchor.py | 包含了自动锚框生成的函数和类 |
utils/autobatch.py | 包含了自动批处理的函数和类 |
utils/callbacks.py | 包含了一些回调函数的定义 |
utils/dataloaders.py | 包含了数据加载器的定义 |
utils/downloads.py | 包含了一些下载数据集的函数 |
utils/general.py | 包含了一些通用的辅助函数 |
utils/loss.py | 包含了一些损失函数的定义 |
utils/metrics.py | 包含了一些评估指标的定义 |
utils/plots.py | 包含了一些绘图函数的定义 |
utils/torch_utils.py | 包含了一些与PyTorch相关的辅助函数 |
utils/triton.py | 包含了与Triton Inference Server相关的函数和类 |
utils/__init__.py | 工具模块的初始化文件 |
utils/aws/resume.py | 包含了AWS训练恢复的函数和类 |
utils/aws/__init__.py | AWS模块的初始化文件 |
utils/flask_rest_api/example_request.py | 包含了Flask REST API的示例请求 |
utils/flask_rest_api/restapi.py | 包含了Flask REST API的实现 |
utils/loggers/__init__.py | 日志记录器模块的初始化文件 |
utils/loggers/clearml/clearml_utils.py | 包含了ClearML日志记录器的辅助函数 |
utils/loggers/clearml/hpo.py | 包含了ClearML日志记录器的超参数优化函数 |
utils/loggers/clearml/__init__.py | ClearML日志记录器模块的初始化文件 |
utils/loggers/comet/comet_utils.py | 包含了Comet日志记录器的辅助函数 |
utils/loggers/comet/hpo.py | 包含了Comet日志记录器的超参数优化函数 |
utils/loggers/comet/__init__.py | Comet日志记录器模块的初始化文件 |
utils/loggers/wandb/log_dataset.py | 包含了WandB日志记录器的数据集记录函数 |
utils/loggers/wandb/sweep.py | 包含了WandB日志记录器的超参数优化函数 |
utils/loggers/wandb/wandb_utils.py | 包含了WandB日志记录器的辅助函数 |
utils/loggers/wandb/__init__.py | WandB日志记录器模块的初始化文件 |
utils/segment/augmentations.py | 包含了图像分割的数据增强函数和类 |
utils/segment/dataloaders.py | 包含了图像分割的数据加载器的定义 |
utils/segment/general.py | 包含了图像分割的通用辅助函数 |
utils/segment/loss.py | 包含了图像分割的损失函数的定义 |
utils/segment/metrics.py | 包含了图像分割的评估指标的定义 |
utils/segment/plots.py | 包含了图像分割的绘图函数的定义 |
utils/segment/__init__.py | 图像分割模块的初始化文件 |
研究者通过将传统的卷积内核分解为两个组件来实现这一点。其中之一是只有整数值的张量,不可训练,并根据预训练网络中浮点 (FP) 权重的分布进行计算。另一个组件由两个分布移位器张量组成,它们将量化张量的权重定位在模拟原始预训练网络分布的范围内:其中一个移动每个内核的分布,另一个移动每个通道。这些权重可以重新训练,使网络能够适应新的任务和数据集。
框架(DSConv layer)
Quantization Procedure
Distribution Shifts
KL-Divergence: 内核分布器移位后产生的VQK应该具有与原始权重类似的分布。量化过程仅使用缩放因子来评估VQK的整数值
Optimized Inference
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license

# Parameters
nc: 1  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.25  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 3, C3, [1024]],
   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

# YOLOv5 v6.0 head (the C3 blocks of the head are replaced by C3_DSConv)
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3_DSConv, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3_DSConv, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3_DSConv, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3_DSConv, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
训练损失(train/box_loss, train/seg_loss, train/obj_loss, train/cls_loss):这些损失值表示模型在训练集上的表现,包括边界框损失、分割损失、对象损失和分类损失。
指标评价(metrics/precision、metrics/recall、metrics/mAP_0.5、metrics/mAP_0.5:0.95):这些指标分别对应精确率、召回率以及不同IoU阈值下的平均精度,并分别针对边界框(B)和掩码(M)两类预测结果进行了测量。
验证损失(val/box_loss, val/seg_loss, val/obj_loss, val/cls_loss):这些损失值表示模型在验证集上的表现。
学习率(x/lr0, x/lr1, x/lr2):这些值表示训练过程中的学习率。
精度、召回率和平均精度 (mAP)
mAP@0.5 和 mAP@0.5:0.95:两个 mAP 指标都会随着时间的推移而增加。mAP@0.5 始终高于 mAP@0.5:0.95,这是预期的,因为后者是一个更严格的指标。这些指标的不断改进表明该模型在准确定位和分类对象方面的熟练程度不断提高。
学习率 ( x/lr0、x/lr1和x/lr2) 似乎随着时间的推移而降低,这在训练深度学习模型中很常见。这种逐渐减少有助于微调模型的权重,特别是当它接近最佳性能时。
[1]刘明,李丽华,李哲.基于指导滤波与二值图像组互相关匹配的3D掌纹识别[J].计算机科学.2014,(9).DOI:10.11896/j.issn.1002-137X.2014.09.058 .
[2]赵志刚,吴鑫,洪丹枫,等.基于信息熵的GLBP掌纹识别算法[J].计算机科学.2014,(8).DOI:10.11896/j.issn.1002-137X.2014.08.062 .
[3]曹雏清,李瑞峰,赵立军.基于深度图像技术的手势识别方法[J].计算机工程.2012,(8).DOI:10.3969/j.issn.1000-3428.2012.08.006 .
[6]David Lowrence.Biometrics and retail: moving towards the future[J].Biometric Technology Today.2014,2014(2).7-9.
[7]Christian Rathgeb,Andreas Uhl.A survey on biometric cryptosystems and cancelable biometrics[J].EURASIP Journal on Information Security.2011.2011
[8]Zhang, D.,Lu, G.,Li, W.,等.Palmprint Recognition Using 3-D Information[J].IEEE transactions on systems, man and cybernetics, Part C. Applications and reviews: A publication of the IEEE Systems, Man, and Cybernetics Society.2009,39(5).505-519.
[9]Xu, Shuang,Suo, Jidong,Ding, Jifeng.Improved linear discriminant analysis based on two-dimensional Gabor for palmprint recognition[C].2011.
[10]Shuang Xu,Jidong Suo,Jiyin Zhao.Research on the Location and Segmentation Technologies in Palmprint Identification[C].2011.