机器学习----Day07

最新推荐文章于 2022-06-28 21:53:11 发布

碧落&凡尘

最新推荐文章于 2022-06-28 21:53:11 发布

阅读量197

点赞数 1

分类专栏： AI python 文章标签：机器学习人工智能图像识别人脸识别识别

本文链接：https://blog.csdn.net/willluckysmile/article/details/89556052

版权

python 同时被 2 个专栏收录

38 篇文章 0 订阅

订阅专栏

8 篇文章 0 订阅

订阅专栏

语音识别

通过傅里叶变换, 将时域的声音函数分解为一系列不同频率的正弦函数的叠加, 通过频率谱线的特殊分布, 建立音频内容和文本的对应关系, 以此作为模型训练的基础.

案例: freq.wav

import numpy as np
import numpy.fft as nf
import scipy.io.wavfile as wf
import matplotlib.pyplot as mp

# Read the clip: sampling rate (Hz) and the array of sample values.
sample_rate, sigs = wf.read('freq.wav')
print(sample_rate)
print(sigs.shape, sigs.dtype)
# x axis of the time-domain plot: the time of every sample in seconds.
times = np.arange(len(sigs)) / sample_rate
# Fourier transform: the frequency of each sinusoidal component and the
# magnitude (energy) it carries.
# NOTE(review): fft() runs over the last axis, so this presumably expects a
# mono (1-D) recording -- confirm for freq.wav.
freqs = nf.fftfreq(sigs.size, 1 / sample_rate)
ffts = nf.fft(sigs)
pows = np.abs(ffts)
# Draw the two plots side by side.
mp.figure('Audio', facecolor='lightgray')
mp.subplot(121)
mp.title('Time Domain')
mp.xlabel('Time', fontsize=12)
mp.ylabel('Signal', fontsize=12)
mp.grid(linestyle=":")
mp.plot(times, sigs, c='dodgerblue')
# Frequency-domain plot; only the positive half of the spectrum is shown.
mp.subplot(122)
mp.title('Frequency Domain')
mp.xlabel('Frequency', fontsize=12)
mp.ylabel('Pow', fontsize=12)
mp.grid(linestyle=":")
positive = freqs > 0
mp.plot(freqs[positive], pows[positive], c='orangered')

mp.tight_layout()
mp.show()

### 语音识别过程
梅尔频率倒谱系数(MFCC): 对声音做傅里叶变换后, 提取与声音内容密切相关的13个特殊频率所对应的能量分布, 构成MFCC矩阵, 作为语音识别的特征. 然后基于隐马尔可夫模型进行模式识别, 找到与测试样本最匹配的声音模型, 从而识别语音内容.

MFCC相关API:

import scipy.io.wavfile as wf
# The alias must match the name used below (sf); the original imported the
# module as `tf`, which made `sf.mfcc` a NameError.
import python_speech_features as sf
# Read the audio file: sampling rate and the value of every sample.
sample_rate, sigs = wf.read('freq.wav')
# Hand the signal to the feature extractor to get its MFCC matrix.
mfcc = sf.mfcc(sigs, sample_rate)

案例:比较不同音频的mfcc矩阵

import numpy as np
import numpy.fft as nf
import scipy.io.wavfile as wf
import python_speech_features as sf
import matplotlib.pyplot as mp

# Render the MFCC matrix of each clip in turn so the two can be compared.
for wav_path in ('apple01.wav', 'freq.wav'):
    sample_rate, sigs = wf.read(wav_path)

    mfcc = sf.mfcc(sigs, sample_rate)
    print(mfcc.shape)

    # Transposed so that features run along the y axis.
    mp.matshow(mfcc.T, cmap='gist_rainbow')
    mp.title('MFCC', fontsize=16)
    mp.ylabel('Feature', fontsize=12)
    mp.xlabel('Sample', fontsize=12)
    mp.tick_params(labelsize=10)
    mp.show()

隐马尔可夫模型相关API:

import hmmlearn.hmm as hl
# Build a Gaussian hidden Markov model.
model = hl.GaussianHMM(
    n_components=4,  # number of hidden states
    covariance_type='diag',  # each state uses a diagonal covariance matrix
    n_iter=1000  # upper bound on the number of training iterations
)
# Log-likelihood of the test MFCC matrix under the model.
# NOTE: the model has to be trained with model.fit(...) before it can score.
score = model.score(test_mfccs)

案例: 遍历文件夹, 使用多个隐马模型训练所有音频的mfcc矩阵. 然后使用测试集验证训练结果.

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
"""
    demo01_hmm.py语音识别
"""

import os
import numpy as np
import scipy.io.wavfile as wf
import python_speech_features as sf
import hmmlearn.hmm as hl

def search_files(directory):
    """
    Collect the .wav files under ``directory``, grouped by folder name.

    :param directory: root directory to search (walked recursively)
    :return: dict mapping the name of each folder that directly contains
        .wav files to the list of paths of those files
    """
    directory = os.path.normpath(directory)
    objects = {}
    for curdir, _subdirs, files in os.walk(directory):
        # The last path component of the containing folder is the label.
        label = curdir.split(os.path.sep)[-1]
        for file in files:
            if file.endswith('.wav'):
                objects.setdefault(label, []).append(
                    os.path.join(curdir, file))
    return objects


train_samples = search_files('./speeches/training')
print(train_samples)

# Build the training set: for every label, stack the MFCC matrices of all
# of its clips into one matrix.
train_x, train_y = [], []
for label, wav_paths in train_samples.items():
    stacked = np.array([])
    for wav_path in wav_paths:
        rate, samples = wf.read(wav_path)
        features = sf.mfcc(samples, rate)
        # The first non-empty matrix seeds the stack; later ones are
        # appended row-wise.
        stacked = (features if len(stacked) == 0
                   else np.append(stacked, features, axis=0))
    train_x.append(stacked)
    train_y.append(label)

# Fit one hidden Markov model per label and keep them all for later use.
models = {}
for stacked, label in zip(train_x, train_y):
    models[label] = hl.GaussianHMM(
        n_components=3, covariance_type='diag', n_iter=1000).fit(stacked)

# Read the files of the test set and collect one stacked MFCC matrix per
# label.  (The original iterated over train_samples here, so the models were
# only ever evaluated on their own training data.)
test_samples = search_files('speeches/testing')
test_x, test_y = [], []
for label, filenames in test_samples.items():
    mfcces = np.array([])
    for filename in filenames:
        sample_rate, sigs = wf.read(filename)
        mfcc = sf.mfcc(sigs, sample_rate)
        if len(mfcces) == 0:
            mfcces = mfcc
        else:
            mfcces = np.append(mfcces, mfcc, axis=0)
    test_x.append(mfcces)
    test_y.append(label)

# Score every test sample against every trained model; the label of the
# best-scoring model is the prediction.
pred_test_y = []
for mfccs in test_x:
    best_score, best_label = None, None
    for label, model in models.items():
        score = model.score(mfccs)
        if (best_score is None) or (best_score < score):
            best_score, best_label = score, label
    pred_test_y.append(best_label)

# Expected labels followed by predicted labels.
print(test_y)
print(pred_test_y)

图像识别

OpenCV基础

OpenCV是一个开源的计算机视觉库, 提供了很多图像处理的常用工具.
案例:

# -*- coding: utf-8 -*-
from __future__ import unicode_literals

"""
    demo02_cv.py opencv基础
"""

import numpy as np
import cv2 as cv

img = cv.imread('forest.jpg')
cv.imshow('Img', img)

# Show each colour channel on its own.  Channels are indexed in B, G, R
# order, so 0 is blue, 1 is green and 2 is red; the other two stay zero.
blue = np.zeros_like(img)
blue[..., 0] = img[..., 0]
cv.imshow('blue', blue)

green = np.zeros_like(img)
green[..., 1] = img[..., 1]
cv.imshow('green', green)

red = np.zeros_like(img)
red[..., 2] = img[..., 2]
cv.imshow('red', red)

# Crop the central region: from one quarter to three quarters of each side.
h, w = img.shape[:2]
l, t = w // 4, h // 4
r, b = w * 3 // 4, h * 3 // 4
cropped = img[t:b, l:r]
cv.imshow('Cropped', cropped)

# Shrink to a quarter of the size by giving the target (width, height).
sc1 = cv.resize(img, (w // 4, h // 4), interpolation=cv.INTER_LINEAR)
cv.imshow('Scale1', sc1)

# Enlarge again; with no target size, the x / y scale factors are given.
sc2 = cv.resize(sc1, None, fx=4, fy=4, interpolation=cv.INTER_LINEAR)
cv.imshow('Scale2', sc2)

cv.waitKey()

# Save the results.
for out_name, out_img in (('blue.jpg', blue), ('green.jpg', green),
                          ('red.jpg', red), ('sc2.jpg', sc2),
                          ('sc1.jpg', sc1)):
    cv.imwrite(out_name, out_img)

边缘检测

物体的边缘检测是物体识别常用的手段, 边缘检测常用亮度梯度的方法, 通过识别亮度梯度变化最大的像素点从而检测出物体的边缘.

边缘识别相关API:

# Sobel edge detection
# cv.CV_64F: output depth; the integer pixels are converted to floats so the
#            derivative values are not truncated
# 1: take the Sobel derivative in the horizontal (x) direction
# 0: no derivative in the vertical (y) direction
# ksize: the Sobel kernel is 5*5
cv.Sobel(img, cv.CV_64F, 1, 0, ksize=5)
# Laplacian edge detection
cv.Laplacian(img, cv.CV_64F)
# Canny edge detection
# 50: lower hysteresis threshold  240: upper hysteresis threshold
cv.Canny(img, 50, 240)

案例:

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
"""
    sobel.py 边缘识别
"""
import cv2 as cv
img = cv.imread('chair.jpg', cv.IMREAD_GRAYSCALE)
cv.imshow('image', img)

# Sobel derivatives, one window each: (window title, dx order, dy order).
for title, dx, dy in (('H-Sobel', 0, 1), ('W-Sobel', 1, 0), ('Sobel', 1, 1)):
    cv.imshow(title, cv.Sobel(img, cv.CV_64F, dx, dy, ksize=3))

# Laplacian edge detection
laplacian = cv.Laplacian(img, cv.CV_64F)
cv.imshow('laplacian', laplacian)

# Canny edge detection
canny = cv.Canny(img, 50, 200)
cv.imshow('Canny', canny)

cv.waitKey()

亮度提升

OpenCV提供了直方图均衡化的方式实现亮度提升, 亮度提升后更有利于边缘识别模型的训练.

# Histogram equalization operates on a grayscale image, so convert first.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
equalized_gray = cv.equalizeHist(gray)

案例:

# -*- coding:utf-8 -*-
from __future__ import unicode_literals
"""
    demo04_equalhist.py 直方图均衡化
"""
import cv2 as cv
# Show the original, its grayscale version and the equalized grayscale.
img = cv.imread('sunrise.jpg')
cv.imshow('Sunrise', img)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
cv.imshow('gray', gray)
equalize_gray = cv.equalizeHist(gray)
cv.imshow('equalize_gray', equalize_gray)

# Brighten the colour image: convert to YUV, equalize only channel 0
# (Y, the luma channel; U and V carry the colour information), then
# convert back to BGR.
yuv = cv.cvtColor(img, cv.COLOR_BGR2YUV)
yuv[:, :, 0] = cv.equalizeHist(yuv[:, :, 0])
img = cv.cvtColor(yuv, cv.COLOR_YUV2BGR)
cv.imshow('equalized_color', img)

cv.waitKey()

角点检测

角点: 平直棱线的交汇点. (颜色梯度方向改变的像素点的位置)

OpenCV提供的角点检测相关API:

# Images loaded with cv.imread are in BGR channel order.
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# Harris corner detector
# 7: blockSize, the size of the neighbourhood examined around each pixel
# 5: ksize, the aperture of the Sobel operator used for the gradients
# 0.04: k, the free parameter of the Harris response formula
# The result is a response map with one value per pixel, not a point list.
corners = cv.cornerHarris(gray, 7, 5, 0.04)

案例:

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
"""
    demo05_harris.py 角点检测
"""
import cv2 as cv

img = cv.imread('box.png')

cv.imshow('Image', img)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
print(gray.shape)
cv.imshow('gray', gray)
# Harris response map: one value per pixel (it is used below as a
# per-pixel mask over the image), not a list of corner coordinates.
corners = cv.cornerHarris(gray, 7, 5, 0.04)
mixture = img.copy()
print(corners.shape)
print(corners.max())
# Mask the colour image: every pixel whose response exceeds 1% of the
# maximum is treated as a corner and painted red (BGR = 0, 0, 255).
mixture[corners > corners.max() * 0.01] = [0, 0, 255]
cv.imshow('mixture', mixture)
cv.waitKey()

特征点检测

特征点检测集合了边缘检测与角点检测, 从而可以识别出图像的所有特征点.
常用的特征点检测: STAR特征点检测, SIFT特征点检测
案例:

import cv2 as cv
img = cv.imread('table.jpg')
cv.imshow('Table', img)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Create the SIFT feature-point detector.  OpenCV >= 4.4 exposes it as
# cv.SIFT_create; older builds only provide it in the contrib module
# cv.xfeatures2d, which is absent from a plain opencv-python install.
if hasattr(cv, 'SIFT_create'):
    sift = cv.SIFT_create()
else:
    sift = cv.xfeatures2d.SIFT_create()
# Detect all feature points.
keypoints = sift.detect(gray)
# Draw the feature points on a copy of the image.
mixture = img.copy()
# Arguments: (source image, key points, output image, flags)
cv.drawKeypoints(img, keypoints, mixture, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv.imshow('Mixture', mixture)

cv.waitKey()

SIFT特征点检测相关API:

# Create the SIFT feature-point detector.  OpenCV >= 4.4 exposes it as
# cv.SIFT_create; older builds only provide it in cv.xfeatures2d (contrib).
if hasattr(cv, 'SIFT_create'):
    sift = cv.SIFT_create()
else:
    sift = cv.xfeatures2d.SIFT_create()
# Detect all feature points.
keypoints = sift.detect(gray)

特征值矩阵

图像的特征值矩阵(描述矩阵)记录了图像的特征点以及每个特征点的梯度信息. 相似图像的特征值矩阵也相似. 这样, 只要有足够多的样本, 就可以基于隐马尔可夫模型进行图像内容识别.

特征值矩阵相关API:

# Create the SIFT feature-point detector.  OpenCV >= 4.4 exposes it as
# cv.SIFT_create; older builds only provide it in cv.xfeatures2d (contrib).
if hasattr(cv, 'SIFT_create'):
    sift = cv.SIFT_create()
else:
    sift = cv.xfeatures2d.SIFT_create()
# Detect all feature points.
keypoints = sift.detect(gray)
# Compute the descriptor matrix of the image for those key points.
_, desc = sift.compute(gray, keypoints)

案例:

import cv2 as cv
import matplotlib.pyplot as mp
img = cv.imread('table.jpg')
# cv.imshow('Table', img)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Create the SIFT feature-point detector.  OpenCV >= 4.4 exposes it as
# cv.SIFT_create; older builds only provide it in cv.xfeatures2d (contrib).
if hasattr(cv, 'SIFT_create'):
    sift = cv.SIFT_create()
else:
    sift = cv.xfeatures2d.SIFT_create()
# Detect all feature points.
keypoints = sift.detect(gray)
# Compute the descriptor matrix for the detected key points.
_, desc = sift.compute(gray, keypoints)
# Plot the matrix transposed so that features run along the y axis.
mp.matshow(desc.T, cmap='jet')
mp.title('Description', fontsize=18)
mp.xlabel('Sample', fontsize=12)
mp.ylabel('Feature', fontsize=12)
mp.tick_params(labelsize=10)
mp.show()

测试结果 (测试样本类别 -> 模型类别 匹配得分, 由下方案例输出):

car -> car -161423.17783494428
car -> motorbike -108701751.22930637
car -> airplane -66676021.244070016
motorbike -> car -293612.4882782362
motorbike -> motorbike -289474.59398693126
motorbike -> airplane -292388.6435544905
airplane -> car -121632.43038480642
airplane -> motorbike -120033.95974421783
airplane -> airplane -121130.82628787684

案例:

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
"""
    jpg.py 图像识别
"""
import numpy as np
import os
import cv2 as cv
import hmmlearn.hmm as hl

def search_files(directory):
    """
    Collect the .jpg files under ``directory``, grouped by folder name.

    :param directory: root directory to search (walked recursively)
    :return: dict mapping the name of each folder that directly contains
        .jpg files to the list of paths of those files
    """
    directory = os.path.normpath(directory)
    objects = {}
    for curdir, _subdirs, files in os.walk(directory):
        # The last path component of the containing folder is the label.
        label = curdir.split(os.path.sep)[-1]
        for file in files:
            if file.endswith('.jpg'):
                objects.setdefault(label, []).append(
                    os.path.join(curdir, file))
    return objects

def _image_descriptors(filename, sift):
    """
    Return the SIFT descriptor matrix of one image file.

    The image is converted to grayscale and rescaled so that its shorter
    side becomes 200 pixels (aspect ratio preserved) before the key points
    are detected.

    :param filename: path of the image to read
    :param sift: SIFT detector used to find and describe the key points
    :return: whatever ``sift.compute`` yields as the descriptor matrix
    """
    image = cv.imread(filename)
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    h, w = gray.shape[:2]
    # 200.0 keeps the scale factor a float even under Python 2 division.
    f = 200.0 / min(h, w)
    gray = cv.resize(gray, None, fx=f, fy=f)
    keypoints = sift.detect(gray)
    _, desc = sift.compute(gray, keypoints)
    return desc


def _load_samples(samples, sift):
    """
    Stack the descriptors of all images of each label.

    :param samples: dict of label -> list of image paths
    :param sift: SIFT detector shared by every image
    :return: (list of stacked descriptor matrices, list of labels), aligned
    """
    xs, ys = [], []
    for label, filenames in samples.items():
        descs = np.array([])
        for filename in filenames:
            desc = _image_descriptors(filename, sift)
            if desc is None:
                # No descriptors for this image (no key points found);
                # appending None would fail, so skip it.
                continue
            if len(descs) == 0:
                descs = desc
            else:
                descs = np.append(descs, desc, axis=0)
        xs.append(descs)
        ys.append(label)
    return xs, ys


# One detector is enough for every image.  OpenCV >= 4.4 exposes SIFT as
# cv.SIFT_create; older builds only provide it in cv.xfeatures2d (contrib).
if hasattr(cv, 'SIFT_create'):
    sift = cv.SIFT_create()
else:
    sift = cv.xfeatures2d.SIFT_create()

train_samples = search_files('./objects/training')

# Load the training samples, then fit and keep one model per label.
train_x, train_y = _load_samples(train_samples, sift)
models = {}
for descs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag', n_iter=1000)
    models[label] = model.fit(descs)

# Model testing: score every test label against every trained model.
test_samples = search_files('./objects/testing')
test_x, test_y = _load_samples(test_samples, sift)

for descs, test_label in zip(test_x, test_y):
    for pred_label, model in models.items():
        score = model.score(descs)
        print(test_label, '->', pred_label, score)

人脸识别

人脸识别与图像识别的区别在于人脸识别要识别出两个人的不同点.

视频捕捉

使用opencv访问视频设备, 获取图像帧.

import cv2 as cv
# Open the video device with index 0.
video_capture = cv.VideoCapture(0)
# read() returns (success flag, frame); keep the captured frame.
frame = video_capture.read()[1]
cv.imshow('', frame)
# Release the device and close every window.
video_capture.release()
cv.destroyAllWindows()

案例:

import cv2 as cv
# 0: the device with index 0
vc = cv.VideoCapture(0)
while True:
    ok, frame = vc.read()
    if not ok:
        # No frame could be grabbed (device missing or stream ended);
        # passing the empty frame to imshow would raise.
        break
    cv.imshow('Video Capture', frame)
    # Wait about 33 ms per frame; key code 27 (Esc) ends the loop.
    if cv.waitKey(33) == 27:
        break
vc.release()
cv.destroyAllWindows()

人脸定位

哈尔级联人脸定位

# Build the Haar cascade locators for face, eyes and nose.
fd = cv.CascadeClassifier('haar/face.xml')
ed = cv.CascadeClassifier('haar/eye.xml')
nd = cv.CascadeClassifier('haar/nose.xml')
# Locate the faces in the frame.
# 1.3: scaleFactor, how much the image is shrunk at each image scale
# 5: minNeighbors, how many neighbouring detections a candidate needs
faces = fd.detectMultiScale(frame, 1.3, 5)
# faces: [(left, top, width, height), ...]
for l, t, w, h in faces:
    a, b = int(w/2), int(h/2)
    cv.ellipse(frame,   # image to draw on
            (l+a, t+b), # centre of the ellipse
            (a, b),     # half-axes
            0,          # rotation angle of the ellipse
            0, 360,     # start angle and end angle
            (255, 0, 0),# colour of the ellipse
            2)          # line width

案例:

import cv2 as cv

fd = cv.CascadeClassifier('haar/face.xml')
ed = cv.CascadeClassifier('haar/eye.xml')
nd = cv.CascadeClassifier('haar/nose.xml')

vc = cv.VideoCapture(0)
while True:
    ok, frame = vc.read()
    if not ok:
        # No frame could be grabbed; stop instead of failing in the detector.
        break
    # Locate faces and outline each with a blue ellipse.
    faces = fd.detectMultiScale(frame, 1.3, 5)
    for l, t, w, h in faces:
        a, b = int(w/2), int(h/2)
        cv.ellipse(frame, (l+a, t+b), (a, b), 0, 0, 360, (255, 0, 0), 2)

        # Look for eyes and nose inside the face region only.  `face` is a
        # slice of `frame`, so drawing on it draws on the frame, and the
        # detected coordinates are relative to the face region.
        face = frame[t: t+h, l: l+w]
        eyes = ed.detectMultiScale(face, 1.3, 2)
        for el, et, ew, eh in eyes:
            ea, eb = int(ew/2), int(eh/2)
            # Centre is the middle of the box (et+eb); the original used
            # t+h, which put the ellipse on the bottom edge of the box.
            cv.ellipse(face, (el+ea, et+eb), (ea, eb), 0, 0, 360, (0, 255, 0), 2)

        nose = nd.detectMultiScale(face, 1.3, 1)
        for nl, nt, nw, nh in nose:
            na, nb = int(nw / 2), int(nh / 2)
            cv.ellipse(face, (nl + na, nt + nb), (na, nb), 0, 0, 360, (0, 0, 255), 2)

    cv.imshow('Video', frame)
    # Key code 27 (Esc) ends the loop.
    if cv.waitKey(33) == 27:
        break
vc.release()
cv.destroyAllWindows()

人脸识别

简单人脸识别: OpenCV的LBPH(局部二值模式直方图)
案例:

import os
import numpy as np
import cv2 as cv
import sklearn.preprocessing as sp

# 识别人脸的定位器
fd = cv.CascadeClassifier('haar/face.xml')

def search_files(directory):
    """
    Collect the jpg files under ``directory``, grouped by folder name.

    :param directory: root directory to search (walked recursively)
    :return: dict mapping the name of each folder that directly contains
        matching files to the list of paths of those files
    """
    directory = os.path.normpath(directory)
    faces = {}
    for curdir, _subdirs, files in os.walk(directory):
        # The last path component of the containing folder is the label.
        label = curdir.split(os.path.sep)[-1]
        for file in files:
            # Matches any name ending in 'jpg'; no dot is required.
            if file.endswith('jpg'):
                faces.setdefault(label, []).append(
                    os.path.join(curdir, file))
    return faces


# Assemble the training inputs (face crops) and outputs (label codes).
train_faces = search_files('faces/training')
codec = sp.LabelEncoder()
codec.fit(list(train_faces))
train_x, train_y = [], []
for label in train_faces:
    for filename in train_faces[label]:
        gray = cv.cvtColor(cv.imread(filename), cv.COLOR_BGR2GRAY)
        # Every face located in the picture becomes one training sample.
        for l, t, w, h in fd.detectMultiScale(gray, 1.1, 2,
                                              minSize=(100, 100)):
            train_x.append(gray[t:t+h, l:l+w])
            train_y.append(codec.transform([label])[0])

train_y = np.array(train_y)

# LBPH (local binary patterns histograms) face recognizer.
model = cv.face.LBPHFaceRecognizer_create()
model.train(train_x, train_y)

# Read the test pictures and crop the faces to run through the model.
test_faces = search_files('faces/testing')
test_x, test_y = [], []
for label in test_faces:
    for filename in test_faces[label]:
        gray = cv.cvtColor(cv.imread(filename), cv.COLOR_BGR2GRAY)
        for l, t, w, h in fd.detectMultiScale(gray, 1.1, 2,
                                              minSize=(100, 100)):
            test_x.append(gray[t:t+h, l:l+w])
            test_y.append(codec.transform([label])[0])
# Classify each face with the LBPH model's predict method.
for face in test_x:
    # predict() gives the class code first; map it back to the label name.
    pred_code = model.predict(face)[0]
    pred_y = codec.inverse_transform([pred_code])[0]
    print(pred_y)

碧落&凡尘

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
机器学习----Day07

语音识别通过傅里叶变换, 将时域的声音函数分解为一系列不同频率的正弦函数的叠加, 通过频率谱线的特殊分布, 建立音频内容和文本的对应关系, 以此作为模型训练的基础.案例: freq.wavimport numpy as npimport numpy.fft as nfimport scipy.io.wavfile as wfimport matplotlib.pyplot as mp...
复制链接

扫一扫