超简单的婴儿哭声检测实现方案–python版
1. 构建项目
项目结构
└─audio_data
├─mp3
├─test
└─wav
├─cry
├─non_cry
└─other
└─get-model.py
└─main.py
└─requirements.txt
get-model.py
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump, load
from packaging import version
def load_audio_file(file_path):
    """Load an audio file as a mono signal at its native sample rate.

    Args:
        file_path: Path to the audio file (any format librosa can read).

    Returns:
        Tuple of (signal, sample_rate) exactly as produced by
        ``librosa.load(..., sr=None, mono=True)``.
    """
    # sr=None keeps the file's original sample rate instead of resampling.
    return librosa.load(file_path, sr=None, mono=True)
def extract_features(file_path, sample_rate=22050):
    """Extract a time-averaged 13-dimension MFCC feature vector from an audio file.

    Args:
        file_path: Path to the audio file.
        sample_rate: Fallback rate, kept for backward compatibility; used only
            if the file's native sample rate cannot be determined.

    Returns:
        np.ndarray of shape (13,): mean of the MFCC matrix over time frames.
    """
    # Bug fix: the audio is loaded at its NATIVE rate (load_audio_file uses
    # sr=None), but the original computed MFCCs with the caller-supplied
    # sample_rate. For any file not recorded at that rate the MFCC frequency
    # axis was mis-scaled. Use the rate the signal was actually loaded at.
    signal, native_sr = load_audio_file(file_path)
    mfccs = librosa.feature.mfcc(y=signal, sr=native_sr or sample_rate, n_mfcc=13)
    return np.mean(mfccs.T, axis=0)
# ---------------------------------------------------------------------------
# Training pipeline: walk ./audio_data/wav/<label>/*.wav, extract MFCC
# features, train a linear SVM, and persist the model and scaler via joblib.
# ---------------------------------------------------------------------------
data_dir = './audio_data/wav'
# Directory name -> class id. The sub-directory a file lives in IS its label.
labels = {'cry': 1, 'non_cry': 0, 'other': 2}

X, y = [], []
for root, dirs, files in os.walk(data_dir):
    print(f"Processing directory: {root}")
    for file in files:
        print(f"File found: {os.path.join(root, file)}")
        if not file.endswith('.wav'):
            continue
        # Bug fix: the original used basename(root).split('_')[0], which
        # turned the directory name "non_cry" into "non" — not a key of
        # `labels` — so every non_cry sample was silently skipped. The full
        # directory name is the label.
        file_label = os.path.basename(root)
        print(f"File label: {file_label}")
        if file_label not in labels:
            continue
        file_path = os.path.join(root, file)
        print(f"Processing file: {file_path}")
        try:
            feature = extract_features(file_path, sample_rate=22050)
        except Exception as e:
            # Best effort: one unreadable file must not abort training.
            print(f"Error processing file {file_path}: {e}")
            continue
        X.append(feature)
        y.append(labels[file_label])
        print(f"Feature extracted successfully.")
        print(f"Feature: {feature}")
        print(f"Label: {labels[file_label]}")

if len(X) == 0 or len(y) == 0:
    raise ValueError("No valid data found in the directory.")

X = np.array(X)
y = np.array(y)
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")
print(f"X: {X}")
print(f"y: {y}")

unique_labels, counts = np.unique(y, return_counts=True)
print(f"Unique labels: {unique_labels}")
print(f"Counts: {counts}")
if len(unique_labels) < 2:
    raise ValueError("The dataset must contain at least two different labels.")

# Hold out 25% for evaluation; fit the scaler on the training split only so
# test-set statistics never leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Linear SVM, one-vs-rest across the three classes.
model = SVC(kernel='linear', C=1, decision_function_shape='ovr')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(report)

# Bug fix: the original branched on the scikit-learn version and called
# `joblib.dump(...)` in the old-version branch, but only
# `from joblib import dump, load` was imported, so that branch raised
# NameError. `dump` is joblib's own API and works regardless of the
# scikit-learn version — persist unconditionally.
dump(model, 'model.pkl')
dump(scaler, 'scaler.pkl')
print("Model and scaler saved.")
main.py
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from joblib import load
def load_audio_file(file_path):
    """Read *file_path* with librosa as mono audio at its native sample rate.

    Args:
        file_path: Path to the audio file to read.

    Returns:
        Tuple of (audio_signal, native_sample_rate).
    """
    # sr=None: do not resample; return the recording's original rate.
    audio_signal, native_sample_rate = librosa.load(file_path, sr=None, mono=True)
    return audio_signal, native_sample_rate
def extract_features(file_path, sample_rate=22050):
    """Extract a time-averaged 13-dimension MFCC feature vector from an audio file.

    Must stay identical to the extractor used at training time in
    get-model.py, otherwise the SVM receives inconsistent features.

    Args:
        file_path: Path to the audio file.
        sample_rate: Fallback rate, kept for backward compatibility; used only
            if the file's native sample rate cannot be determined.

    Returns:
        np.ndarray of shape (13,): mean of the MFCC matrix over time frames.
    """
    # Bug fix: the signal is loaded at its NATIVE rate (sr=None), so MFCCs
    # must be computed with that actual rate; the original used the assumed
    # 22050 Hz, mis-scaling features for files at any other rate.
    signal, native_sr = load_audio_file(file_path)
    mfccs = librosa.feature.mfcc(y=signal, sr=native_sr or sample_rate, n_mfcc=13)
    return np.mean(mfccs.T, axis=0)
# ---------------------------------------------------------------------------
# Inference pipeline: load the persisted model and scaler, then classify
# every .mp3/.wav file found under ./audio_data/test.
# ---------------------------------------------------------------------------
model = load('model.pkl')
scaler = load('scaler.pkl')

test_dir = './audio_data/test'
for root, dirs, files in os.walk(test_dir):
    # str.endswith accepts a tuple: one check covers both extensions.
    audio_names = [name for name in files if name.endswith(('.mp3', '.wav'))]
    for file in audio_names:
        file_path = os.path.join(root, file)
        print(f"Processing file: {file_path}")
        try:
            features = extract_features(file_path, sample_rate=22050)
            # Scale with the training-time scaler, then predict the class id.
            features_scaled = scaler.transform([features])
            prediction = model.predict(features_scaled)
            print(f"文件名: {file} --- 结果值: {prediction[0]}")
        except Exception as e:
            # Keep going: one bad file should not stop the whole batch.
            print(f"Error processing file {file_path}: {e}")
librosa
numpy
scipy
scikit-learn
packaging
joblib
2. 安装依赖
安装环境
1. cmd或者powershell 直接执行以下命令,Windows10 会自动跳转到应用市场,根据提示安装即可
Python
2. 安装依赖包
pip3 install -r requirements.txt
3. 训练模型
python3 get-model.py
4. 测试模型
python3 main.py