#!/usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author:czc
@file: specgram3.py
@time: 2018/04/17
"""
import wave
import numpy
import matplotlib.pyplot as plt
from PIL import Image
wave_path = '/home/czc/SPEECH_PROJECT/Desktop/xiaolong/_257_6268_1513239470_35.wav'
# target: compute a log spectrogram from a WAV file
# input: filename, path of the WAV file, string
#        window_length_ms, analysis window length in milliseconds, int
#        window_shift_times, window shift as a fraction of the window length, float
def getSpectrum(filename, window_length_ms, window_shift_times):
    """Compute a log-magnitude spectrogram from a WAV file.

    Args:
        filename (str): path of the WAV file to analyse.
        window_length_ms (int): analysis window length in milliseconds.
        window_shift_times (float): window shift as a fraction of the
            window length (e.g. 0.5 for 50% overlap).

    Returns:
        numpy.ndarray: float array of shape (window_length // 2, nframe)
        holding log1p of the FFT magnitude per frame; only the lower half
        of the (symmetric) spectrum is kept.
    """
    # read data; close the handle even if readframes raises
    wav_file = wave.open(filename, 'r')
    try:
        nchannels, sampwidth, framerate, wav_length = wav_file.getparams()[:4]
        str_data = wav_file.readframes(wav_length)
    finally:
        wav_file.close()
    # frombuffer replaces the deprecated numpy.fromstring.
    # NOTE(review): assumes mono 16-bit samples; stereo input would
    # interleave channels here -- confirm against the input files.
    wave_data = numpy.frombuffer(str_data, dtype=numpy.short)
    # gain log spectrogram
    # Floor division keeps these usable as array sizes/indices on both
    # Python 2 and Python 3 (true division would produce floats).
    window_length = framerate * window_length_ms // 1000  # ms -> sample count
    win_hamming = numpy.hamming(window_length)
    window_shift = int(window_length * window_shift_times)  # samples per hop
    nframe = (wav_length - (window_length - window_shift)) // window_shift
    spec = numpy.zeros((window_length // 2, nframe))  # lower half only
    for i in range(nframe):  # range works on Py2 and Py3 (xrange does not)
        start = i * window_shift
        end = start + window_length
        spec[:, i] = numpy.log1p(
            numpy.abs(numpy.fft.fft(win_hamming * wave_data[start:end]))
        )[:window_length // 2]
    return spec
# main process: compute the spectrogram, preview it, and save a 256x256 image
speech_spectrum = getSpectrum(wave_path, 20, 0.5)
# Show the raw (unflipped) spectrogram in the matplotlib figure.
plt.imshow(speech_spectrum)
# Flip vertically so low frequencies sit at the bottom, then scale to 0..255.
speech_spectrum = speech_spectrum[::-1, :]
peak = numpy.max(speech_spectrum)
if peak > 0:  # guard against an all-zero spectrogram (silent/empty input)
    speech_spectrum = speech_spectrum / peak
im = Image.fromarray(numpy.uint8(speech_spectrum * 255))
im = im.resize((256, 256))
# BUG FIX: the original passed 'P' as the explicit save format; 'P' is a
# PIL image *mode*, not a registered format, and Image.save raises
# KeyError on it. Let Pillow infer JPEG from the '.jpg' extension.
im.save('spec3.jpg')
plt.xlim(0, 300)
plt.ylim(0, 160)
plt.show()