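# Notes: a study of MP3 tag parsing. Python was chosen because of the text-encoding issues involved; this parses MP3 files carrying ID3v2 and ID3v1 tags. The duration calculation is only valid for constant-bitrate (CBR) MP3 files.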
# -*- coding: utf-8 -*-
# coding=utf-8
'''
Created on 2013-8-8
@author: zqm
'''
import struct;
import re;
class ID3_Mp3Parser:
    """Read ID3v1 / ID3v2 tags from an MP3 file and estimate its duration.

    Typical use is ``ID3_Mp3Parser().Parse(path)``; afterwards the results are
    available as attributes:

    * ``ID3V2Tags``  - dict mapping frame id (e.g. ``"TIT2"``) to decoded text,
      or to the raw bytes when the frame is not decodable text.
    * ``ID3V1Tags``  - ``[title, artist, album, year, comment, genre]``.
    * ``TotalTime``  - estimated duration in whole seconds.  It is derived from
      the first audio frame only, so it is only meaningful for constant
      bitrate files.

    The code sticks to the common subset of Python 2.6+ and Python 3.
    """

    def __init__(self):
        # Field sizes (in bytes) of the ID3v2 tag header and frame headers.
        self.ID3V2TagLen = 10
        self.HeaderSize = 3
        self.VerSize = 1
        self.RevisionSize = 1
        self.FlagSize = 1
        self.LenSize = 4
        self.TagNameSize = 4
        self.TagFlagSize = 2
        # Codec selected by the first byte of a text frame.  Index 0 is
        # decoded as GBK by this parser.
        self.CodeString = ['gbk', 'UTF_16', 'UTF_16BE', 'UTF_8']
        # Indexed by the raw 2-bit version / layer fields of a frame header.
        self.MPEGLIST = ('MPEG2.5', None, 'MPEG2', 'MPEG1')
        self.LAYERLIST = (None, 'Layer 3', 'Layer 2', 'Layer 1')
        # A frame id is four alphanumeric bytes; anything else (NUL padding,
        # garbage) ends the frame list.
        self.patten = re.compile(br'[A-Za-z0-9]{4}$')
        # Bitrate tables in kbit/s, indexed by the 4-bit bitrate field.
        # 0 is "free format", -1 marks the forbidden index 15.
        self.MPEG1_LAYER1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, -1]
        self.MPEG1_LAYER2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, -1]
        self.MPEG1_LAYER3 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, -1]
        self.MPEG1 = [self.MPEG1_LAYER1, self.MPEG1_LAYER2, self.MPEG1_LAYER3]
        self.MPEG12_2P5_LAYER1 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, -1]
        self.MPEG12_2P5_LAYER2_3 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, -1]
        self.MPEG12_2P5 = [self.MPEG12_2P5_LAYER1, self.MPEG12_2P5_LAYER2_3, self.MPEG12_2P5_LAYER2_3]
        # Bitrary[0] is used for MPEG1, Bitrary[1] for MPEG2 and MPEG2.5.
        self.Bitrary = [self.MPEG1, self.MPEG12_2P5]
        # Sample rates in Hz, indexed by the 2-bit sample-rate field;
        # 0 marks the reserved index 3.
        self.MPEG1_SAMPLE = [44100, 48000, 32000, 0]
        self.MPEG2_SAMPLE = [22050, 24000, 16000, 0]
        self.MPEG3_SAMPLE = [11025, 12000, 8000, 0]  # used for MPEG2.5
        self.Sample = [self.MPEG1_SAMPLE, self.MPEG2_SAMPLE, self.MPEG3_SAMPLE]
        # Samples per frame: LAYER_SIZE[layer_index][mpeg_index].
        self.LAYER1_SIZE = [384, 384, 384]
        self.LAYER2_SIZE = [1152, 1152, 1152]
        self.LAYER3_SIZE = [1152, 576, 576]
        self.LAYER_SIZE = [self.LAYER1_SIZE, self.LAYER2_SIZE, self.LAYER3_SIZE]
        self.fd = None
        self._ResetState()

    def _ResetState(self):
        """Clear every per-file result so one instance can parse several files."""
        self.ID3V2Tag = False
        self.ID3V1Tag = False
        self.ID3V2Tags = {}
        self.ID3V1Tags = []
        self.Type = -1
        self.Ver = 0
        self.Revision = 0
        self.Flag = 0
        self.HeaderLen = 0
        self.bodyOffset = 0
        self.nChannel = -1
        self.LayerNum = 0
        self.TotalTime = 0

    def HasID3V2Tag(self):
        """Return True if the file starts with the ``ID3`` marker.

        Leaves the file position right after the marker, which is where
        GetVerison() expects it.
        """
        self.fd.seek(0, 0)
        self.ID3V2Header = self.fd.read(self.HeaderSize)
        return self.ID3V2Header == b"ID3"

    def HasID3V1Tag(self):
        """Return True if the last 128 bytes of the file start with ``TAG``."""
        self.fd.seek(0, 2)
        if self.fd.tell() < 128:
            return False
        self.fd.seek(-128, 2)
        self.ID3V1Header = self.fd.read(self.HeaderSize)
        return self.ID3V1Header == b"TAG"

    def ParseHead(self):
        """Detect which tags are present; return True if at least one is."""
        if self.fd.closed:
            return False
        self.ID3V2Tag = self.HasID3V2Tag()
        if self.ID3V2Tag:
            self.GetVerison()
        self.ID3V1Tag = self.HasID3V1Tag()
        return self.ID3V1Tag or self.ID3V2Tag

    def _DecodeID3V1Field(self, raw):
        """Decode one fixed-width ID3v1 field, dropping its NUL/space padding."""
        # Bytes that are not valid GBK are replaced instead of raising.
        return raw.split(b"\x00", 1)[0].decode('gbk', 'replace').strip()

    def ParseID3V1Tag(self):
        """Read the ID3v1 fields that follow the ``TAG`` marker.

        Fills the ``ID3V1Tag_*`` attributes and ``ID3V1Tags``.  Text fields are
        decoded as GBK with their padding removed; the genre is kept as an
        integer.
        """
        self.fd.seek(-125, 2)
        self.ID3V1Tag_Title = self._DecodeID3V1Field(self.fd.read(30))
        self.ID3V1Tag_Artist = self._DecodeID3V1Field(self.fd.read(30))
        self.ID3V1Tag_Album = self._DecodeID3V1Field(self.fd.read(30))
        self.ID3V1Tag_Date = self._DecodeID3V1Field(self.fd.read(4))
        self.ID3V1Tag_Remark = self._DecodeID3V1Field(self.fd.read(30))
        self.ID3V1Tag_Category = bytearray(self.fd.read(1))[0]
        self.ID3V1Tags = [self.ID3V1Tag_Title, self.ID3V1Tag_Artist,
                          self.ID3V1Tag_Album, self.ID3V1Tag_Date,
                          self.ID3V1Tag_Remark, self.ID3V1Tag_Category]

    def ParseID3V2Tag(self):
        """Read the ID3v2.3 / ID3v2.4 frames into ``ID3V2Tags``.

        Does nothing for other ID3v2 versions (``Type == -1``).  Parsing stops
        at the first thing that is not a well-formed frame (padding, a short
        read, or a size that overruns the tag) instead of raising.
        """
        if self.Type == -1:
            return
        frame_header_size = self.TagNameSize + self.LenSize + self.TagFlagSize
        remaining = self.HeaderLen
        self.fd.seek(self.ID3V2TagLen, 0)
        while remaining >= frame_header_size:
            tagName = self.fd.read(self.TagNameSize)
            tagLen = self.fd.read(self.LenSize)
            tagFlags = self.fd.read(self.TagFlagSize)
            if len(tagName) + len(tagLen) + len(tagFlags) != frame_header_size:
                break  # hit end of file inside a frame header
            if not self.patten.match(tagName):
                break  # padding or garbage: no more frames
            if self.Ver == 4:
                # ID3v2.4 stores frame sizes as synchsafe integers.
                nTagLen = self.GetMp3HeadLength(tagLen)
            else:
                nTagLen = self.BigToLittle(tagLen)
            remaining -= frame_header_size
            if nTagLen > remaining:
                break  # corrupt size: frame would run past the tag
            payload = self.fd.read(nTagLen)
            if len(payload) != nTagLen:
                break
            remaining -= nTagLen
            # Text keys keep "TIT2"-style lookups working on Python 2 and 3.
            name = tagName.decode('ascii')
            if nTagLen > 1 and name != "APIC":
                payload = self.GetNameString(payload)
            self.ID3V2Tags[name] = payload

    def ParseLayerInfo(self):
        """Estimate the duration from the first audio frame header.

        Sets ``bodyOffset``, ``nChannel``, ``LayerNum`` and ``TotalTime`` and
        prints the duration as ``(minutes:seconds)``.

        Returns True on success.  Returns False, leaving ``LayerNum`` and
        ``TotalTime`` at 0, when no valid frame header is found right after
        the ID3v2 tag or when the header uses a reserved or free-format value.
        """
        if self.ID3V2Tag:
            self.bodyOffset = self.HeaderLen + self.ID3V2TagLen
        else:
            self.bodyOffset = 0
        self.LayerNum = 0
        self.TotalTime = 0
        self.fd.seek(self.bodyOffset, 0)
        header = bytearray(self.fd.read(4))
        # A frame header starts with 11 set bits (the frame sync).
        if len(header) != 4 or header[0] != 0xFF or (header[1] & 0xE0) != 0xE0:
            return False
        nMpeg = self.GetValueFromBytesBetween(header[1], 3, 5)
        nLayer = self.GetValueFromBytesBetween(header[1], 1, 3)
        nBitrary = self.GetValueFromBytesBetween(header[2], 4, 8)
        nSample = self.GetValueFromBytesBetween(header[2], 2, 4)
        nFill = self.GetValueFromBytesBetween(header[2], 1, 2)
        self.nChannel = self.GetValueFromBytesBetween(header[3], 6, 8)
        nLayerSampleNum = self.GetLayerSampleNum(nMpeg, nLayer)
        nBitrary = self.GetBitrary(nMpeg, nLayer, nBitrary)
        nSample = self.GetSample(nMpeg, nLayer, nSample)
        if nLayerSampleNum <= 0 or nBitrary <= 0 or nSample <= 0:
            # Reserved version/layer/sample rate, or free-format/bad bitrate:
            # the frame size cannot be computed.
            return False
        nLayerSize = self.GetPerLayerLength(nMpeg, nLayer, nLayerSampleNum, nSample, nBitrary, nFill)
        self.LayerNum = self.GetLayerNum(nLayerSize)
        self.TotalTime = self.LayerNum * nLayerSampleNum // nSample
        print('(%d:%d)' % (self.TotalTime // 60, self.TotalTime % 60))
        return True

    def GetLayerNum(self, nLayerSize):
        """Return how many frames of ``nLayerSize`` bytes fit in the audio data.

        The audio data is the file minus ``bodyOffset`` and minus the 128-byte
        ID3v1 tag when one is present.  Returns 0 for a non-positive size.
        """
        if nLayerSize <= 0:
            return 0
        self.fd.seek(0, 2)
        contentSize = self.fd.tell() - self.bodyOffset
        if self.ID3V1Tag:
            contentSize -= 128
        return max(contentSize, 0) // nLayerSize

    def GetPerLayerLength(self, nMpeg, nLayer, nLayerSampleNum, nSample, nBitrary, nFill):
        """Return the size in bytes of one frame, or -1 if it is undefined.

        ``nBitrary`` is in kbit/s, ``nSample`` in Hz and ``nFill`` is the
        padding bit (0 or 1).
        """
        (mpeg_index, layer_index) = self.GetMpegAndLayerIndex(nMpeg, nLayer)
        if mpeg_index is None or layer_index is None or nSample <= 0:
            return -1
        layerLen = nLayerSampleNum * nBitrary * 1000 // (nSample * 8)
        # Layer I pads by one 4-byte slot, the other layers by one byte.
        if layer_index == 0:
            layerLen += nFill * 4
        else:
            layerLen += nFill
        return layerLen

    def GetLayerSampleNum(self, nMpeg, nLayer):
        """Return the number of samples per frame, or -1 for reserved values."""
        (mpeg_index, layer_index) = self.GetMpegAndLayerIndex(nMpeg, nLayer)
        if mpeg_index is None or layer_index is None:
            return -1
        return self.LAYER_SIZE[layer_index][mpeg_index]

    def GetChannelString(self, nChannel):
        """Return the display name for the 2-bit channel-mode field."""
        if nChannel == 0:
            return '立体声'
        elif nChannel == 1:
            return '联合立体声(立体声)'
        elif nChannel == 2:
            return '双声道(立体声)'
        return '单声道(单身)'

    def GetMpegAndLayerIndex(self, nMpeg, nLayer):
        """Map the raw version / layer fields to table indices.

        Returns ``(mpeg_index, layer_index)`` where mpeg_index is 0 for MPEG1,
        1 for MPEG2 and 2 for MPEG2.5, and layer_index is 0, 1, 2 for Layer I,
        II, III.  Returns ``(None, None)`` for the reserved version value 1
        and the reserved layer value 0.
        """
        mpeg_index = {3: 0, 2: 1, 0: 2}.get(nMpeg)
        if mpeg_index is None or nLayer not in (1, 2, 3):
            return (None, None)
        return (mpeg_index, 3 - nLayer)

    def GetBitrary(self, nMpeg, nLayer, nIndex):
        """Return the bitrate in kbit/s for a bitrate index.

        Returns 0 for free format and -1 for a reserved version/layer, the
        forbidden index, or an out-of-range index.
        """
        (mpeg_index, layer_index) = self.GetMpegAndLayerIndex(nMpeg, nLayer)
        if mpeg_index is None or layer_index is None:
            return -1
        if mpeg_index != 0:
            mpeg_index = 1  # MPEG2 and MPEG2.5 share one set of tables
        table = self.Bitrary[mpeg_index][layer_index]
        if not 0 <= nIndex < len(table):
            return -1
        return table[nIndex]

    def GetSample(self, nMpeg, nLayer, nIndex):
        """Return the sample rate in Hz for a sample-rate index.

        Returns 0 for the reserved index and -1 for a reserved version/layer
        or an out-of-range index.
        """
        (mpeg_index, layer_index) = self.GetMpegAndLayerIndex(nMpeg, nLayer)
        if mpeg_index is None or layer_index is None:
            return -1
        table = self.Sample[mpeg_index]
        if not 0 <= nIndex < len(table):
            return -1
        return table[nIndex]

    def GetMPEG(self, byts):
        """Return the 2-bit version field of the second header byte."""
        return self.GetValueFromBytesBetween(byts, 3, 5)

    def GetLayer(self, byts):
        """Return the 2-bit layer field of the second header byte."""
        return self.GetValueFromBytesBetween(byts, 1, 3)

    def GetValueFromBytesBetween(self, byts, startIndex, endIndex):
        """Return bits ``startIndex`` (inclusive) to ``endIndex`` (exclusive).

        Bit 0 is the least significant bit.  ``byts`` may be a single byte
        (a length-1 byte string) or an integer byte value.  Returns 0 when the
        bit range is empty or falls outside 0..8.
        """
        if endIndex <= startIndex or endIndex > 8 or startIndex < 0:
            return 0
        nVal = byts if isinstance(byts, int) else ord(byts)
        mask = (1 << (endIndex - startIndex)) - 1
        return (nVal >> startIndex) & mask

    def GetNameString(self, string):
        """Decode a frame payload whose first byte selects the text encoding.

        The first byte indexes ``CodeString``; the rest is decoded with that
        codec and trailing NUL terminators are removed.  When the first byte
        is not a known encoding or the data does not decode (typical for
        non-text frames), the payload is returned unchanged.
        """
        data = bytearray(string)
        if len(data) == 0 or data[0] >= len(self.CodeString):
            return string
        try:
            text = string[1:].decode(self.CodeString[data[0]])
        except UnicodeDecodeError:
            return string
        return text.rstrip('\x00')

    def BigToLittle(self, strNum):
        """Return the unsigned integer stored big-endian in 4 bytes.

        Raises struct.error if ``strNum`` is not exactly 4 bytes long.
        """
        # An explicit byte order keeps the result independent of the host.
        return struct.unpack('>I', strNum)[0]

    def GetVerison(self):
        """Read the rest of the 10-byte ID3v2 header.

        Must be called right after HasID3V2Tag().  Sets ``Ver``, ``Revision``,
        ``Flag``, ``HeaderLen`` and ``Type`` (1 for ID3v2.3, 2 for ID3v2.4,
        -1 otherwise).  ``HeaderLen`` is read for every version so the start
        of the audio data can still be located; it is 0 if the header is
        truncated.

        Returns True if the frames can be parsed (version 3 or 4).
        """
        self.Ver = self.GetIntegerFromByte(self.fd.read(self.VerSize))
        self.Revision = self.GetIntegerFromByte(self.fd.read(self.RevisionSize))
        self.Flag = self.GetIntegerFromByte(self.fd.read(self.FlagSize))
        Len = self.fd.read(self.LenSize)
        if len(Len) == self.LenSize:
            self.HeaderLen = self.GetMp3HeadLength(Len)
        else:
            self.HeaderLen = 0
        if self.Ver == 3:
            self.Type = 1  # ID3V2.3
        elif self.Ver == 4:
            self.Type = 2  # ID3V2.4
        else:
            self.Type = -1
        return self.Type != -1

    def GetMp3HeadLength(self, Len):
        """Decode a 4-byte synchsafe size (7 significant bits per byte).

        Raises struct.error if ``Len`` is not exactly 4 bytes long.
        """
        return self.CalcMp3TagLength(list(struct.unpack('4B', Len)))

    def CalcMp3TagLength(self, nIntArray):
        """Combine four byte values, 7 bits each, most significant first."""
        Size = 0
        for value in nIntArray[:4]:
            Size = (Size << 7) | (value & 0x7F)
        return Size

    def GetIntegerFromByte(self, byte):
        """Return the value of a single byte, or 0 if ``byte`` is not one byte."""
        try:
            data = bytearray(byte)
        except (TypeError, ValueError):
            return 0
        if len(data) != 1:
            return 0
        return data[0]

    def Parse(self, filePath):
        """Parse the tags of ``filePath`` and estimate its duration.

        Prints ``not id3`` when the file carries no ID3 tag.  The duration is
        estimated whether or not a tag is present.

        Returns False for an empty path, True otherwise.  Errors raised while
        opening or reading the file propagate to the caller; the file is
        closed in every case.
        """
        if not filePath:
            return False
        self._ResetState()
        # Nothing is written, so read-only access is enough.
        self.fd = open(filePath, "rb")
        try:
            if not self.ParseHead():
                print('not id3')
            else:
                if self.ID3V2Tag:
                    self.ParseID3V2Tag()
                if self.ID3V1Tag:
                    self.ParseID3V1Tag()
            self.ParseLayerInfo()
        finally:
            self.fd.close()
        return True