Python涉及的数据类型比较繁多,在编程过程中,经常需要在不同类型的数据之间进行恰当的转换。
1、int转bytes
>>> x=97
>>> y=str(x) #转str
>>> print(type(y))
<class 'str'>
>>> print(y.encode()) #转bytes
b'97'
>>>
2、bytes(十六进制字符)转uint
#
def U4(a):
    """Convert one hexadecimal digit, given as a length-1 bytes object, to an int.

    Args:
        a: A single ASCII hex digit as bytes, e.g. ``b'7'``, ``b'c'`` or ``b'C'``.

    Returns:
        The value of the digit, in the range 0..15.

    Raises:
        ValueError: If ``a`` is not exactly one hexadecimal digit.
    """
    code = int.from_bytes(a, byteorder='big', signed=False)
    if len(a) == 1:
        if ord('0') <= code <= ord('9'):
            return code - ord('0')
        if ord('a') <= code <= ord('f'):
            return code - ord('a') + 10
        # Upper-case digits were previously mapped to 17..22 by mistake.
        if ord('A') <= code <= ord('F'):
            return code - ord('A') + 10
    # Anything else used to yield a silent out-of-range result.
    raise ValueError("U4 expects a single hex digit, got {0!r}".format(a))
def U8(a):
    """Convert two hex-digit bytes (8 bits in total) to an unsigned integer.

    The first digit is the high nibble and the second the low nibble.
    A length other than 2 is only reported by printing 'error len';
    the conversion is still attempted on the first two positions.
    """
    if len(a) != 2:
        print('error len')
    high_nibble = U4(a[:1])
    low_nibble = U4(a[1:2])
    return (high_nibble << 4) + low_nibble
def U16(a):
    """Convert four hex-digit bytes (16 bits in total) to an unsigned integer.

    The first two digits form the high byte and the last two the low byte.
    A length other than 4 is only reported by printing 'error len';
    the conversion is still attempted on the first four positions.
    """
    if len(a) != 4:
        print('error len')
    high_byte = U8(a[:2])
    low_byte = U8(a[2:4])
    return (high_byte << 8) + low_byte
def U32(a):
    """Convert eight hex-digit bytes (32 bits in total) to an unsigned integer.

    The first four digits form the high half-word and the last four the low
    half-word. A length other than 8 is only reported by printing
    'error len'; the conversion is still attempted on the first eight
    positions.
    """
    if len(a) != 8:
        print('error len')
    high_half = U16(a[:4])
    low_half = U16(a[4:8])
    return (high_half << 16) + low_half
#
3、char转int
>>> x='a'
>>> y=ord(x) #转ascii
>>> print(y)
97
>>> z=hex(y)
>>> print(z)
0x61
>>> print(type(z))
<class 'str'>
4、bytes与str互转
>>> x=b'12'
>>> y=x.decode() #bytes转str
>>> print(x)
b'12'
>>> print(y)
12
>>> print(type(y))
<class 'str'>
>>> z=y.encode('utf-8') #str转bytes
>>> print(z)
b'12'
>>> print(type(z))
<class 'bytes'>
5、使用struct模块实现字节流(bytes)数据的处理
def getBytesPack(array_data):
    """Pack a sequence of byte values into a single ``bytes`` object.

    Args:
        array_data: Sequence whose items are either ints in 0..255 or strings
            holding an integer literal such as ``'0x64'`` (the form that
            ``hex()`` produces), e.g. ``['0x64', '0x0', '0x0']``.

    Returns:
        A bytes object with one byte per item, in order; ``b''`` when
        ``array_data`` is empty.

    Raises:
        struct.error: If a value does not fit in an unsigned byte.
        ValueError: If a string item is not a valid integer literal.
    """
    # Imported here because no file-level import of struct is visible.
    import struct

    # Strings are parsed with base 0 so the '0x' prefix selects hexadecimal.
    values = [
        int(item, 0) if isinstance(item, str) else item
        for item in array_data
    ]
    # One pack call with a repeat count avoids repeated bytes concatenation.
    return struct.pack('{0}B'.format(len(values)), *values)
def getListfromByts(bytes_array):
    """Turn a byte stream into a list of hex strings, one per byte.

    For example ``b'd\\x00\\x00'`` becomes ``['0x64', '0x0', '0x0']``.
    """
    return [hex(byte_value) for byte_value in bytes_array]
6、批量转换C文件为UTF-8格式
import os
import sys
import codecs
import chardet
def convert(filename, out_enc="utf8"):
    """Re-encode a file in place when chardet reports it as GB2312.

    The detected encoding is always printed. Files detected as anything
    other than "GB2312" are left untouched.

    Args:
        filename: Path of the file to inspect and possibly rewrite.
        out_enc: Target encoding name; defaults to "utf8".

    I/O errors are caught and printed rather than propagated.
    """
    try:
        # Read-only mode is enough here; the handle is closed before rewriting.
        with open(filename, 'rb') as src:
            content = src.read()
        source_encoding = chardet.detect(content)["encoding"]
        print(source_encoding)
        if source_encoding == "GB2312":
            # Transcode fully in memory first so a decode failure cannot
            # leave a truncated file behind.
            content = content.decode(source_encoding).encode(out_enc)
            with open(filename, 'wb') as dst:
                dst.write(content)
    except IOError as err:
        print("I/O error:{0}".format(err))
def removeBom(file):
    """Remove a leading UTF-8 byte-order mark from a file, in place.

    Args:
        file: Path of the file to process.

    The file is rewritten only when it actually starts with the BOM;
    otherwise it is left untouched.
    """
    with open(file, 'rb') as src:
        data = src.read()
    if data.startswith(codecs.BOM_UTF8):
        # Write the payload back; previously the stripped data was discarded,
        # so the BOM stayed in the file.
        with open(file, 'wb') as dst:
            dst.write(data[len(codecs.BOM_UTF8):])
def explore(dir):
    """Walk ``dir`` recursively and process every ``.c`` and ``.h`` file.

    Each matching file name is printed, then the file is passed to
    ``convert`` followed by ``removeBom``.
    """
    wanted_suffixes = ('.c', '.h')
    for folder, _subdirs, names in os.walk(dir):
        for name in names:
            suffix = os.path.splitext(name)[1]
            if suffix not in wanted_suffixes:
                continue
            print(name)
            full_path = os.path.join(folder, name)
            convert(full_path)
            removeBom(full_path)
def main():
    """Process the directory given as the first command-line argument."""
    target_dir = sys.argv[1]
    explore(target_dir)
# Usage: python codeUTF8.py <target directory>
if __name__ == "__main__":
    main()