python gettextsize_20190320get—python各种文件加载与存储

1. 处理txt:参见《python txt文件常用读写操作》(www.cnblogs.com)

2.pkl:

用cPickle或者pandas中的read_pickle,参见:https://blog.csdn.net/qq_23052951/article/details/53729710

和pickle:

3. xml

主要用import xml.dom.minidom as xmldom

xml文件的读取:DomTree = xmldom.parse(xmlfile)

我之前写的读取图像标注文件xml的代码:

# Convert each annotation XML file listed in self.xml into COCO-style
# records, appended to self.images / self.categories / self.annotations.
# NOTE(review): the original paste had lost all indentation; the nesting
# below is reconstructed from the control-flow keywords — confirm against
# the original source.
for num, xmlfile in enumerate(self.xml):
    # Progress output.
    sys.stdout.write('\r>> Converting image %d/%d' % (
        num + 1, len(self.xml)))
    sys.stdout.write('\n')
    sys.stdout.flush()

    self.xmlfile = xmlfile
    self.num = num

    # The images are looked up in a 'JPEGImages(val)' directory located
    # two levels above the XML file.
    path = os.path.dirname(os.path.dirname(self.xmlfile))
    obj_path = glob.glob(os.path.join(path, 'JPEGImages(val)', '*.jpg'))

    DomTree = xmldom.parse(xmlfile)
    annotation = DomTree.documentElement

    filenamelist = annotation.getElementsByTagName('filename')
    widthlist = annotation.getElementsByTagName('width')
    heightlist = annotation.getElementsByTagName('height')
    objectlist = annotation.getElementsByTagName('object')

    # NOTE(review): the 'string_escape' codec exists only in Python 2;
    # on Python 3 this line raises LookupError — confirm the target
    # interpreter before porting.
    filename = (filenamelist[0].childNodes[0].data).encode('unicode-escape').decode('string_escape')
    # Replace the last four characters (presumably the extension) by '.jpg'.
    self.filen_ame = filename[:-4] + '.jpg'
    self.width = int(widthlist[0].childNodes[0].data)
    self.height = int(heightlist[0].childNodes[0].data)
    self.images.append(self.image())

    self.path = os.path.join(path, 'JPEGImages(val)', self.filen_ame)
    if self.path not in obj_path:
        # The referenced image is missing: report it and stop converting.
        print('img_name',self.filen_ame)
        break

    for objects in objectlist:
        namelist = objects.getElementsByTagName('name')
        # NOTE(review): same Python-2-only codec as for the file name.
        self.name = (namelist[0].childNodes[0].data).encode('unicode-escape').decode('string_escape')

        # Map the class name to a supercategory; for any other name
        # self.supercategory keeps whatever value it had before.
        if self.name == 'Inert material dumping area':
            self.supercategory = 'person'
        elif self.name in ('Sea mud dumping area',
                           'Construction waste dumping area',
                           'Empty truck',
                           'Temporary storage area'):
            self.supercategory = 'vehicle'

        # Register a category the first time its name is seen.
        if self.name not in self.label:
            self.categories.append(self.categorie())
            self.label.append(self.name)
            print(self.label)

        bndbox = objects.getElementsByTagName('bndbox')
        for box in bndbox:
            x1_list = box.getElementsByTagName('xmin')
            x1 = int(x1_list[0].childNodes[0].data)
            y1_list = box.getElementsByTagName('ymin')
            y1 = int(y1_list[0].childNodes[0].data)
            x2_list = box.getElementsByTagName('xmax')
            x2 = int(x2_list[0].childNodes[0].data)
            y2_list = box.getElementsByTagName('ymax')
            y2 = int(y2_list[0].childNodes[0].data)

            w = x2 - x1
            h = y2 - y1

            # COCO bounding-box layout: [x, y, w, h].
            self.bbox = [x1, y1, w, h]
            self.annotations.append(self.annotation())
            self.annID += 1

创建xml文件:

from xml.dom.minidom import Document

def generate_xml(img_name, split_lines, img_size, class_ind,
                 output_dir='/home/huichang/Desktop/xml/'):
    """Write an annotation XML file describing the boxes of one image.

    The document has an ``annotation`` root with ``folder``, ``filename``,
    ``source``, ``size`` and one ``object`` (``name`` + ``bndbox``) per box.

    Args:
        img_name: Image file name; stored in ``<filename>`` and, with its
            extension replaced by ``.xml``, used as the output file name.
        split_lines: Iterable of text lines ``"x1,y1,x2,y2,class_id"``.
            Surrounding whitespace (such as a trailing newline) is ignored
            and blank lines are skipped.
        img_size: Indexable as ``(height, width, depth)``; index 1 is
            written as width, index 0 as height, index 2 as depth.
        class_ind: Sequence of class names; ``class_id`` is 1-based.
        output_dir: Directory the XML file is written to. Defaults to the
            path that was hard-coded in the original snippet.

    Boxes whose ``class_id`` falls outside ``1..len(class_ind)`` are
    skipped rather than being mapped to a wrong label through negative
    indexing or aborting the whole file with ``IndexError``.
    """
    import os  # local import: keeps the snippet self-contained

    doc = Document()  # create the DOM document object
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'Test')
    _append_text_element(doc, annotation, 'filename', img_name)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'Test Database')
    _append_text_element(doc, source, 'annotations', 'Test')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(img_size[1]))
    _append_text_element(doc, size, 'height', str(img_size[0]))
    _append_text_element(doc, size, 'depth', str(img_size[2]))

    for split_line in split_lines:
        # strip() instead of [0:-1]: a line without a trailing newline no
        # longer loses the last digit of its class id.
        stripped = split_line.strip()
        if not stripped:
            continue
        fields = stripped.split(',')
        x1, y1, x2, y2 = (int(value) for value in fields[:4])
        class_id = int(fields[4])
        print('class_id', class_id)

        # class_id is 1-based; 0 or a negative id would silently pick a
        # label from the end of class_ind.
        if not 1 <= class_id <= len(class_ind):
            continue
        class_name = class_ind[class_id - 1]
        print('class_name', class_name)

        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', class_name)

        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        _append_text_element(doc, bndbox, 'xmin', str(x1))
        _append_text_element(doc, bndbox, 'ymin', str(y1))
        _append_text_element(doc, bndbox, 'xmax', str(x2))
        _append_text_element(doc, bndbox, 'ymax', str(y2))

    # Write the DOM document to disk. toprettyxml() returns bytes when an
    # encoding is given, so the file must be opened in binary mode.
    xml_path = os.path.join(output_dir,
                            os.path.splitext(img_name)[0] + '.xml')
    with open(xml_path, 'wb') as xml_file:
        xml_file.write(doc.toprettyxml(indent='\t', encoding='utf-8'))


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element

4. json:

看这两篇文章就行,比较简单:《Python Json序列化与反序列化》(www.cnblogs.com)和《python读写json文件 - Bigberg - 博客园》(www.cnblogs.com)。

5. html:

采用lxml库:

# Parse an HTML document with lxml and keep the root element of the tree.
from lxml.html import parse

# NOTE(review): urllib2 is the Python 2 module name; on Python 3 urlopen
# lives in urllib.request — confirm the target interpreter.
from urllib2 import urlopen

# NOTE(review): the argument below looks like a page title, not a URL (the
# link was presumably lost when the article was copied) — replace it with
# the real address before running.
parsed = parse(urlopen('python3读取html文件 - xqnq2007 - 博客园'))

# Root element of the parsed document.
doc = parsed.getroot()

6. HDF5

h5接受的数据是矩阵跟mat方法一致,但是具有更强的压缩性能

相关读取见文章:《Python 操作 HDF5文件》(www.jianshu.com)以及 https://blog.csdn.net/dcrmg/article/details/79165951

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值