python gettextsize_20190320get—python各种文件加载与存储

最新推荐文章于 2024-04-21 22:54:08 发布

weixin_39980353

最新推荐文章于 2024-04-21 22:54:08 发布

阅读量85

点赞数

文章标签： python gettextsize

1. 处理 txt：参见《python txt文件常用读写操作》（www.cnblogs.com）

2.pkl：

用 cPickle 或者 pandas 中的 read_pickle（参见 https://blog.csdn.net/qq_23052951/article/details/53729710）

和pickle:

3. xml

主要用import xml.dom.minidom as xmldom

xml文件的读取：DomTree = xmldom.parse(xmlfile)

我之前写的读取图像标注文件xml的代码：

# Walk every VOC-style XML annotation file and collect its image record,
# categories and bounding boxes on ``self``.
# NOTE(review): the indentation of this snippet was lost in the source; the
# nesting below is reconstructed from the statement order -- confirm.
for num, xmlfile in enumerate(self.xml):
    # Progress output.
    progress = '\r>> Converting image %d/%d' % (num + 1, len(self.xml))
    sys.stdout.write(progress)
    sys.stdout.write('\n')
    sys.stdout.flush()

    self.xmlfile = xmlfile
    self.num = num

    # The image folder is looked up two directory levels above the XML file.
    path = os.path.dirname(os.path.dirname(self.xmlfile))
    image_dir = os.path.join(path, 'JPEGImages(val)')
    obj_path = glob.glob(os.path.join(image_dir, '*.jpg'))

    DomTree = xmldom.parse(xmlfile)
    annotation = DomTree.documentElement
    filenamelist = annotation.getElementsByTagName('filename')
    widthlist = annotation.getElementsByTagName('width')
    heightlist = annotation.getElementsByTagName('height')
    objectlist = annotation.getElementsByTagName('object')

    # NOTE(review): 'string_escape' is a Python 2 codec; this call is
    # expected to fail on Python 3 -- confirm the target interpreter.
    raw_filename = filenamelist[0].childNodes[0].data
    filename = raw_filename.encode('unicode-escape').decode('string_escape')
    # Drop the last four characters and force a '.jpg' suffix.
    self.filen_ame = filename[:-4] + '.jpg'
    self.width = int(widthlist[0].childNodes[0].data)
    self.height = int(heightlist[0].childNodes[0].data)
    self.images.append(self.image())

    # Stop the whole conversion at the first annotation whose image is
    # not among the globbed .jpg files.
    self.path = os.path.join(image_dir, self.filen_ame)
    if self.path not in obj_path:
        print('img_name',self.filen_ame)
        break

    for objects in objectlist:
        namelist = objects.getElementsByTagName('name')
        raw_name = namelist[0].childNodes[0].data
        self.name = raw_name.encode('unicode-escape').decode('string_escape')

        # NOTE(review): names matching neither branch leave
        # self.supercategory at whatever value it had before.
        if self.name == 'Inert material dumping area':
            self.supercategory = 'person'
        elif self.name in ('Sea mud dumping area',
                           'Construction waste dumping area',
                           'Empty truck',
                           'Temporary storage area'):
            self.supercategory = 'vehicle'

        # Register a category the first time a label is seen.
        if self.name not in self.label:
            self.categories.append(self.categorie())
            self.label.append(self.name)
            # NOTE(review): assumed to sit inside this `if`; the original
            # nesting is not recoverable from the source.
            print(self.label)

        bndbox = objects.getElementsByTagName('bndbox')
        for box in bndbox:
            # Read the corners in the order xmin, ymin, xmax, ymax.
            x1, y1, x2, y2 = [
                int(box.getElementsByTagName(tag)[0].childNodes[0].data)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            ]
            w = x2 - x1
            h = y2 - y1
            # self.rectangle = [x1, y1, x2, y2]  # used to compute segmentation
            self.bbox = [x1, y1, w, h]  # COCO box layout: [x, y, w, h]
            self.annotations.append(self.annotation())
            self.annID += 1

创建xml文件：

from xml.dom.minidom import Document

def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def generate_xml(img_name, split_lines, img_size, class_ind,
                 out_dir='/home/huichang/Desktop/xml/'):
    """Write a VOC-style annotation XML file for one image.

    Args:
        img_name: Image file name; stored in ``<filename>`` and, with its
            extension replaced by ``.xml``, used as the output file name.
        split_lines: Iterable of text lines of the form
            ``x1,y1,x2,y2,class_id`` (a trailing newline is allowed).
            Blank lines are skipped.
        img_size: Indexable as ``(height, width, depth)``; index 1 is written
            to ``<width>``, index 0 to ``<height>``, index 2 to ``<depth>``.
        class_ind: Sequence of class names; ``class_id`` is 1-based, so the
            name written is ``class_ind[class_id - 1]``.
        out_dir: Directory the XML file is written to. Defaults to the path
            that was previously hard-coded.

    Raises:
        ValueError: If a coordinate or class id is not an integer.
        IndexError: If a non-blank line has fewer than five fields.
    """
    import os  # local import: keeps this block self-contained

    doc = Document()  # create the DOM document object
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'Test')
    _append_text_element(doc, annotation, 'filename', img_name)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'Test Database')
    _append_text_element(doc, source, 'annotations', 'Test')

    size = doc.createElement('size')
    annotation.appendChild(size)
    _append_text_element(doc, size, 'width', str(img_size[1]))
    _append_text_element(doc, size, 'height', str(img_size[0]))
    _append_text_element(doc, size, 'depth', str(img_size[2]))

    for split_line in split_lines:
        # Strip only the line terminator. Blindly dropping the last
        # character would eat a digit when the final line has no newline.
        stripped = split_line.rstrip('\r\n')
        if not stripped.strip():
            continue
        fields = stripped.split(',')
        x1 = int(fields[0])
        y1 = int(fields[1])
        x2 = int(fields[2])
        y2 = int(fields[3])
        class_id = int(fields[4])
        print('class_id',class_id)

        # class_id is 1-based. Ids below 1 would wrap around to the end of
        # class_ind via negative indexing, so skip them like too-large ids.
        # NOTE(review): the original indentation was lost; the <object>
        # creation is assumed to be guarded by this range check.
        if not 1 <= class_id <= len(class_ind):
            continue
        class_name = class_ind[class_id - 1]
        print('class_name',class_name)

        obj_node = doc.createElement('object')
        annotation.appendChild(obj_node)
        _append_text_element(doc, obj_node, 'name', class_name)

        bndbox = doc.createElement('bndbox')
        obj_node.appendChild(bndbox)
        _append_text_element(doc, bndbox, 'xmin', str(x1))
        _append_text_element(doc, bndbox, 'ymin', str(y1))
        _append_text_element(doc, bndbox, 'xmax', str(x2))
        _append_text_element(doc, bndbox, 'ymax', str(y2))

    # Write the DOM document to disk. toprettyxml(encoding=...) returns
    # bytes, so the file must be opened in binary mode.
    out_path = os.path.join(out_dir, os.path.splitext(img_name)[0] + '.xml')
    with open(out_path, 'wb') as f:
        f.write(doc.toprettyxml(indent='\t', encoding='utf-8'))

4. json：

看这两篇文章就行，比较简单：《Python Json序列化与反序列化》（www.cnblogs.com）、《python读写json文件 - Bigberg - 博客园》（www.cnblogs.com）

5. html：

采用lxml库：

from lxml.html import parse

from urllib2 import urlopen

# Open the page and hand the response object to lxml's HTML parser.
# NOTE(review): the argument below looks like a page title rather than a
# URL -- confirm the intended address.
response = urlopen('python3读取html文件 - xqnq2007 - 博客园')
parsed = parse(response)
# Root element of the parsed HTML tree.
doc = parsed.getroot()

6. HDF5

HDF5（h5）接受的数据是矩阵，这一点与 mat 方法一致，但具有更强的压缩性能。

相关读取见文章：《Python 操作 HDF5文件》（www.jianshu.com）、https://blog.csdn.net/dcrmg/article/details/79165951

weixin_39980353

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python gettextsize_20190320get—python各种文件加载与存储

1.处理txt：python txt文件常用读写操作www.cnblogs.com2.pkl：用cpickle或者pandas中的read_picklehttps://blog.csdn.net/qq_23052951/article/details/53729710blog.csdn.net和pickle:3. xml主要用import xml.dom.minidom as xmldomxm...
复制链接

扫一扫