1. 需求分析:
读取每个 .aux.xml 文件中 HistCounts 标签的数据,以“|”为分隔符拆分后,将各个数值分别写入 Excel 表格中对应的列。
2.代码:
import os
import os.path
import xml.dom.minidom
import pandas as pd
# Script purpose: for every city folder and every year 2011-2019, read the
# first <HistCounts> element of that year's .aux.xml file, split its text on
# "|", and store the first nine values in the land-cover columns of the
# matching (city, year) row of the workbook. The result is saved as a new file.

# Forward slashes work in Windows paths, so no raw-string (r'...') prefix is
# needed here.
file_path = 'C:/Users/JING5/Desktop/城市与用地匹配.xlsx'
raw_data = pd.read_excel(file_path, header=0)
path = "C:/Users/JING5/Desktop/城市/unzip"
files = os.listdir(path)  # every entry name found directly under `path`

# Destination columns, in the order the HistCounts values are assigned to them.
land_cover_columns = ['农田', '森林', '灌木', '草原', '水域', '冰雪', '裸地', '不透水面', '湿地']

# Fail early when a destination column is absent: `.at` would otherwise
# silently create a new column instead of raising.
missing_columns = [c for c in land_cover_columns if c not in raw_data.columns]
if missing_columns:
    raise KeyError(f"columns missing from {file_path}: {missing_columns}")

# Map (city, year) -> label of the FIRST row carrying that pair, built once so
# the table is not rescanned for every folder/year combination.
row_lookup = {}
for row_label, row_city, row_year in zip(raw_data.index, raw_data['city'], raw_data['year']):
    if pd.isna(row_year):
        continue  # a row without a year can never match a concrete year
    row_lookup.setdefault((row_city, int(row_year)), row_label)

for folder_name in files:
    path_city = path + "/" + folder_name
    if not os.path.isdir(path_city):
        continue  # stray files in the folder cannot contain the per-year XML

    # NOTE(review): presumably folder names carry an 8-character prefix in
    # front of the city name - verify against the actual folder naming.
    city_name = folder_name[8:]

    for year in range(2011, 2020):
        row_label = row_lookup.get((city_name, year))
        if row_label is None:
            continue  # no row in the workbook for this city/year

        path_xml = path_city + "/【立方数据学社】CLCD_v01_{}_albert.tif.aux.xml".format(year)
        dom = xml.dom.minidom.parse(path_xml)
        hist_counts = dom.getElementsByTagName('HistCounts')[0]
        list_data = hist_counts.firstChild.data.split("|")

        # Write through `.at` (a single indexing step) so the assignment always
        # reaches `raw_data` itself; chained `raw_data[col][k] = ...` may only
        # modify a temporary copy. Values are converted to int so the cells
        # hold numbers rather than text.
        for position, column in enumerate(land_cover_columns):
            raw_data.at[row_label, column] = int(list_data[position])

raw_data.to_excel("最新数据.xlsx")