目录
2.将csv文件写到xml中(将查找插入位置的代码放到for循环外部,插入的速度会快一点):
1.按照某个固定大小将一个csv文件切分成多个:
# Split 'data_value(1).csv' into consecutive fixed-size pieces written to
# '0.csv', '1.csv', ... in the current directory.
#
# The header line is simply the first entry of the line list, so it ends up
# at the top of '0.csv' only; every later piece holds data lines exclusively.
LINES_PER_FILE = 151   # number of lines written to each output file
FILE_COUNT = 186       # number of output files to create

# Read the whole input once; the context manager closes the file afterwards.
with open('data_value(1).csv', encoding='utf-8') as f:
    head = f.readline()
    l = [head]
    l.extend(f)

for i in range(FILE_COUNT):
    filename = str(i) + '.csv'
    # Each output file is closed (and therefore flushed) before the next one
    # is opened. Slicing never raises, so a short input produces short or
    # empty trailing files instead of an IndexError.
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.writelines(l[i * LINES_PER_FILE:(i + 1) * LINES_PER_FILE])
2.将csv文件写到xml中(将查找插入位置的代码放到for循环外部,插入的速度会快一点):
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import Element, SubElement
import pandas as pd
import numpy as np
import time
# Append point features and line features, built from two CSV files, to an
# existing XML document, then print how long the whole run took.
t1 = time.time()

# Data set 1: the loops below read column 0 of each row as a numeric code and
# columns 1 and 2 as the x / y coordinates.
data = np.array(pd.read_csv('coor_16.csv', header=None))

# Data set 2, kept both as loaded and transposed.
data1 = np.array(pd.read_csv('16.csv', header=None))
data2 = np.transpose(data1)

tree = ET.ElementTree(file='stdData0_1.xml')

# Both insertion points are looked up once, outside the loops, so the search
# is not repeated for every inserted feature.
# NOTE(review): the two arguments are placeholder names that this snippet
# never defines - replace them with the real paths of the parent elements.
ele = tree.find(要插入的指定位置1)
ele2 = tree.find(要插入的指定位置2)

for i in range(1674):
    row = data[i]

    # Create the feature element and the content nested inside it.
    feature = SubElement(ele, 'feature')
    feature.set("geoType", "1")

    CPoints = SubElement(feature, 'CPoints')
    coordinate = SubElement(CPoints, 'coordinate')
    # NOTE(review): str(0.000000) renders as '0.0', unlike the literal
    # '0.000000' used for the line features below - confirm the format.
    coordinate.text = ' '.join((str(row[1]), str(row[2]), str(0.000000)))
    coordinate.tail = "\n"

    CRecord = SubElement(feature, 'CRecord')
    Drilling_code = SubElement(CRecord, "钻孔编码")
    Drilling_code.text = 'Trace_' + str(int(row[0]))
    Drilling_code.tail = "\n"
    # ... and so on for the remaining fields.

for m in range(1674):
    for n in range(150):
        print(f"现在进行第{m}列第{n}行的数据")

        feature2 = SubElement(ele2, 'feature')
        feature2.set("geoType", "4")

        CAnyLine = SubElement(feature2, 'CAnyLine')
        coordinate2 = SubElement(CAnyLine, 'coordinate')
        # The coordinate is a fixed all-zero value here; reading x / y from
        # data[m] was left disabled in the original snippet.
        coordinate2.text = '0.000000 0.000000 0.000000'
        coordinate2.tail = "\n"

        CRecord2 = SubElement(feature2, 'CRecord')
        Drilling_code2 = SubElement(CRecord2, "钻孔编码")
        Drilling_code2.text = 'Trace_' + str(int(data[m][0]))
        Drilling_code2.tail = "\n"
        # ... and so on for the remaining fields.

tree.write('result_last.xml', encoding="utf-8")
t2 = time.time()
print(f'time cost: {t2 - t1:.6f}')
3.python将excel文件的某几列写入csv文件
def writeExcelToCSV(orign_path, save_path, name, list):
    """Copy selected columns of one Excel sheet into a CSV file.

    The sheet is read with ``header=None`` and the CSV is written without an
    index column or header row, encoded as ``utf_8_sig``.

    Args:
        orign_path: Path of the source Excel file.
        save_path: Path of the CSV file to write.
        name: Sheet to read (passed to ``pd.read_excel`` as its second
            positional argument).
        list: Positions of the columns to keep (used with ``DataFrame.iloc``).
    """
    sheet = pd.read_excel(orign_path, name, header=None)
    sheet.iloc[:, list].to_csv(
        save_path, index=False, header=False, encoding='utf_8_sig'
    )
4.python将csv文件写到txt文件中
def writeCsvToTxt(txt_path, csv_path):
    """Copy a CSV file into a text file, stripping leading whitespace per line.

    Args:
        txt_path: Path of the text file to create or overwrite. It is opened
            with the platform's default text encoding.
        csv_path: Path of the source CSV file, decoded as ``utf-8-sig``.
    """
    # Both files are managed by ``with`` so the output is flushed and closed
    # even if reading or writing fails part-way. The text file is opened
    # first, as before, so it is created even when the CSV cannot be opened.
    with open(txt_path, 'w') as f_txt:
        with open(csv_path, 'r', encoding='utf-8-sig') as f:
            # Stream line by line instead of loading the whole file.
            for line in f:
                # lstrip() without arguments removes every leading whitespace
                # character, so a whitespace-only line (newline included) is
                # dropped entirely.
                f_txt.write(line.lstrip())
5.将爬取下来的数据写到excel中
def write_to_excel(filename, data_list, tem_high, tem_low, sheet_name):
    """Write scraped daily temperatures to an Excel workbook.

    The sheet gets a header row followed by one row per entry of
    ``data_list``: date, highest temperature, lowest temperature and the
    average of the two.

    Args:
        filename: Name of the Excel file to save.
        data_list: Scraped dates.
        tem_high: Scraped highest temperatures.
        tem_low: Scraped lowest temperatures.
        sheet_name: Name of the worksheet to create.
    """
    # Average temperature per day; both readings are converted with int().
    avg = [
        (int(high) + int(tem_low[idx])) / 2
        for idx, high in enumerate(tem_high)
    ]

    # Create the workbook and the worksheet.
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet(sheet_name)

    # Header row.
    heads = ['日期', '最高气温', '最低气温', '平均气温']
    for col, title in enumerate(heads):
        sheet.write(0, col, title)

    # Data rows start on the second row (index 1), right below the header.
    for idx, day in enumerate(data_list):
        row = idx + 1
        sheet.write(row, 0, day)
        sheet.write(row, 1, tem_high[idx])
        sheet.write(row, 2, tem_low[idx])
        sheet.write(row, 3, avg[idx])

    # Save the file and report success.
    work_book.save(filename)
    print('写入成功')
6.一个有用的python包heapq
# heapq ships with the standard library, so nothing needs to be installed.
import heapq

nums = [4, 5, 21, 6, 9, 25, -1, 7, -10, 55]

# nlargest(n, iterable) returns the n largest items, largest first.
result_max = heapq.nlargest(3, nums)  # [55, 25, 21]

# nsmallest(n, iterable) returns the n smallest items, smallest first.
result_min = heapq.nsmallest(3, nums)  # [-10, -1, 4]