目录
主题一:数据读取与写入
要求:
(1)读取txt、EXCEL、CSV等至少三种格式的数据,数据内容见附件1;
(2)对读取的数字格式数据进行开根号处理,文本类型的数据保持不变;
(3)数据处理完成后,将所有数据按照各自的原文件格式输出。
# -*-coding:utf-8 -*-
"""
作者:XJQ
"""
from numpy import *
import pandas as pd
import numpy as np
import os
import xlrd
from openpyxl import load_workbook
import math
from pandas import read_csv
# List the entries of a folder
def readname(f_path):
    """Return the names of the entries found directly inside ``f_path``.

    Args:
        f_path: Path of the folder to scan.

    Returns:
        The list produced by ``os.listdir`` (bare names, no directory prefix).
    """
    return os.listdir(f_path)
def date(para):
    """Convert an Excel serial day number into a pandas ``Timestamp``.

    The value is truncated to a whole number of days with ``int`` and added
    to the epoch 1899-12-30, so any fractional (time-of-day) part is dropped.

    Args:
        para: Numeric day count (anything ``int()`` accepts).

    Returns:
        ``pandas.Timestamp`` for the corresponding calendar day.
    """
    excel_epoch = pd.to_datetime('1899-12-30')
    whole_days = int(para)
    return excel_epoch + pd.Timedelta(f'{whole_days}days')
def do_excel(path, name):
    """Square-root the numeric columns of an Excel sheet and save a copy.

    Reads sheet ``Sheet1`` of ``name`` inside ``path``, converts the ``时间``
    column with ``date``, takes the square root of ``数据1`` and ``数据2``,
    and writes the result to ``data_sqrt.xlsx`` (sheet ``data_sqrt``) in the
    same folder.

    Args:
        path: Folder containing the workbook; the output is written here too.
        name: File name of the workbook inside ``path``.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a value in ``数据1``/``数据2`` is negative.
    """
    # os.path.join replaces the hand-built '\\' separators and the invalid
    # '\d' escape that the original output path contained.
    books1 = pd.read_excel(os.path.join(path, name), header=0, sheet_name='Sheet1')
    # NOTE(review): ``date`` calls int() on every cell, so this presumably
    # expects the column to hold Excel serial numbers - confirm with the data.
    books1['时间'] = books1['时间'].apply(date)
    for column in ('数据1', '数据2'):
        books1[column] = books1[column].apply(math.sqrt)
    # ExcelWriter.save() no longer exists in pandas >= 2.0; the context
    # manager saves and closes the workbook on every pandas version.
    with pd.ExcelWriter(os.path.join(path, 'data_sqrt.xlsx')) as writer:
        books1.to_excel(writer, sheet_name='data_sqrt', index=False)
def do_txt(path, name):
    """Square-root the numeric columns of a tab-delimited GBK text file.

    Reads ``name`` inside ``path``, takes the square root of ``数据1`` and
    ``数据2`` and writes the table to ``data_sqrt.txt`` in the same folder,
    keeping the input layout: header row, tab delimiter, GBK encoding.

    Args:
        path: Folder containing the text file; the output is written here too.
        name: File name of the text file inside ``path``.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a value in ``数据1``/``数据2`` is negative.
    """
    table = pd.read_csv(
        os.path.join(path, name), header=0, delimiter='\t', encoding='gbk'
    )
    for column in ('数据1', '数据2'):
        table[column] = table[column].apply(math.sqrt)
    # np.savetxt dropped the header and switched to spaces; to_csv with the
    # same delimiter/encoding as the reader keeps the original format.
    table.to_csv(
        os.path.join(path, 'data_sqrt.txt'), sep='\t', index=False, encoding='gbk'
    )
def do_csv(path, name):
    """Square-root the numeric columns of a GBK-encoded CSV file.

    Reads ``name`` inside ``path`` with pandas, takes the square root of
    ``数据1`` and ``数据2`` and writes the table to ``data_sqrt.csv`` in the
    same folder with the same columns and encoding as the input.

    Args:
        path: Folder containing the CSV file; the output is written here too.
        name: File name of the CSV file inside ``path``.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a value in ``数据1``/``数据2`` is negative.
    """
    data = pd.read_csv(os.path.join(path, name), header=0, encoding='gbk')
    for column in ('数据1', '数据2'):
        data[column] = data[column].apply(math.sqrt)
    # index=False avoids the extra unnamed index column the original wrote;
    # encoding='gbk' keeps the output readable the same way as the input.
    data.to_csv(os.path.join(path, 'data_sqrt.csv'), index=False, encoding='gbk')
def f_into():
    """Ask for a folder and process every xlsx/txt/csv file found in it.

    Each file is dispatched by its extension to ``do_excel``, ``do_txt`` or
    ``do_csv``. Files named ``data_sqrt.*`` are skipped because they are the
    outputs of a previous run, not input data.
    """
    # Example folder: C:\Users\86136\Desktop\read_data
    f_path = input("请输入文件夹地址:")
    handlers = {'.xlsx': do_excel, '.txt': do_txt, '.csv': do_csv}
    for entry in readname(f_path):
        stem, extension = os.path.splitext(entry)
        if stem == 'data_sqrt':
            # Re-reading our own output would fail (different sheet/layout).
            continue
        # Match on the real extension; a substring test such as
        # "'txt' in name" also fires for names like "txt_notes.csv".
        handler = handlers.get(extension.lower())
        if handler is not None:
            handler(f_path, entry)


if __name__ == "__main__":
    f_into()
"""
作者:LK
"""
import pandas as pd
# Read and write a txt file
def data_txt(data_path, save_path):
    """Square-root ``数据1``/``数据2`` of a tab-delimited GBK text file.

    Every value is replaced by its square root rounded to two decimals and
    the table is written to ``save_path`` in the same layout as the input:
    header row, tab delimiter, GBK encoding.

    Args:
        data_path: Path of the input text file.
        save_path: Path of the file to write.
    """
    data = pd.read_table(data_path, encoding='GBK')  # read the data
    for column in ('数据1', '数据2'):  # columns to process
        data[column] = [round(value ** 0.5, 2) for value in data[column]]
    # The original wrote space-separated rows without a header, which is not
    # the format that was read; mirror the reader's settings instead.
    data.to_csv(save_path, sep='\t', index=False, encoding='GBK')
# Read and write a csv file
def data_csv(data_path, save_path):
    """Square-root ``数据1``/``数据2`` of a GBK-encoded CSV file.

    Every value is replaced by its square root rounded to two decimals and
    the table is written to ``save_path`` as a comma-separated, GBK-encoded
    file that keeps the header row of the input.

    Args:
        data_path: Path of the input CSV file.
        save_path: Path of the file to write.
    """
    data = pd.read_csv(data_path, encoding='GBK')
    for column in ('数据1', '数据2'):
        data[column] = [round(value ** 0.5, 2) for value in data[column]]
    # Keep the header and encoding so the output matches the input format
    # (the original dropped the header and wrote UTF-8).
    data.to_csv(save_path, sep=',', index=False, encoding='GBK')
# Read and write an Excel file
def data_xls(data_path, save_path):
    """Square-root ``数据1``/``数据2`` of an Excel workbook.

    Every value is replaced by its square root rounded to two decimals and
    the table is written to ``save_path`` without an index column, matching
    what ``data_txt`` and ``data_csv`` do.

    Args:
        data_path: Path of the input workbook.
        save_path: Path of the workbook to write.
    """
    data = pd.read_excel(data_path)
    for column in ('数据1', '数据2'):
        data[column] = [round(value ** 0.5, 2) for value in data[column]]
    # index=False: the input has no index column, so do not add one.
    data.to_excel(save_path, index=False)
if __name__ == '__main__':
    # Input workbook and destination of the processed copy.
    data_path, save_path = './data.xlsx', './data_output.xlsx'
    # Only the Excel variant is run by this entry point.
    data_xls(data_path, save_path)
"""
作者:HWY
"""
import pandas as pd
import math
# Read the tab-delimited txt file
df1 = pd.read_csv(r'C:\Users\郝文宇\Desktop\开根号.txt', delimiter='\t')
# Read the Excel file
df2 = pd.read_excel(r'C:\Users\郝文宇\Desktop\开根号.xlsx')
# Read the CSV file
df3 = pd.read_csv(r'C:\Users\郝文宇\Desktop\开根号.csv')
# Helper: take the square root of numeric values only
def sqrt_if_number(x):
    """Return ``math.sqrt(x)`` for positive ints/floats, otherwise ``x``.

    Text, ``None``, zero and negative numbers are handed back unchanged.

    Args:
        x: A single cell value.

    Returns:
        The square root as a float, or ``x`` itself when it is not a
        positive ``int``/``float``.
    """
    is_positive_number = isinstance(x, (int, float)) and x > 0
    return math.sqrt(x) if is_positive_number else x
# Take the square root of the two value columns of every table
for frame in (df1, df2, df3):
    for column in ('数据1', '数据2'):
        frame[column] = frame[column].apply(sqrt_if_number)
# Write each table back in the format it was read from
df1.to_csv('output1.txt', sep='\t', index=False)
df2.to_excel('output2.xlsx', index=False)
df3.to_csv('output3.csv', index=False)
"""
作者:DWK
"""
import pandas as pd
import numpy as np
# Cell-level helper used to process the data
def process_data(data):
    """Return ``np.sqrt(data)`` for ints/floats and ``data`` otherwise.

    Args:
        data: A single cell value.

    Returns:
        The NumPy square root when ``data`` is an ``int`` or ``float``;
        any other value (for example text) is returned untouched.
    """
    return np.sqrt(data) if isinstance(data, (int, float)) else data
# Read the txt, Excel and CSV files and run process_data over every cell.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map - confirm the pandas version this is meant to run on.
txt_data_processed = pd.read_table(r'C:\Users\16370\Desktop\read_data\data.txt', encoding='gbk').applymap(process_data)
excel_data_processed = pd.read_excel(r'C:\Users\16370\Desktop\read_data\data.xlsx').applymap(process_data)
csv_data_processed = pd.read_csv(r'C:\Users\16370\Desktop\read_data\data.csv', encoding='gbk').applymap(process_data)
# Write the processed tables to new files in the current working directory:
# tab-separated txt, xlsx and comma-separated csv, all without an index column.
txt_data_processed.to_csv('data_processed.txt', sep='\t', index=False)
excel_data_processed.to_excel('data_processed.xlsx', index=False)
csv_data_processed.to_csv('data_processed.csv', index=False)
"""
作者:CYF
"""
import math
import pandas as pd
import numpy as np
import xlrd
import xlwt
def csv():
    """Square-root columns 2 and 3 of ``./py_csv.csv`` into a new CSV.

    The GBK-encoded input is read with pandas; for every row the first
    value is kept and the next two are replaced by their square roots. The
    rows are written, in their original order and with a 0-based index
    column, to ``./new_py_csv.csv`` under the headers 日期/数据1/数据2.

    Raises:
        ValueError: If one of the numeric values is negative.
    """
    with open('./py_csv.csv', encoding='GBK') as f:
        data = pd.read_csv(f)
    # The original shuffled the rows at random before writing, which made
    # the output non-deterministic and broke the chronological order.
    rows = [
        [line[0], math.sqrt(line[1]), math.sqrt(line[2])]
        for line in data.values
    ]
    # The default RangeIndex equals the 0..n-1 list the original built by hand.
    result = pd.DataFrame(rows, columns=['日期', '数据1', '数据2'])
    result.to_csv('./new_py_csv.csv')
def txt():
    """Square-root columns 2 and 3 of ``./py_txt.txt`` into a new text file.

    The input is parsed with ``pd.read_csv`` defaults (comma-separated). For
    every row the first value is kept and the next two are replaced by their
    square roots. The rows are written in their original order, space
    separated and without an index, to ``./new_py_txt.txt`` under the
    headers 日期/数据1/数据2.

    Raises:
        ValueError: If one of the numeric values is negative.
    """
    data = pd.read_csv('./py_txt.txt')
    # No random shuffle here: the output must keep the input row order and
    # be reproducible from run to run.
    rows = [
        [line[0], math.sqrt(line[1]), math.sqrt(line[2])]
        for line in data.values
    ]
    result = pd.DataFrame(rows, columns=['日期', '数据1', '数据2'])
    result.to_csv('./new_py_txt.txt', index=False, sep=' ')
def excel():
    """Copy the first sheet of ``./py_excel.xlsx`` with square-rooted values.

    The header row is copied as is. In every following row the first column
    is copied and every other column is replaced by its square root. The
    result is saved as ``new_py_excel.xls`` (sheet ``Sheet1``).
    """
    # NOTE(review): recent xlrd releases may refuse .xlsx input - confirm the
    # installed xlrd version can open this workbook.
    source_sheet = xlrd.open_workbook('./py_excel.xlsx').sheet_by_index(0)
    out_book = xlwt.Workbook(encoding='utf-8')
    out_sheet = out_book.add_sheet('Sheet1', cell_overwrite_ok=True)

    # Header row: first cell of every column.
    for col in range(source_sheet.ncols):
        out_sheet.write(0, col, source_sheet.col_values(col)[0])

    # Data rows: fetch each row once, then write it cell by cell.
    for row in range(1, source_sheet.nrows):
        values = source_sheet.row_values(row)
        for col in range(source_sheet.ncols):
            cell = math.sqrt(values[col]) if col > 0 else values[col]
            out_sheet.write(row, col, cell)

    out_book.save('new_py_excel.xls')
# Run the three conversions in turn: CSV, txt, then Excel.
csv()
txt()
excel()
"""
作者:ZK
"""
import pandas as pd
import math
# Convert an Excel workbook into a TXT file
def excel_to_txt(excel_file_path, txt_file_path):
    """Export the workbook at ``excel_file_path`` as tab-separated text.

    Args:
        excel_file_path: Path of the workbook to read.
        txt_file_path: Path of the text file to write (no index column).
    """
    pd.read_excel(excel_file_path).to_csv(txt_file_path, sep='\t', index=False)
# Convert an Excel workbook into a CSV file
def excel_to_csv(excel_file_path, csv_file_path):
    """Export the workbook at ``excel_file_path`` as a comma-separated file.

    Args:
        excel_file_path: Path of the workbook to read.
        csv_file_path: Path of the CSV file to write (no index column).
    """
    pd.read_excel(excel_file_path).to_csv(csv_file_path, index=False)
# Read a TXT file, square-root the numeric columns and write a TXT file
def read_txt(txt_file_path, out_path=r'E:\Pythonxx\data_out.txt'):
    """Square-root ``数据1``/``数据2`` of a tab-delimited UTF-8 text file.

    Args:
        txt_file_path: Path of the input text file (header in the first row).
        out_path: Path of the tab-separated file to write. Defaults to the
            location the script originally hard-coded, so existing calls
            behave as before.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a value in ``数据1``/``数据2`` is negative.
    """
    df = pd.read_csv(txt_file_path, header=0, delimiter='\t', encoding='utf-8')
    for column in ('数据1', '数据2'):
        df[column] = df[column].apply(math.sqrt)
    df.to_csv(out_path, sep='\t', index=False)
# Read a CSV file, square-root the numeric columns and write a CSV file
def read_csv(csv_file_path, out_path=r'E:\Pythonxx\data_out.csv'):
    """Square-root ``数据1``/``数据2`` of a UTF-8 CSV file.

    Args:
        csv_file_path: Path of the input CSV file (header in the first row).
        out_path: Path of the CSV file to write. Defaults to the location
            the script originally hard-coded, so existing calls behave as
            before.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a value in ``数据1``/``数据2`` is negative.
    """
    df = pd.read_csv(csv_file_path, header=0, encoding='utf-8')
    for column in ('数据1', '数据2'):
        df[column] = df[column].apply(math.sqrt)
    df.to_csv(out_path, index=False)
# Read an Excel file, square-root the numeric columns and write an Excel file
def read_excel(excel_file_path, out_path=r'E:\Pythonxx\data_out.xlsx'):
    """Square-root ``数据1``/``数据2`` of an Excel workbook.

    Args:
        excel_file_path: Path of the input workbook (header in the first row).
        out_path: Path of the workbook to write. Defaults to the location
            the script originally hard-coded, so existing calls behave as
            before.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a value in ``数据1``/``数据2`` is negative.
    """
    df = pd.read_excel(excel_file_path, header=0)
    for column in ('数据1', '数据2'):
        df[column] = df[column].apply(math.sqrt)
    df.to_excel(out_path, index=False)
# Convert a real number (Excel serial day) into a timestamp
def date(para):
    """Return the ``Timestamp`` that lies ``int(para)`` days after 1899-12-30.

    Args:
        para: Numeric day count; the fractional part is discarded by ``int``.

    Returns:
        ``pandas.Timestamp`` of the resulting calendar day.
    """
    day_count = int(para)
    offset = pd.Timedelta(f'{day_count}days')
    return pd.to_datetime('1899-12-30') + offset
# Input/intermediate file paths
excel_file_path =r'E:\Pythonxx\data-excel.xlsx'
txt_file_path = r'E:\Pythonxx\data-txt.txt'
csv_file_path = r'E:\Pythonxx\data-csv.csv'
# Derive the TXT and CSV inputs from the Excel workbook
excel_to_txt(excel_file_path, txt_file_path)
excel_to_csv(excel_file_path, csv_file_path)
# Read the TXT, CSV and Excel files and write each result in its own format
read_txt(txt_file_path)
read_csv(csv_file_path)
read_excel(excel_file_path)
主题二:Python GDAL库研究
GDAL(Geospatial Data Abstraction Library)是一个在X/MIT许可协议下的开源栅格空间数据转换库。它利用抽象数据模型来表达所支持的各种文件格式。它还有一系列命令行工具来进行数据转换和处理。
OGR是GDAL项目的一个分支,提供对矢量数据的支持。
有很多著名的GIS类产品都使用了GDAL/OGR库,包括ESRI的ArcGIS 9.3、Google Earth和跨平台的GRASS GIS系统。利用GDAL/OGR库,可以使基于Linux的地理空间数据管理系统提供对矢量和栅格文件数据的支持。
参考资料:
《GDAL源码剖析与开发指南》李民录著 人民邮电出版社
附件
附件一:数据内容
时间 | 数据1 | 数据2 |
2023年3月16日 | 1 | 0.5 |
2023年3月17日 | 2 | 0.6 |
2023年3月18日 | 3.2 | 0.7 |
2023年3月19日 | 4 | 0.5 |