Python开发之处理常见的txt、excel、csv文档
前言:本文主要介绍如何用Python处理数据分析中常见的txt、excel、csv文件。
(一)txt文件处理
1.读取小文件
代码:
def read_small_txt(file_name="data.txt"):
    """Print the entire contents of a small UTF-8 text file.

    The whole file is loaded with a single ``read()`` call, so this is only
    appropriate for files that comfortably fit in memory.

    Args:
        file_name: Path of the file to read. Defaults to ``"data.txt"``.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_name, "r", encoding="utf-8") as f:
        print(f.read())
2.按行读取大文件
代码:
def read_large_txt(file_name="data.txt"):
    """Print a UTF-8 text file line by line, skipping empty lines.

    The file object is iterated lazily, so only one line is held in memory
    at a time.

    Args:
        file_name: Path of the file to read. Defaults to ``"data.txt"``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # `with` guarantees the handle is closed even if printing raises.
    with open(file_name, "r", encoding="utf-8") as file:
        for line in file:
            line = line.rstrip("\n")
            if line:
                # Depending on the text layout, the line could be converted
                # to a dict, tuple or list here instead of being printed.
                print(line)
3.写文件
代码:
def write_txt(file_name="data2.txt", content='Hello, python!'):
    """Write text to a UTF-8 file, replacing any existing contents.

    Args:
        file_name: Path of the file to (over)write. Defaults to
            ``"data2.txt"``.
        content: Text to write. Defaults to ``'Hello, python!'``.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    with open(file_name, 'w', encoding="utf-8") as f:
        f.write(content)
4.追加写文件
多用于爬虫的时候或者从服务器获取很多的数据的时候,获取一行数据,保存一条数据。
代码:
def write_zhujia_txt(file_name="data2.txt", content="我是追加的内容.........."):
    """Append one line of text to a UTF-8 file, creating it if necessary.

    Useful when records arrive one at a time (e.g. from a crawler or a
    server) and each one should be persisted as soon as it is received.

    Args:
        file_name: Path of the file to append to. Defaults to
            ``"data2.txt"``.
        content: Text of the line to append; a trailing newline is added.

    Raises:
        OSError: If the file cannot be opened for appending.
    """
    # Plain "a" is enough: the file is only written, never read back.
    with open(file_name, "a", encoding="utf-8") as f:
        f.write(content + "\n")
5.具有一定格式的txt转换成excel文件
比如:
可以发现,每三行是一条记录(一个类别),所以可以转换成excel。
代码:
import pandas as pd
def txt_to_df(file_name):
    """Convert a text file made of three-line records into a DataFrame.

    Empty lines are ignored. Every three consecutive non-empty lines form
    one record, mapped in order to the columns "项目名称", "合作单位" and
    "主要完成人". Each record also gets a constant "编号" column. Trailing
    lines that do not complete a record are dropped.

    Args:
        file_name: Path of the UTF-8 text file to parse.

    Returns:
        A ``pandas.DataFrame`` with one row per complete record; an empty
        DataFrame when the file contains no complete record.

    Raises:
        OSError: If the file cannot be opened.
    """
    field_names = ("项目名称", "合作单位", "主要完成人")
    records = []
    pending = []  # non-empty lines collected for the record in progress

    # `with` closes the file even if parsing raises.
    with open(file_name, "r", encoding="utf-8") as file:
        for line in file:
            line = line.rstrip("\n")
            if not line:
                continue
            pending.append(line)
            if len(pending) == len(field_names):
                # NOTE(review): 2018 is a hard-coded id carried over
                # unchanged; confirm whether it should vary per record.
                record = {"编号": 2018}
                record.update(zip(field_names, pending))
                records.append(record)
                pending = []

    # Build the frame once from all rows: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call.
    return pd.DataFrame(records)
if __name__ == '__main__':
    # Parse the three-line-per-record text file and show the result.
    df = txt_to_df("安徽省人民政府关于2015年度安徽省科学技术奖励的决定.txt")
    print(df)
    # The context manager saves and closes the workbook on exit, replacing
    # ExcelWriter.save(), which was removed in pandas 2.0. The `encoding`
    # argument of to_excel was removed in the same release.
    with pd.ExcelWriter('安徽省人民政府关于2015年度安徽省科学技术奖励的决定.xlsx') as writer:
        df.to_excel(writer, index=False)
(二)excel文件处理
本次介绍主要使用pandas来处理文件。如果你对pandas不太熟悉,可以看看我的另外两篇博客:《Python开发之Pandas的简单使用(一)》和
《Python开发之Pandas的简单使用(二)》。
另外,这里也推荐一篇其他作者的博客《使用pandas读取excel》。
data.xlsx的内容格式:
1.读取excel的单列数据,返回list
代码:
import pandas as pd
def read01_excel_data(file_path="data.xlsx"):
    """Read three columns of the first sheet, each as its own list.

    Reference: https://blog.csdn.net/weixin_38546295/article/details/83537558

    Args:
        file_path: Path of the Excel workbook. Defaults to ``"data.xlsx"``.
            The first sheet must contain the columns ``title1``, ``title2``
            and ``title3``.

    Returns:
        A 3-tuple ``(title1_list, title2_list, title3_list)``.

    Raises:
        KeyError: If one of the expected columns is missing.
    """
    df = pd.read_excel(file_path, sheet_name=0)
    return tuple(df[column].tolist() for column in ("title1", "title2", "title3"))
2.读取excel文件,返回list 元组数据
def read02_excel_data(file_path="data.xlsx"):
    """Read the first sheet of a workbook as a list of row tuples.

    Args:
        file_path: Path of the Excel workbook. Defaults to ``"data.xlsx"``.
            The first sheet must contain the columns ``title1``, ``title2``
            and ``title3``.

    Returns:
        A list ``[(title1, title2, title3), ...]`` with one tuple per row.

    Raises:
        KeyError: If one of the expected columns is missing.
    """
    df = pd.read_excel(file_path, sheet_name=0)
    return list(
        zip(
            df['title1'].tolist(),
            df['title2'].tolist(),
            df['title3'].tolist(),
        )
    )
3.读取excel文件,返回list 字典数据
def read03_excel_data(file_path="data.xlsx"):
    """Read the first sheet of a workbook as a list of row dictionaries.

    Args:
        file_path: Path of the Excel workbook. Defaults to ``"data.xlsx"``.
            The first sheet must contain the columns ``title1``, ``title2``
            and ``title3``.

    Returns:
        A list ``[{"title1": ..., "title2": ..., "title3": ...}, ...]``
        with one dictionary per row.

    Raises:
        KeyError: If one of the expected columns is missing.
    """
    df = pd.read_excel(file_path, sheet_name=0)
    rows = zip(
        df['title1'].tolist(),
        df['title2'].tolist(),
        df['title3'].tolist(),
    )
    return [{"title1": i, "title2": j, "title3": k} for i, j, k in rows]
4.list保存成excel文件(一)
def save_excel(file_path="data2.xlsx"):
    """Write 100 generated rows to sheet "sheet01" of an Excel workbook.

    The sheet has a single column "标题1" whose values are "我是内容0"
    through "我是内容99"; the row index is not written.

    Args:
        file_path: Path of the workbook to create. Defaults to
            ``"data2.xlsx"``.
    """
    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [{"标题1": "我是内容{}".format(i)} for i in range(100)]
    data_df = pd.DataFrame(rows)
    with pd.ExcelWriter(file_path) as w:
        # `encoding` is no longer accepted by to_excel (removed in pandas 2.0).
        data_df.to_excel(w, index=False, sheet_name="sheet01")
5.list保存成excel文件(二)
def save2_excel(file_path="data3.xlsx"):
    """Write two generated columns to sheet "sheet01" of an Excel workbook.

    The columns "标题1" and "标题2" each hold 100 generated strings; the row
    index is not written.

    Args:
        file_path: Path of the workbook to create. Defaults to
            ``"data3.xlsx"``.
    """
    # Column-oriented data: each key maps to the full list of its values.
    title = {
        '标题1': ["我是标题1的{}".format(i) for i in range(100)],
        '标题2': ["我是标题2的{}".format(i) for i in range(100)],
    }
    data_df = pd.DataFrame.from_dict(title)
    with pd.ExcelWriter(file_path) as w:
        # `encoding` is no longer accepted by to_excel (removed in pandas 2.0).
        data_df.to_excel(w, index=False, sheet_name="sheet01")
(三)csv文件处理
csv文件内容形式:
1.csv文件的读取
代码:
import pandas as pd
def read_csv_data(file_path="data.csv", column="num1"):
    """Print one column of a CSV file as a Python list.

    Args:
        file_path: Path of the CSV file. Defaults to ``"data.csv"``.
        column: Name of the column to extract. Defaults to ``"num1"``.

    Raises:
        KeyError: If the column is not present in the file.
    """
    df = pd.read_csv(file_path)
    # Keep the extracted list under its own name instead of rebinding `df`.
    values = df[column].tolist()
    print(values)
2.list转存成csv文件
import pandas as pd
# Row-oriented sample data. Named `rows` rather than `list` so the built-in
# `list` type stays usable in the rest of the module.
rows = [[1, 2, 3], [4, 5, 6], [7, 9, 9]]
name = ['a', 'b', 'c']
test = pd.DataFrame(columns=name, data=rows)  # three columns: a, b, c
print(test)
# NOTE: with the default index=True the row index is also written, as an
# unnamed first column of the CSV.
test.to_csv('data.csv', encoding='utf-8')
再见!