数据处理
这里主要整理的是.txt
, .json
, .csv
, .excel
, .pkl
文件之间的数据存储和转化
1. txt部分
1.1 List[dict] 互转 txt
具体代码
def save_jsonlist_txt(json_data, target_path):
""" 保存json列表到txt文件中
"""
with open(target_path, "w", encoding="utf-8") as file:
for item in json_data:
item = str(item).replace("\n", "\\n")
file.write(item)
file.write("\n")
def load_jsonlist_txt(source_path):
""" 从txt文件中加载json列表
"""
json_list = []
try:
with open(source_path, "r", encoding="utf-8") as file:
for line in file:
try:
json_data = eval(line.strip())
json_list.append(json_data)
except json.JSONDecodeError:
print(f"无法解析Json数据: {line.strip()}")
return json_list
except FileNotFoundError:
print(f"文件 '{source_path} 未找到'")
1.2 List[str] 互转 txt
def save_str_list_txt(str_list, target_path):
""" 保存str列表到txt文件中
"""
with open(target_path, "w", encoding="utf-8") as file:
for item in str_list:
item = item.replace("\n", "\\n")
file.write(item)
file.write("\n")
def load_strlist_txt(file_path):
""" 从txt文件中加载str列表
"""
txt_list = []
try:
with open(file_path, "r", encoding="utf-8") as file:
for line in file.readlines():
txt_list.append(line.strip())
return txt_list
except FileNotFoundError:
print(f"文件 ‘{file_path} 未找到’")
1.3 txt相关操作的整体代码
import json
def save_json_list_txt(json_data, target_path):
""" 保存json列表到txt文件中
"""
with open(target_path, "w", encoding="utf-8") as file:
for item in json_data:
item = str(item).replace("\n", "\\n")
file.write(item)
# file.write(str(item))
file.write("\n")
def load_json_list_txt(source_path):
""" 从txt文件中加载json列表
"""
json_list = []
try:
with open(source_path, "r", encoding="utf-8") as file:
for line in file:
try:
json_data = eval(line.strip())
json_list.append(json_data)
except json.JSONDecodeError:
print(f"无法解析Json数据: {line.strip()}")
return json_list
except FileNotFoundError:
print(f"文件 '{source_path} 未找到'")
def save_str_list_txt(str_list, target_path):
""" 保存str列表到txt文件中
"""
with open(target_path, "w", encoding="utf-8") as file:
for item in str_list:
item = item.replace("\n", "\\n")
file.write(item)
file.write("\n")
def load_str_list_txt(file_path):
""" 从txt文件中加载str列表
"""
txt_list = []
try:
with open(file_path, "r", encoding="utf-8") as file:
for line in file.readlines():
txt_list.append(line.strip())
return txt_list
except FileNotFoundError:
print(f"文件 ‘{file_path} 未找到’")
if __name__ == '__main__':
human_list = [
{
"name": "jordan\n",
"age": 30,
"city": "南昌",
"list": [3, 6, 0]
},
{
"name": "John",
"age": 30,
"city": "广州",
"list": [3, 6, 1]
},
{
"name": "JiWang",
"age": 30,
"city": "深圳",
"list": [2, 3, 1]
},
]
day_list = ["星期一\n\n", "星期二", "星期三", "星期四", "星期⑤", "星期六", "星期天"]
save_json_list_txt(human_list, "human_list.txt")
new_human_list = load_json_list_txt("human_list.txt")
print(new_human_list)
# print(type(new_human_list))
# print(type(new_human_list[0]))
save_str_list_txt(day_list, "day.txt")
new_day_list = load_str_list_txt("day.txt")
print(new_day_list)
2. json格式部分
这里建议了解一下json.load, json.loads, json.dump, json.dumps的区别
2.1 List[dict] 互转 json
具体方法代码
def save_jsonline_json(json_list, target_path):
"""
json_list ---> jsonline
:param json_list:
:param target_path:
:return:
"""
with open(target_path, 'w', encoding="utf-8") as json_file:
for item in json_list:
json.dump(item, json_file, ensure_ascii=False)
json_file.write("\n")
print(f"Json列表已经保存到 {target_path} 文件中。 每一行为一个json对象")
def load_jsonline_json(source_file):
"""
jsonline --> json_list
:param source_file:
:return: json_list
"""
json_list = []
with open(source_file, "r", encoding="utf-8") as json_file:
for line in json_file:
try:
json_data = json.loads(line.strip())
json_list.append(json_data)
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}")
return json_list
2.2 ditc, List[dict] 互转json文件
场景1:
场景2:
def save_json(json_list, target_path):
"""save_json"""
with open(target_path, "w", encoding="utf-8") as json_file:
json.dump(json_list, json_file, indent=4, ensure_ascii=False)
print(f"Json列表已经保存到 {target_path} 文件中。")
def load_json(source_file):
"""load_json"""
json_list = []
with open(source_file, "r", encoding="utf-8") as json_file:
try:
json_list = json.load(json_file)
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}")
return json_list
2.3 List[dict] 互转 excel
场景:
这里需要注意excel转回来的json, 向list这种他是str类型的, 不会保留list类型, 如需进一步操作需要eval等操作
def save_jsonlist_excel(json_list, excel_targe_file):
"""jsonlist_trans_excel"""
df = pd.DataFrame(json_list)
df.to_excel(excel_targe_file, index=False)
def load_jsonlist_excel(excel_source_path):
"""excel_trans_jsonlist"""
df = pd.read_excel(excel_source_path)
json_list = df.to_dict(orient="records")
return json_list
2.4 jsonlist 转化为 csv
def save_jsonlist_csv(data_list, csv_target_path):
"""save_jsonlist_csv"""
with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file:
fieldnames = data_list[0].keys() # 使用第一个字典的键作为列名
csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
csv_writer.writeheader() # 写入 CSV 头部(列名)
for data in data_list:
csv_writer.writerow(data)
2.5 json相关操作整体代码
import json
import pandas as pd
import csv
def save_json(json_list, target_path):
"""save_json"""
with open(target_path, "w", encoding="utf-8") as json_file:
json.dump(json_list, json_file, indent=4, ensure_ascii=False)
print(f"Json列表已经保存到 {target_path} 文件中。")
def load_json(source_file):
"""load_json"""
json_list = []
with open(source_file, "r", encoding="utf-8") as json_file:
try:
json_list = json.load(json_file)
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}")
return json_list
def save_jsonline_json(json_list, target_path):
"""
json_list ---> jsonline
:param json_list:
:param target_path:
:return:
"""
with open(target_path, 'w', encoding="utf-8") as json_file:
for item in json_list:
json.dump(item, json_file, ensure_ascii=False)
json_file.write("\n")
print(f"Json列表已经保存到 {target_path} 文件中。 每一行为一个json对象")
def load_jsonline_json(source_file):
"""
jsonline --> json_list
:param source_file:
:return: json_list
"""
json_list = []
with open(source_file, "r", encoding="utf-8") as json_file:
for line in json_file:
try:
json_data = json.loads(line.strip())
json_list.append(json_data)
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}")
return json_list
def save_jsonlist_excel(json_list, excel_targe_file):
"""jsonlist_trans_excel"""
df = pd.DataFrame(json_list)
df.to_excel(excel_targe_file, index=False)
def load_jsonlist_excel(excel_source_path):
"""excel_trans_jsonlist"""
df = pd.read_excel(excel_source_path)
json_list = df.to_dict(orient="records")
return json_list
def save_jsonlist_csv(data_list, csv_target_path):
"""save_jsonlist_csv"""
with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file:
fieldnames = data_list[0].keys() # 使用第一个字典的键作为列名
csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
csv_writer.writeheader() # 写入 CSV 头部(列名)
for data in data_list:
csv_writer.writerow(data)
if __name__ == '__main__':
human_list = [{"name": "jordan", "age": 30, "city": "南昌", "list": [3, 6, 0]},
{"name": "John", "age": 30, "city": "广州", "list": [3, 6, 1]},
{"name": "JiWang", "age": 30, "city": "深圳", "list": [2, 3, 1]}]
human = {
"name": "jordan",
"age": 30,
"city": "南昌",
"list": [3, 6, 0]
}
# json_path = "json/humanline_list.json"
# save_jsonline_json(human_list, json_path)
# new_human_list = load_jsonline_json(json_path)
# print(new_human_list)
# save_json(human_list, "json/human_list.json")
# human_json = load_json("json/human_list.json")
# print(human_json)
# save_jsonlist_excel(human_list, "excel/human.xlsx")
# dic_list = load_jsonlist_excel("excel/human.xlsx")
# print(dic_list)
# print(dic_list[0]["list"])
# print(type(dic_list[0]["list"]))
save_jsonlist_csv(human_list, "csv/human_list.csv")
#
# save_json(human, "json/human.json")
# human_json = load_json("json/human.json")
# print(human_json)
# new_json_list = load_json_list(json_path)
# print(new_json_list)
3. excel相关代码操作
3.1 excel <----> List[dict]
函数代码
def save_jsonlist_excel(json_list, excel_targe_file):
"""jsonlist_trans_excel"""
df = pd.DataFrame(json_list)
df.to_excel(excel_targe_file, index=False)
def load_jsonlist_excel(excel_source_path):
"""excel_trans_jsonlist"""
df = pd.read_excel(excel_source_path)
json_list = df.to_dict(orient="records")
return json_list
3.2 加载某一列的数据
函数代码
def load_one_column(excel_source_path, column_name):
df = pd.read_excel(excel_source_path)
column = df[column_name]
return list(column)
3.3 新添加一列
def add_list_column(excel_source_path, data_list, column_name):
df = pd.read_excel(excel_source_path)
df[column_name] = data_list
df.to_excel(excel_source_path, index=False)
3.4 抽取excel表格中的关键信息列生成新的excel表格
def excel_key_info(excel_source_path, excel_target_path, column_name_list):
"""
抽取excel表格中的关键信息列生成新的excel表格
:param excel_source_path: 源文件
:param excel_target_path: 目标文件
:param column_name_list: 需要的列名
:return: 无返回值
"""
main_df = pd.read_excel(excel_source_path, usecols=column_name_list) #
main_df.to_excel(excel_target_path, index=False)
3.5 excel 表格内容转化为jsonline文件
转所有所有信息或者是关键信息
def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None):
"""
excel 表格内容转化为jsonline文件
column_name_list 里面有值那么只转化关键信息 否则全部转化
"""
if column_name_list:
main_df = pd.read_excel(excel_source_path, usecols=column_name_list)
else:
main_df = pd.read_excel(excel_source_path)
json_list = main_df.to_dict(orient="records")
with open(json_target_path, 'w', encoding="utf-8") as json_file:
for item in json_list:
json.dump(item, json_file, ensure_ascii=False)
json_file.write("\n")
print(f"Json列表已经保存到 {excel_source_path} 文件中。 每一行为一个json对象")
3.6 读取前n行数据
def load_top_n_excel(excel_source_path, rows):
"""读取前n行数据"""
df = pd.read_excel(excel_source_path).head(rows)
3.7 合并多个excel表格
def merge_excel(excel_list_path, excel_target_path):
"""合并多个excel表格"""
df_list = []
for current_excel_path in excel_list_path:
current_df = pd.read_excel(current_excel_path)
df_list.append(current_df)
merge_df = pd.concat(df_list)
merge_df.to_excel(excel_target_path, index=False)
3.8 删除/添加符合条件的行
def condition_delete_rows(excel_source_path, excel_target_path, column_name, delete_list):
"""删除/添加符合条件的数据"""
df = pd.read_excel(excel_source_path)
df = df[~df[column_name].isin(delete_list)] # 如果没有~就是表示添加
df.to_excel(excel_target_path, index=False)
3.9 总体代码
import pandas as pd
import json
def save_jsonlist_excel(json_list, excel_targe_file):
"""jsonlist_trans_excel"""
df = pd.DataFrame(json_list)
df.to_excel(excel_targe_file, index=False)
def load_jsonlist_excel(excel_source_path):
"""excel_trans_jsonlist"""
df = pd.read_excel(excel_source_path)
json_list = df.to_dict(orient="records")
return json_list
def load_one_column(excel_source_path, column_name):
df = pd.read_excel(excel_source_path)
column = df[column_name]
return list(column)
def add_list_column(excel_source_path, data_list, column_name):
df = pd.read_excel(excel_source_path)
df[column_name] = data_list
df.to_excel(excel_source_path, index=False)
def excel_key_info(excel_source_path, excel_target_path, column_name_list):
"""
抽取excel表格中的关键信息列生成新的excel表格
:param excel_source_path: 源文件
:param excel_target_path: 目标文件
:param column_name_list: 需要的列名
:return: 无返回值
"""
main_df = pd.read_excel(excel_source_path, usecols=column_name_list) #
main_df.to_excel(excel_target_path, index=False)
def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None):
"""
excel 表格内容转化为jsonline文件
column_name_list 里面有值那么只转化关键信息 否则全部转化
"""
if column_name_list:
main_df = pd.read_excel(excel_source_path, usecols=column_name_list)
else:
main_df = pd.read_excel(excel_source_path)
json_list = main_df.to_dict(orient="records")
with open(json_target_path, 'w', encoding="utf-8") as json_file:
for item in json_list:
json.dump(item, json_file, ensure_ascii=False)
json_file.write("\n")
print(f"Json列表已经保存到 {excel_source_path} 文件中。 每一行为一个json对象")
def load_top_n_excel(excel_source_path, rows):
"""读取前n行数据"""
df = pd.read_excel(excel_source_path).head(rows)
print(df)
def merge_excel(excel_list_path, excel_target_path):
"""合并多个excel表格"""
df_list = []
for current_excel_path in excel_list_path:
current_df = pd.read_excel(current_excel_path)
df_list.append(current_df)
merge_df = pd.concat(df_list)
merge_df.to_excel(excel_target_path, index=False)
def condition_delete_rows(excel_source_path, excel_target_path, column_name, delete_list):
"""删除/添加符合条件的数据"""
df = pd.read_excel(excel_source_path)
df = df[~df[column_name].isin(delete_list)] # 如果没有~就是表示添加
df.to_excel(excel_target_path, index=False)
if __name__ == '__main__':
human_list = [{"name": "jordan", "age": 30, "city": "南昌", "list": [3, 6, 0]},
{"name": "John", "age": 30, "city": "广州", "list": [3, 6, 1]},
{"name": "JiWang", "age": 30, "city": "深圳", "list": [2, 3, 1]},
{"name": "kobe", "age": 38, "city": "南昌", "list": [1, 2, 3]}]
index_list = [1, 2, 3, 4]
save_jsonlist_excel(human_list, "human.xlsx")
city_list = load_one_column("human.xlsx", "city")
print(city_list)
print(type(city_list))
add_list_column("human.xlsx", index_list, "index")
condition_delete_rows("human.xlsx", "human_delete.xlsx", "city", ["深圳", "广州"])
4. json.load, loads, dump, dumps的区别
json.load
, json.loads
, json.dump
, 和 json.dumps
是 Python 中用于处理 JSON 数据的函数,它们的作用和用法有所不同:
4.1 json.load():
从文件中读取 JSON 数据并将其解析为 Python 对象。
多用于加载json格式的文件, 返回一个json对象或者json列表
import json
with open('data.json', 'r') as file:
data = json.load(file)
4.2 json.loads(): str —> dict/json
将 JSON 格式的字符串解析为 Python 对象。
import json
json_string = '{"name": "John", "age": 30, "city": "New York"}'
data = json.loads(json_string) # str ---> dict/json
print(type(data))
print(data)
输出结果
4.3 json.dump():
将 Python 对象序列化为 JSON 格式并写入文件。
import json
data = {"name": "John", "age": 30, "city": "New York"}
with open('output.json', 'w') as file:
json.dump(data, file)
4.4 json.dumps(): json/dict --> str
将 Python 对象序列化为 JSON 格式的字符串。
注意ensure_ascii=False
的作用
import json
data = {"name": "中国", "age": 30, "city": "New York"}
json_string_one = json.dumps(data)
json_string_two = json.dumps(data, ensure_ascii=False)
print(type(json_string_one))
print(json_string_one)
print(json_string_two)
输出结果:
4.5 总结:
json.load
用于从文件中加载 JSON 数据。
json.loads
用于从 JSON 字符串中加载 JSON 数据。
json.dump
用于将 JSON 数据写入文件。
json.dumps
用于将 Python 对象转换为 JSON 格式的字符串。