数据处理文件相互转化

数据处理

这里主要整理的是.txt, .json, .csv, .excel, .pkl 文件之间的数据存储和转化

1. txt部分

1.1 List[dict] 互转 txt

在这里插入图片描述

具体代码

def save_jsonlist_txt(json_data, target_path):
    """ 保存json列表到txt文件中
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in json_data:
            item = str(item).replace("\n", "\\n")
            file.write(item)
            file.write("\n")


def load_jsonlist_txt(source_path):
    """ 从txt文件中加载json列表
    """
    json_list = []
    try:
        with open(source_path, "r", encoding="utf-8") as file:
            for line in file:
                try:
                    json_data = eval(line.strip())
                    json_list.append(json_data)
                except json.JSONDecodeError:
                    print(f"无法解析Json数据: {line.strip()}")
            return json_list
    except FileNotFoundError:
        print(f"文件 '{source_path} 未找到'")

1.2 List[str] 互转 txt

在这里插入图片描述

def save_str_list_txt(str_list, target_path):
    """ 保存str列表到txt文件中
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in str_list:
            item = item.replace("\n", "\\n")
            file.write(item)
            file.write("\n")


def load_strlist_txt(file_path):
    """ 从txt文件中加载str列表
    """
    txt_list = []
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file.readlines():
                txt_list.append(line.strip())
            return txt_list
    except FileNotFoundError:
        print(f"文件 ‘{file_path} 未找到’")

1.3 txt相关操作的整体代码

import json


def save_json_list_txt(json_data, target_path):
    """ 保存json列表到txt文件中
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in json_data:
            item = str(item).replace("\n", "\\n")
            file.write(item)
            # file.write(str(item))
            file.write("\n")


def load_json_list_txt(source_path):
    """ 从txt文件中加载json列表
    """
    json_list = []
    try:
        with open(source_path, "r", encoding="utf-8") as file:
            for line in file:
                try:
                    json_data = eval(line.strip())
                    json_list.append(json_data)
                except json.JSONDecodeError:
                    print(f"无法解析Json数据: {line.strip()}")
            return json_list
    except FileNotFoundError:
        print(f"文件 '{source_path} 未找到'")


def save_str_list_txt(str_list, target_path):
    """ 保存str列表到txt文件中
    """
    with open(target_path, "w", encoding="utf-8") as file:
        for item in str_list:
            item = item.replace("\n", "\\n")
            file.write(item)
            file.write("\n")


def load_str_list_txt(file_path):
    """ 从txt文件中加载str列表
    """
    txt_list = []
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file.readlines():
                txt_list.append(line.strip())
            return txt_list
    except FileNotFoundError:
        print(f"文件 ‘{file_path} 未找到’")




if __name__ == '__main__':

    human_list = [
        {
            "name": "jordan\n",
            "age": 30,
            "city": "南昌",
            "list": [3, 6, 0]
        },
        {
            "name": "John",
            "age": 30,
            "city": "广州",
            "list": [3, 6, 1]
        },
        {
            "name": "JiWang",
            "age": 30,
            "city": "深圳",
            "list": [2, 3, 1]
        },
    ]
    day_list = ["星期一\n\n", "星期二", "星期三", "星期四", "星期⑤", "星期六", "星期天"]

    save_json_list_txt(human_list, "human_list.txt")
    new_human_list = load_json_list_txt("human_list.txt")
    print(new_human_list)
    # print(type(new_human_list))
    # print(type(new_human_list[0]))
    save_str_list_txt(day_list, "day.txt")
    new_day_list = load_str_list_txt("day.txt")
    print(new_day_list)

2. json格式部分

这里建议了解一下json.load, json.loads, json.dump, json.dumps的区别

2.1 List[dict] 互转 json

在这里插入图片描述
具体方法代码

def save_jsonline_json(json_list, target_path):
    """
    json_list ---> jsonline
    :param json_list: 
    :param target_path: 
    :return: 
    """
    with open(target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\n")
    print(f"Json列表已经保存到 {target_path} 文件中。 每一行为一个json对象")


def load_jsonline_json(source_file):
    """
    jsonline --> json_list
    :param source_file: 
    :return: json_list
    """
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        for line in json_file:
            try:
                json_data = json.loads(line.strip())
                json_list.append(json_data)
            except json.JSONDecodeError as e:
                print(f"JSON解析错误: {e}")
        return json_list

2.2 ditc, List[dict] 互转json文件

场景1:
在这里插入图片描述
场景2:
在这里插入图片描述

def save_json(json_list, target_path):
    """save_json"""
    with open(target_path, "w", encoding="utf-8") as json_file:
        json.dump(json_list,  json_file, indent=4, ensure_ascii=False)
    print(f"Json列表已经保存到 {target_path} 文件中。")

def load_json(source_file):
    """load_json"""
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        try:
            json_list = json.load(json_file)
        except json.JSONDecodeError as e:
            print(f"JSON解析错误: {e}")
        return json_list

2.3 List[dict] 互转 excel

场景:
在这里插入图片描述
这里需要注意excel转回来的json, 向list这种他是str类型的, 不会保留list类型, 如需进一步操作需要eval等操作

def save_jsonlist_excel(json_list, excel_targe_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_targe_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list

2.4 jsonlist 转化为 csv

在这里插入图片描述

def save_jsonlist_csv(data_list, csv_target_path):
	"""save_jsonlist_csv"""
    with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file:
        fieldnames = data_list[0].keys()  # 使用第一个字典的键作为列名
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()  # 写入 CSV 头部(列名)
        for data in data_list:
            csv_writer.writerow(data)

2.5 json相关操作整体代码

import json
import pandas as pd
import csv


def save_json(json_list, target_path):
    """save_json"""
    with open(target_path, "w", encoding="utf-8") as json_file:
        json.dump(json_list,  json_file, indent=4, ensure_ascii=False)
    print(f"Json列表已经保存到 {target_path} 文件中。")


def load_json(source_file):
    """load_json"""
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        try:
            json_list = json.load(json_file)
        except json.JSONDecodeError as e:
            print(f"JSON解析错误: {e}")
        return json_list


def save_jsonline_json(json_list, target_path):
    """
    json_list ---> jsonline
    :param json_list:
    :param target_path:
    :return:
    """
    with open(target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\n")
    print(f"Json列表已经保存到 {target_path} 文件中。 每一行为一个json对象")


def load_jsonline_json(source_file):
    """
    jsonline --> json_list
    :param source_file:
    :return: json_list
    """
    json_list = []
    with open(source_file, "r", encoding="utf-8") as json_file:
        for line in json_file:
            try:
                json_data = json.loads(line.strip())
                json_list.append(json_data)
            except json.JSONDecodeError as e:
                print(f"JSON解析错误: {e}")
        return json_list


def save_jsonlist_excel(json_list, excel_targe_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_targe_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list


def save_jsonlist_csv(data_list, csv_target_path):
    """save_jsonlist_csv"""
    with open(csv_target_path, 'w', newline='', encoding="utf-8") as csv_file:
        fieldnames = data_list[0].keys()  # 使用第一个字典的键作为列名
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        csv_writer.writeheader()  # 写入 CSV 头部(列名)
        for data in data_list:
            csv_writer.writerow(data)



if __name__ == '__main__':

    human_list = [{"name": "jordan", "age": 30, "city": "南昌", "list": [3, 6, 0]},
                  {"name": "John", "age": 30, "city": "广州", "list": [3, 6, 1]},
                  {"name": "JiWang", "age": 30, "city": "深圳", "list": [2, 3, 1]}]

    human = {
            "name": "jordan",
            "age": 30,
            "city": "南昌",
            "list": [3, 6, 0]
        }

    # json_path = "json/humanline_list.json"
    # save_jsonline_json(human_list, json_path)
    # new_human_list = load_jsonline_json(json_path)
    # print(new_human_list)
    # save_json(human_list, "json/human_list.json")
    # human_json = load_json("json/human_list.json")
    # print(human_json)

    # save_jsonlist_excel(human_list, "excel/human.xlsx")
    # dic_list = load_jsonlist_excel("excel/human.xlsx")
    # print(dic_list)
    # print(dic_list[0]["list"])
    # print(type(dic_list[0]["list"]))
    save_jsonlist_csv(human_list, "csv/human_list.csv")
    #
    # save_json(human, "json/human.json")
    # human_json = load_json("json/human.json")
    # print(human_json)
    # new_json_list = load_json_list(json_path)
    # print(new_json_list)

3. excel相关代码操作

3.1 excel <----> List[dict]

在这里插入图片描述
函数代码

def save_jsonlist_excel(json_list, excel_targe_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_targe_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list

3.2 加载某一列的数据

在这里插入图片描述
函数代码

def load_one_column(excel_source_path, column_name):
    df = pd.read_excel(excel_source_path)
    column = df[column_name]
    return list(column)

3.3 新添加一列

在这里插入图片描述

def add_list_column(excel_source_path, data_list, column_name):
    df = pd.read_excel(excel_source_path)
    df[column_name] = data_list
    df.to_excel(excel_source_path, index=False)

3.4 抽取excel表格中的关键信息列生成新的excel表格

def excel_key_info(excel_source_path, excel_target_path, column_name_list):
    """
    抽取excel表格中的关键信息列生成新的excel表格
    :param excel_source_path: 源文件
    :param excel_target_path: 目标文件
    :param column_name_list: 需要的列名
    :return: 无返回值
    """
    main_df = pd.read_excel(excel_source_path, usecols=column_name_list)    #
    main_df.to_excel(excel_target_path, index=False)

3.5 excel 表格内容转化为jsonline文件

转所有所有信息或者是关键信息
在这里插入图片描述

def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None):
    """
    excel 表格内容转化为jsonline文件
    column_name_list 里面有值那么只转化关键信息 否则全部转化
    """
    if column_name_list:
        main_df = pd.read_excel(excel_source_path, usecols=column_name_list)
    else:
        main_df = pd.read_excel(excel_source_path)
    json_list = main_df.to_dict(orient="records")
    with open(json_target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\n")
    print(f"Json列表已经保存到 {excel_source_path} 文件中。 每一行为一个json对象")

3.6 读取前n行数据

def load_top_n_excel(excel_source_path, rows):
    """读取前n行数据"""
    df = pd.read_excel(excel_source_path).head(rows)

3.7 合并多个excel表格

def merge_excel(excel_list_path, excel_target_path):
    """合并多个excel表格"""
    df_list = []
    for current_excel_path in excel_list_path:
        current_df = pd.read_excel(current_excel_path)
        df_list.append(current_df)

    merge_df = pd.concat(df_list)
    merge_df.to_excel(excel_target_path, index=False)

3.8 删除/添加符合条件的行

在这里插入图片描述

def condition_delete_rows(excel_source_path, excel_target_path, column_name,  delete_list):
    """删除/添加符合条件的数据"""
    df = pd.read_excel(excel_source_path)
    df = df[~df[column_name].isin(delete_list)]  # 如果没有~就是表示添加
    df.to_excel(excel_target_path, index=False)

3.9 总体代码

import pandas as pd
import json


def save_jsonlist_excel(json_list, excel_targe_file):
    """jsonlist_trans_excel"""
    df = pd.DataFrame(json_list)
    df.to_excel(excel_targe_file, index=False)


def load_jsonlist_excel(excel_source_path):
    """excel_trans_jsonlist"""
    df = pd.read_excel(excel_source_path)
    json_list = df.to_dict(orient="records")
    return json_list


def load_one_column(excel_source_path, column_name):
    df = pd.read_excel(excel_source_path)
    column = df[column_name]
    return list(column)


def add_list_column(excel_source_path, data_list, column_name):
    df = pd.read_excel(excel_source_path)
    df[column_name] = data_list
    df.to_excel(excel_source_path, index=False)


def excel_key_info(excel_source_path, excel_target_path, column_name_list):
    """
    抽取excel表格中的关键信息列生成新的excel表格
    :param excel_source_path: 源文件
    :param excel_target_path: 目标文件
    :param column_name_list: 需要的列名
    :return: 无返回值
    """
    main_df = pd.read_excel(excel_source_path, usecols=column_name_list)    #
    main_df.to_excel(excel_target_path, index=False)


def excel_trans_jsonline(excel_source_path, json_target_path, column_name_list=None):
    """
    excel 表格内容转化为jsonline文件
    column_name_list 里面有值那么只转化关键信息 否则全部转化
    """
    if column_name_list:
        main_df = pd.read_excel(excel_source_path, usecols=column_name_list)
    else:
        main_df = pd.read_excel(excel_source_path)
    json_list = main_df.to_dict(orient="records")
    with open(json_target_path, 'w', encoding="utf-8") as json_file:
        for item in json_list:
            json.dump(item, json_file, ensure_ascii=False)
            json_file.write("\n")
    print(f"Json列表已经保存到 {excel_source_path} 文件中。 每一行为一个json对象")


def load_top_n_excel(excel_source_path, rows):
    """读取前n行数据"""
    df = pd.read_excel(excel_source_path).head(rows)
    print(df)


def merge_excel(excel_list_path, excel_target_path):
    """合并多个excel表格"""
    df_list = []
    for current_excel_path in excel_list_path:
        current_df = pd.read_excel(current_excel_path)
        df_list.append(current_df)

    merge_df = pd.concat(df_list)
    merge_df.to_excel(excel_target_path, index=False)


def condition_delete_rows(excel_source_path, excel_target_path, column_name,  delete_list):
    """删除/添加符合条件的数据"""
    df = pd.read_excel(excel_source_path)
    df = df[~df[column_name].isin(delete_list)]  # 如果没有~就是表示添加
    df.to_excel(excel_target_path, index=False)


if __name__ == '__main__':
    human_list = [{"name": "jordan", "age": 30, "city": "南昌", "list": [3, 6, 0]},
                  {"name": "John", "age": 30, "city": "广州", "list": [3, 6, 1]},
                  {"name": "JiWang", "age": 30, "city": "深圳", "list": [2, 3, 1]},
                  {"name": "kobe", "age": 38, "city": "南昌", "list": [1, 2, 3]}]
    index_list = [1, 2, 3, 4]
    save_jsonlist_excel(human_list, "human.xlsx")
    city_list = load_one_column("human.xlsx", "city")
    print(city_list)
    print(type(city_list))
    add_list_column("human.xlsx", index_list, "index")

    condition_delete_rows("human.xlsx", "human_delete.xlsx", "city", ["深圳", "广州"])

4. json.load, loads, dump, dumps的区别

json.load, json.loads, json.dump, 和 json.dumps 是 Python 中用于处理 JSON 数据的函数,它们的作用和用法有所不同:

4.1 json.load():

从文件中读取 JSON 数据并将其解析为 Python 对象。
多用于加载json格式的文件, 返回一个json对象或者json列表

import json

with open('data.json', 'r') as file:
    data = json.load(file)

4.2 json.loads(): str —> dict/json

将 JSON 格式的字符串解析为 Python 对象。

import json

json_string = '{"name": "John", "age": 30, "city": "New York"}'
data = json.loads(json_string)  # str ---> dict/json
print(type(data))
print(data)

输出结果
在这里插入图片描述

4.3 json.dump():

将 Python 对象序列化为 JSON 格式并写入文件

import json

data = {"name": "John", "age": 30, "city": "New York"}
with open('output.json', 'w') as file:
    json.dump(data, file)

4.4 json.dumps(): json/dict --> str

将 Python 对象序列化为 JSON 格式的字符串。
注意ensure_ascii=False的作用

import json
data = {"name": "中国", "age": 30, "city": "New York"}
json_string_one = json.dumps(data)
json_string_two = json.dumps(data, ensure_ascii=False)
print(type(json_string_one))
print(json_string_one)
print(json_string_two)

输出结果:
在这里插入图片描述

4.5 总结:

json.load 用于从文件中加载 JSON 数据。
json.loads 用于从 JSON 字符串中加载 JSON 数据。
json.dump 用于将 JSON 数据写入文件。
json.dumps 用于将 Python 对象转换为 JSON 格式的字符串。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值