Python 使用 pandas 分块读取大 CSV 文件并根据字段值分割

用 datetime 处理表格中的时间,计算两列的时间差

import os
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.types import NVARCHAR,INT,DECIMAL,FLOAT
from datetime import datetime, date

import re


# Location of the input CSV file; echoed once so the run log shows which
# file is being processed.
path = 'E:\\_ToDo_\\data\\data_zd.csv'
print('this is paths', path, sep='\n')

def calculate_time_difference(start_time, end_time):
    """Return the number of minutes elapsed from *start_time* to *end_time*.

    Both arguments are strings in ``YYYY/MM/DD HH:MM`` form (the
    ``"%Y/%m/%d %H:%M"`` strptime format).  The result is a float and is
    negative when *end_time* precedes *start_time*.

    ``datetime.strptime`` raises ``ValueError`` for text that does not
    match the format; that exception is propagated to the caller.
    """
    timestamp_format = "%Y/%m/%d %H:%M"

    # Parse in argument order: start first, then end.
    begin_dt, finish_dt = (
        datetime.strptime(text, timestamp_format)
        for text in (start_time, end_time)
    )

    elapsed = finish_dt - begin_dt
    return elapsed.total_seconds() / 60


# Placeholder written into every cell that is empty in the input CSV.
MISSING_VALUE = '未知'

# Fields copied from each input row, in the order they appear in the output.
OUTPUT_FIELDS = [
    'date', 'batch', 'brand', 'type', 'group',
    'state', 'property', 'begintime', 'endtime',
]

print('正在处理:' + path)
df = pd.read_csv(path)
df = df.fillna(value=MISSING_VALUE)

# A header may be stored with a leading space (' date' instead of 'date').
# Resolve the header actually present once, up front, rather than relying on
# a bare ``except`` around every lookup on every row.  If neither spelling
# exists, the lookup below raises KeyError for the space-prefixed name.
source_columns = {
    field: field if field in df.columns else ' ' + field
    for field in OUTPUT_FIELDS
}

data_list = []                  # one dict per processed row
for index, row in df.iterrows():
    infos = {field: row[source_columns[field]] for field in OUTPUT_FIELDS}

    begintime = infos['begintime']
    endtime = infos['endtime']
    if begintime == MISSING_VALUE or endtime == MISSING_VALUE:
        # The timestamp was empty in the input and has been replaced by the
        # placeholder, which cannot be parsed; mark the duration as unknown
        # too instead of aborting the whole run.
        infos['time'] = MISSING_VALUE
    else:
        # Duration in minutes between the two timestamp columns.
        infos['time'] = calculate_time_difference(begintime, endtime)

    data_list.append(infos)

    # Lightweight progress indicator.
    if index % 10 == 0:
        print('--------'+str(index)+'----------')

# Build a new DataFrame from the extracted rows.
df = pd.DataFrame(data_list)

# Rows are appended without a header line, so repeated runs accumulate in the
# same output file.
df.to_csv('E:\\_ToDo_\\data_processed3.csv',mode='a',index=False,header=False)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值