pandas读取csv,按每天的时间区间筛选数据

@[TOC]pandas读取csv,按每天的时间区间筛选数据

需求

读取csv内容,以0到6点, 6到12点,12到18点,18点到24点为4个区间,把读取的内容筛进4个区间里
然后获取同周期CRC字段>100超过10次的数据
把数据生成到模板里,输出到csv文件

代码说明

1.读取数据
引用了pandas读取csv数据,方便快捷
data_extract = pd.read_csv(r’OLT上联端口CRC越限告警明细.csv’, engine=‘python’, encoding=‘gb18030’)
2.日期去重
对于设置时间区间上,因为每天都有一个区间,要先读取到日期,由于日期是带有年月日时分秒格式的,这里采用了一个全局列表,把日期先截取到年月日,再放入列表里,对于同一天的日期数据,先判断有没有添加,已添加则不加入列表
3.每天的周期设置
采用timedelta(hours=6),以每天的零点,往后延6,12,18,24小时,分好4个区间
4.dataframe格式数据清洗
dataframe格式的数据有独特的数据筛选方式,比如

data_cleaning_0to6 = data_extract[
            (data_extract['startTime'] >= range_0) & (data_extract['startTime'] <= range_6) & (data_extract['CRC'] > 100)]

5.编写模板

6.数据写入csv

def clientToServer(self, strData):
    today = time.strftime('%Y%m%d', time.localtime(time.time()))
    # self.filename = "/data/FBAndIPTVsocket/result/sourceData/OLTUOOCRCAlarm6%s.csv" % (today)
    # self.filenameOK = "/data/FBAndIPTVsocket/result/sourceDataOK/OLTUOOCRCAlarm6%s.csv.ok" % (today)
    script_path = os.path.dirname(__file__)
    self.filename = script_path+"/OLT上联端口CRC越限告警明细%s.csv" % (today)
    self.filenameOK = script_path+"/OLT上联端口CRC越限告警明细%s.csv.ok" % (today)

    with open(self.filename.encode('utf-8'), "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([strData])
    with open(self.filenameOK.encode('utf-8'), "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([strData])
    cmd = "cp -f %s %s " % (self.filename, self.filenameOK)
        ret = os.system(cmd)
        self.logger.info(str(ret))

全部代码

import csv
import os
import time

import pandas as pd
from datetime import date
from datetime import datetime
from datetime import timedelta
from getLog import FinalLogger

date_list = []
data_lists = []
ds = date.today().strftime(“%Y%m%d”)

class warningProcess():
def init(self):
self.logger = FinalLogger.getLogger()
self.fileName = “”
self.totalWarningCnt = 0

def clientToServer(self, strData):
    today = time.strftime('%Y%m%d', time.localtime(time.time()))
    # self.filename = "/data/FBAndIPTVsocket/result/sourceData/OLTUOOCRCAlarm6%s.csv" % (today)
    # self.filenameOK = "/data/FBAndIPTVsocket/result/sourceDataOK/OLTUOOCRCAlarm6%s.csv.ok" % (today)
    script_path = os.path.dirname(__file__)
    self.filename = script_path+"/OLT上联端口CRC越限告警明细%s.csv" % (today)
    self.filenameOK = script_path+"/OLT上联端口CRC越限告警明细%s.csv.ok" % (today)

    with open(self.filename.encode('utf-8'), "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([strData])
    with open(self.filenameOK.encode('utf-8'), "a") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([strData])

def alarmListingTemplate(self, data_cleaning):

    for index, row in data_cleaning.iterrows():
        id = row['id'],
        reportTime = row['reportTime'],
        manufacturerName = str(row['manufacturerName']),
        startTime = row['startTime'],
        oltIp = row['oltIp'],
        upLinkPort = row['upLinkPort'],
        CRC = row['CRC'],
        oltName = row['oltName']

        manufacturerName = manufacturerName[0].strip()
        if manufacturerName == 'ZTE':
            manufacturerCHName = '中兴'
            maintain_group = '中兴'
        elif manufacturerName == 'HW':
            manufacturerCHName = '华为'
            maintain_group = '华为贝尔'
        elif manufacturerName == 'BeiEr':
            manufacturerCHName = '贝尔'
            maintain_group = '华为贝尔'
        else:
            manufacturerCHName = '未知'
            maintain_group = '未知'

        strData1 = "<AlarmStart>"
        strData2 = "IntVersion:V1.0.0"
        strData3 = "MsgSerial:"
        strData4 = "AlarmUniqueId:" + str(id[0])
        strData5 = "AlarmUniqueClearId:"
        strData6 = "NeName:" + str(oltName).strip()
        strData7 = "NeIp:" + str(oltIp[0]).strip()
        strData8 = "SystemName:集中性能系统"
        strData9 = "EquipmentClass:"
        strData10 = "Version:V1.0"
        strData11 = "LocateNeName:" + str(upLinkPort[0])
        strData12 = "LocateNeType:"
        strData13 = "LocateInfo:000000000"
        strData14 = "EventTime:" + str(reportTime[0])
        strData15 = "CancelTime:" + str(startTime[0])
        strData16 = "VendorSeverity:三级告警"
        strData17 = "VendorAlarmId:"
        strData18 = "AlarmTitle:" + "OLT上联端口CRC误码越限"
        strData19 = "ProbableCauseTxt:"
        strData20 = "AlarmText:" + "周期内上联端口" + "CRC大于100的次数大于10次"
        strData21 = "AlarmStatus:1"
        strData22 = "Vendor:" + manufacturerCHName
        strData23 = "maintain_group:" + maintain_group
        strData24 = "<AlarmEnd>"
        strDataU8 = "\r\n" + strData1 + "\r\n" + strData2 + "\r\n" + strData3 + "\r\n" + strData4 + "\r\n" + strData5 + "\r\n" + strData6 + "\r\n" + strData7 + "\r\n" + strData8 + "\r\n" + strData9 + "\r\n" + strData10 + "\r\n" + strData11 + "\r\n" + strData12 + "\r\n" + strData13 + "\r\n" + strData14 + "\r\n" + strData15 + "\r\n" + strData16 + "\r\n" + strData17 + "\r\n" + strData18 + "\r\n" + strData19 + "\r\n" + strData20 + "\r\n" + strData21 + "\r\n" + strData22 + "\r\n" + strData23 + "\r\n" + strData24 + "\r\n"
        self.logger.info("生成告警模块")
        self.logger.info(strDataU8)
        print(strDataU8)
        self.clientToServer(strDataU8)
        self.totalWarningCnt = self.totalWarningCnt + 1
        break

def data_to_excel(self, data_cleaning, data_lists):
    for index, row in data_cleaning.iterrows():
        id = row['id']
        reportTime = row['reportTime']
        manufacturerName = row['manufacturerName']
        startTime = row['startTime']
        oltIp = row['oltIp']
        upLinkPort = row['upLinkPort']
        CRC = row['CRC']
        oltName = row['oltName']
        data_list = [id, reportTime, manufacturerName, startTime, oltIp, upLinkPort, CRC, oltName]
        data_lists.append(data_list)

        # 输出到excel
        # col = ['id', 'reportTime', 'manufacturerName', 'startTime', 'oltIp', 'upLinkPort', 'CRC', 'oltName']
        # result = pd.DataFrame(data_lists, columns=col)
        # result.to_excel(excel_writer=r"周期内上联端口CRC大于100的次数大于10次告警输出结果%s.xlsx" % ds, index=False)
        # print('结果输出成功!')

def read_data(self):
    # 读取数据
    data_extract = pd.read_csv(r'OLT上联端口CRC越限告警明细.csv', engine='python', encoding='gb18030')
    data_extract['startTime'] = pd.to_datetime(data_extract['startTime'])
    DateStart = data_extract['startTime']

    # 获取日
    for date in DateStart:
        d = date.strftime("%Y-%m-%d")
        if d not in date_list:
            date_list.append(d)

    # 获取同周期CRC>100超过10次
    for date in date_list:
        range_0 = datetime.strptime(date, "%Y-%m-%d")
        range_6 = range_0 + timedelta(hours=6)
        range_12 = range_0 + timedelta(hours=12)
        range_18 = range_0 + timedelta(hours=18)
        range_24 = range_0 + timedelta(hours=24)

        # 设置区间
        data_cleaning_0to6 = data_extract[
            (data_extract['startTime'] >= range_0) & (data_extract['startTime'] <= range_6) & (
                    data_extract['CRC'] > 100)]
        data_cleaning_6to12 = data_extract[
            (data_extract['startTime'] > range_6) & (data_extract['startTime'] <= range_12) & (
                    data_extract['CRC'] > 100)]
        data_cleaning_12to18 = data_extract[
            (data_extract['startTime'] > range_12) & (data_extract['startTime'] <= range_18) & (
                    data_extract['CRC'] > 100)]
        data_cleaning_18to24 = data_extract[
            (data_extract['startTime'] > range_18) & (data_extract['startTime'] <= range_24) & (
                    data_extract['CRC'] > 100)]

        if len(data_cleaning_0to6) > 10:
            self.alarmListingTemplate(data_cleaning_0to6)

        if len(data_cleaning_6to12) > 10:
            self.alarmListingTemplate(data_cleaning_6to12)

        if len(data_cleaning_12to18) > 10:
            self.alarmListingTemplate(data_cleaning_12to18)

        if len(data_cleaning_18to24) > 10:
            self.alarmListingTemplate(data_cleaning_18to24)

    if self.totalWarningCnt > 0:
        print(self.filename)
        cmd = "cp -f %s %s " % (self.filename, self.filenameOK)
        ret = os.system(cmd)
        self.logger.info(str(ret))
    else:
        self.logger.info("当前告警数为:%d" % self.totalWarningCnt) 
 if __name__ == '__main__':
     WarningProcess = warningProcess()
     WarningProcess.read_data()

数据

在这里插入图片描述

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值