@[TOC]pandas读取csv,按每天的时间区间筛选数据
需求
读取csv内容,以0到6点, 6到12点,12到18点,18点到24点为4个区间,把读取的内容筛进4个区间里
然后获取同周期CRC字段>100超过10次的数据
把数据生成到模板里,输出到csv文件
代码说明
1.读取数据
引用了pandas读取csv数据,方便快捷
data_extract = pd.read_csv(r’OLT上联端口CRC越限告警明细.csv’, engine=‘python’, encoding=‘gb18030’)
2.日期去重
对于设置时间区间上,因为每天都有一个区间,要先读取到日期,由于日期是带有年月日时分秒格式的,这里采用了一个全局列表,把日期先截取到年月日,再放入列表里,对于同一天的日期数据,先判断有没有添加,已添加则不加入列表
3.每天的周期设置
采用timedelta(hours=6),以每天的零点,往后延6,12,18,24小时,分好4个区间
4.dataframe格式数据清洗
dataframe格式的数据有独特的数据筛选方式,比如
data_cleaning_0to6 = data_extract[
(data_extract['startTime'] >= range_0) & (data_extract['startTime'] <= range_6) & (data_extract['CRC'] > 100)]
5.编写模板
略
6.数据写入csv
def clientToServer(self, strData):
today = time.strftime('%Y%m%d', time.localtime(time.time()))
# self.filename = "/data/FBAndIPTVsocket/result/sourceData/OLTUOOCRCAlarm6%s.csv" % (today)
# self.filenameOK = "/data/FBAndIPTVsocket/result/sourceDataOK/OLTUOOCRCAlarm6%s.csv.ok" % (today)
script_path = os.path.dirname(__file__)
self.filename = script_path+"/OLT上联端口CRC越限告警明细%s.csv" % (today)
self.filenameOK = script_path+"/OLT上联端口CRC越限告警明细%s.csv.ok" % (today)
with open(self.filename.encode('utf-8'), "a") as csvfile:
writer = csv.writer(csvfile)
writer.writerow([strData])
with open(self.filenameOK.encode('utf-8'), "a") as csvfile:
writer = csv.writer(csvfile)
writer.writerow([strData])
cmd = "cp -f %s %s " % (self.filename, self.filenameOK)
ret = os.system(cmd)
self.logger.info(str(ret))
全部代码
import csv
import os
import time
import pandas as pd
from datetime import date
from datetime import datetime
from datetime import timedelta
from getLog import FinalLogger
date_list = []
data_lists = []
ds = date.today().strftime(“%Y%m%d”)
class warningProcess():
def init(self):
self.logger = FinalLogger.getLogger()
self.fileName = “”
self.totalWarningCnt = 0
def clientToServer(self, strData):
today = time.strftime('%Y%m%d', time.localtime(time.time()))
# self.filename = "/data/FBAndIPTVsocket/result/sourceData/OLTUOOCRCAlarm6%s.csv" % (today)
# self.filenameOK = "/data/FBAndIPTVsocket/result/sourceDataOK/OLTUOOCRCAlarm6%s.csv.ok" % (today)
script_path = os.path.dirname(__file__)
self.filename = script_path+"/OLT上联端口CRC越限告警明细%s.csv" % (today)
self.filenameOK = script_path+"/OLT上联端口CRC越限告警明细%s.csv.ok" % (today)
with open(self.filename.encode('utf-8'), "a") as csvfile:
writer = csv.writer(csvfile)
writer.writerow([strData])
with open(self.filenameOK.encode('utf-8'), "a") as csvfile:
writer = csv.writer(csvfile)
writer.writerow([strData])
def alarmListingTemplate(self, data_cleaning):
for index, row in data_cleaning.iterrows():
id = row['id'],
reportTime = row['reportTime'],
manufacturerName = str(row['manufacturerName']),
startTime = row['startTime'],
oltIp = row['oltIp'],
upLinkPort = row['upLinkPort'],
CRC = row['CRC'],
oltName = row['oltName']
manufacturerName = manufacturerName[0].strip()
if manufacturerName == 'ZTE':
manufacturerCHName = '中兴'
maintain_group = '中兴'
elif manufacturerName == 'HW':
manufacturerCHName = '华为'
maintain_group = '华为贝尔'
elif manufacturerName == 'BeiEr':
manufacturerCHName = '贝尔'
maintain_group = '华为贝尔'
else:
manufacturerCHName = '未知'
maintain_group = '未知'
strData1 = "<AlarmStart>"
strData2 = "IntVersion:V1.0.0"
strData3 = "MsgSerial:"
strData4 = "AlarmUniqueId:" + str(id[0])
strData5 = "AlarmUniqueClearId:"
strData6 = "NeName:" + str(oltName).strip()
strData7 = "NeIp:" + str(oltIp[0]).strip()
strData8 = "SystemName:集中性能系统"
strData9 = "EquipmentClass:"
strData10 = "Version:V1.0"
strData11 = "LocateNeName:" + str(upLinkPort[0])
strData12 = "LocateNeType:"
strData13 = "LocateInfo:000000000"
strData14 = "EventTime:" + str(reportTime[0])
strData15 = "CancelTime:" + str(startTime[0])
strData16 = "VendorSeverity:三级告警"
strData17 = "VendorAlarmId:"
strData18 = "AlarmTitle:" + "OLT上联端口CRC误码越限"
strData19 = "ProbableCauseTxt:"
strData20 = "AlarmText:" + "周期内上联端口" + "CRC大于100的次数大于10次"
strData21 = "AlarmStatus:1"
strData22 = "Vendor:" + manufacturerCHName
strData23 = "maintain_group:" + maintain_group
strData24 = "<AlarmEnd>"
strDataU8 = "\r\n" + strData1 + "\r\n" + strData2 + "\r\n" + strData3 + "\r\n" + strData4 + "\r\n" + strData5 + "\r\n" + strData6 + "\r\n" + strData7 + "\r\n" + strData8 + "\r\n" + strData9 + "\r\n" + strData10 + "\r\n" + strData11 + "\r\n" + strData12 + "\r\n" + strData13 + "\r\n" + strData14 + "\r\n" + strData15 + "\r\n" + strData16 + "\r\n" + strData17 + "\r\n" + strData18 + "\r\n" + strData19 + "\r\n" + strData20 + "\r\n" + strData21 + "\r\n" + strData22 + "\r\n" + strData23 + "\r\n" + strData24 + "\r\n"
self.logger.info("生成告警模块")
self.logger.info(strDataU8)
print(strDataU8)
self.clientToServer(strDataU8)
self.totalWarningCnt = self.totalWarningCnt + 1
break
def data_to_excel(self, data_cleaning, data_lists):
for index, row in data_cleaning.iterrows():
id = row['id']
reportTime = row['reportTime']
manufacturerName = row['manufacturerName']
startTime = row['startTime']
oltIp = row['oltIp']
upLinkPort = row['upLinkPort']
CRC = row['CRC']
oltName = row['oltName']
data_list = [id, reportTime, manufacturerName, startTime, oltIp, upLinkPort, CRC, oltName]
data_lists.append(data_list)
# 输出到excel
# col = ['id', 'reportTime', 'manufacturerName', 'startTime', 'oltIp', 'upLinkPort', 'CRC', 'oltName']
# result = pd.DataFrame(data_lists, columns=col)
# result.to_excel(excel_writer=r"周期内上联端口CRC大于100的次数大于10次告警输出结果%s.xlsx" % ds, index=False)
# print('结果输出成功!')
def read_data(self):
# 读取数据
data_extract = pd.read_csv(r'OLT上联端口CRC越限告警明细.csv', engine='python', encoding='gb18030')
data_extract['startTime'] = pd.to_datetime(data_extract['startTime'])
DateStart = data_extract['startTime']
# 获取日
for date in DateStart:
d = date.strftime("%Y-%m-%d")
if d not in date_list:
date_list.append(d)
# 获取同周期CRC>100超过10次
for date in date_list:
range_0 = datetime.strptime(date, "%Y-%m-%d")
range_6 = range_0 + timedelta(hours=6)
range_12 = range_0 + timedelta(hours=12)
range_18 = range_0 + timedelta(hours=18)
range_24 = range_0 + timedelta(hours=24)
# 设置区间
data_cleaning_0to6 = data_extract[
(data_extract['startTime'] >= range_0) & (data_extract['startTime'] <= range_6) & (
data_extract['CRC'] > 100)]
data_cleaning_6to12 = data_extract[
(data_extract['startTime'] > range_6) & (data_extract['startTime'] <= range_12) & (
data_extract['CRC'] > 100)]
data_cleaning_12to18 = data_extract[
(data_extract['startTime'] > range_12) & (data_extract['startTime'] <= range_18) & (
data_extract['CRC'] > 100)]
data_cleaning_18to24 = data_extract[
(data_extract['startTime'] > range_18) & (data_extract['startTime'] <= range_24) & (
data_extract['CRC'] > 100)]
if len(data_cleaning_0to6) > 10:
self.alarmListingTemplate(data_cleaning_0to6)
if len(data_cleaning_6to12) > 10:
self.alarmListingTemplate(data_cleaning_6to12)
if len(data_cleaning_12to18) > 10:
self.alarmListingTemplate(data_cleaning_12to18)
if len(data_cleaning_18to24) > 10:
self.alarmListingTemplate(data_cleaning_18to24)
if self.totalWarningCnt > 0:
print(self.filename)
cmd = "cp -f %s %s " % (self.filename, self.filenameOK)
ret = os.system(cmd)
self.logger.info(str(ret))
else:
self.logger.info("当前告警数为:%d" % self.totalWarningCnt)
if __name__ == '__main__':
WarningProcess = warningProcess()
WarningProcess.read_data()