项目——预处理4

20170416

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#天气等级处理
#天气和故障对接

import pandas as pd
import string
'''
天气的等级划分
'''
# weather = pd.read_csv(r'C:\Users\DELL\Desktop\20170515\weather-province.csv')
# weather['SYNC_ORG_NO'] = weather['SYNC_ORG_NO'].str.strip()
# print weather.head()
# print weather.info()
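# # Level encoding by relative deviation from the column mean: within +/-5% (exclusive) -> 1, at least 5% above -> 2, at least 5% below -> 0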
# weather['V10004_701_'] = (weather['V10004_701'] - weather['V10004_701'].mean())/weather['V10004_701'].mean()
# print weather['V10004_701_']
# weather['V10004_701_'][abs(weather['V10004_701_']) < 0.05] = 1
# weather['V10004_701_'][(weather['V10004_701_'] > 0) & (weather['V10004_701_'] != 1)] = 2
# weather['V10004_701_'][weather['V10004_701_'] < 0] = 0
# print weather['V10004_701_']
#
# weather['V12001_701_'] = (weather['V12001_701'] - weather['V12001_701'].mean())/weather['V12001_701'].mean()
# print weather['V12001_701_']
# weather['V12001_701_'][abs(weather['V12001_701_']) < 0.05] = 1
# weather['V12001_701_'][(weather['V12001_701_'] > 0) & (weather['V12001_701_'] != 1)] = 2
# weather['V12001_701_'][weather['V12001_701_'] < 0] = 0
#
# weather['V12306_'] = (weather['V12306']- weather['V12306'].mean())/weather['V12306'].mean()
# print weather['V12306_']
# weather['V12306_'][abs(weather['V12306_']) < 0.05]=1
# weather['V12306_'][(weather['V12306_'] > 0) & (weather['V12306_'] != 1)] = 2
# weather['V12306_'][weather['V12306_'] < 0] = 0
#
# weather['V12011_701_'] = (weather['V12011_701']- weather['V12011_701'].mean())/weather['V12011_701'].mean()
# print weather['V12011_701_']
# weather['V12011_701_'][abs(weather['V12011_701_']) < 0.05] = 1
# weather['V12011_701_'][(weather['V12011_701_'] > 0) & (weather['V12011_701_'] != 1)] = 2
# weather['V12011_701_'][weather['V12011_701_'] < 0] = 0
#
# weather['V12012_701_'] = (weather['V12012_701'] - weather['V12012_701'].mean())/weather['V12012_701'].mean()
# print weather['V12012_701_']
# weather['V12012_701_'][abs(weather['V12012_701_']) < 0.05] = 1
# weather['V12012_701_'][(weather['V12012_701_'] > 0) & (weather['V12012_701_'] != 1)] = 2
# weather['V12012_701_'][weather['V12012_701_'] < 0] = 0
#
# weather['V12303_701_'] = (weather['V12303_701']- weather['V12303_701'].mean())/weather['V12303_701'].mean()
# print weather['V12303_701_']
# weather['V12303_701_'][abs(weather['V12303_701_']) < 0.05] = 1
# weather['V12303_701_'][(weather['V12303_701_'] > 0) & (weather['V12303_701_'] != 1)] = 2
# weather['V12303_701_'][weather['V12303_701_'] < 0] = 0
#
# weather['V13003_701_'] = (weather['V13003_701']- weather['V13003_701'].mean())/weather['V13003_701'].mean()
# print weather['V13003_701_']
# weather['V13003_701_'][abs(weather['V13003_701_'])<0.05]=1
# weather['V13003_701_'][(weather['V13003_701_'] > 0) & (weather['V13003_701_'] != 1)] = 2
# weather['V13003_701_'][weather['V13003_701_'] < 0] = 0
#
#
# weather['V11291_701_'] = (weather['V11291_701']- weather['V11291_701'].mean())/weather['V11291_701'].mean()
# print weather['V11291_701_']
# weather['V11291_701_'][abs(weather['V11291_701_']) < 0.05] = 1
# weather['V11291_701_'][(weather['V11291_701_'] > 0) & (weather['V11291_701_'] != 1)] = 2
# weather['V11291_701_'][weather['V11291_701_'] < 0] = 0
#
# print weather.head()
# weather.to_csv(r'C:\Users\DELL\Desktop\20170515\weather-province-level.csv', index=False)

'''
故障与天气合并
'''
# Join the fault records with the per-province weather levels and write the
# combined table to disk.
#
# Steps:
#   1. Load the fault table, forcing the listed columns to be read as strings.
#   2. Load the weather table (raw values and their "_"-suffixed level columns),
#      also as strings.
#   3. Strip surrounding whitespace from the join key on both sides so that
#      padded keys still match.
#   4. Left-join weather onto faults (every fault row is kept).
#   5. Drop columns that are not wanted in the output and save as CSV.

# Columns of the fault table that are read as strings (object dtype).
FAULT_STR_COLUMNS = [
    'FAULT_TYPE', 'SYNC_ORG_NO', 'ORG_NO', 'ORG_NAME', 'SORT_CODE',
    'SPEC_CODE', 'COMM_MODE', 'ARRIVE_BATCH_NO', 'QTY', 'MANUFACTURER',
    'EQUIP_ID', 'ORG_NO_5', 'FAULT_TYPE_1', 'FAULT_TYPE_3', 'FAULT_DATE1',
    'INST_DATE1', 'DETECT_DATE1', 'FAULT_MONTH', 'INST_MONTH',
]

# Columns of the weather table that are read as strings (object dtype).
# NOTE(review): the measurement columns are read as strings as well, presumably
# so they are written back out unchanged -- confirm this is intended.
WEATHER_STR_COLUMNS = [
    'SYNC_ORG_NO', 'sheng',
    'V10004_701', 'V12001_701', 'V12306', 'V12011_701',
    'V12012_701', 'V12303_701', 'V13003_701', 'V11291_701',
    'V10004_701_', 'V12001_701_', 'V12306_', 'V12011_701_',
    'V12012_701_', 'V12303_701_', 'V13003_701_', 'V11291_701_',
]

# Fault data
data1 = pd.read_csv(r'C:\Users\DELL\Desktop\20170515\all-2.csv',
                    dtype=dict.fromkeys(FAULT_STR_COLUMNS, object))
print(data1.head())
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print (that would emit a stray "None" line).
data1.info()
data1['SYNC_ORG_NO'] = data1['SYNC_ORG_NO'].str.strip()

# Weather data
data2 = pd.read_csv(r'C:\Users\DELL\Desktop\20170515\weather-province-level.csv',
                    dtype=dict.fromkeys(WEATHER_STR_COLUMNS, object))
print(data2.head())
data2.info()
data2['SYNC_ORG_NO'] = data2['SYNC_ORG_NO'].str.strip()

# Merge: left join on the organisation key; pd.merge already returns a
# DataFrame, so no extra DataFrame(...) wrapper is needed.
all_2_weather = pd.merge(data1, data2, on="SYNC_ORG_NO", how='left')
print(all_2_weather.head())
all_2_weather.info()

# Remove columns that are not wanted in the merged output. This raises KeyError
# if any of them is missing from the merged frame.
all_2_weather.drop(['SORT_CODE', 'FAULT_TYPE_1', 'work_days', 'save_days', 'sheng'],
                   axis=1, inplace=True)

all_2_weather.to_csv(r'C:\Users\DELL\Desktop\20170515\all-2-weather.csv', index=False)


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值