数据分析-01

import pandas as pd
import numpy as np

from IPython.core.interactiveshell import InteractiveShell
# Have IPython display the result of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Data directory (relative path); file names are appended to it directly.
INPUT_PATH = './'

# Optional cap on the number of rows read from each CSV (currently disabled).
#MAX_ROWS = 100000
# Data loading
# Cruising-taxi GPS records.
# Narrow dtypes are requested up front to reduce memory use; add
# nrows=MAX_ROWS to the call to read only a sample of the file.
taxigps2019 = pd.read_csv(
    INPUT_PATH + 'taxiGps20190603.csv',
    dtype={
        'DRIVING_DIRECTION': np.uint16,
        'OPERATING_STATUS': np.uint8,
        'LONGITUDE': np.float32,
        'LATITUDE': np.float32,
        'GPS_SPEED': np.float16,
    },
)

# Reverse the column order, then order the rows by vehicle and timestamp
# and renumber them from 0.
taxigps2019 = (
    taxigps2019[taxigps2019.columns[::-1]]
    .sort_values(by=['CARNO', 'GPS_TIME'])
    .reset_index(drop=True)
)
# Uncomment to inspect the frame:
# taxigps2019.info()
# taxigps2019.head()
# Cruising-taxi orders.
# Add nrows=MAX_ROWS to the call to read only a sample of the file.
taxiorder2019 = (
    pd.read_csv(
        INPUT_PATH + 'taxiOrder20190603.csv',
        dtype={
            'GETON_LONGITUDE': np.float32,
            'GETON_LATITUDE': np.float32,
            'GETOFF_LONGITUDE': np.float32,
            'GETOFF_LATITUDE': np.float32,
            'PASS_MILE': np.float16,
            'NOPASS_MILE': np.float16,
            'WAITING_TIME': np.float16,
        },
    )
    # Use the same vehicle-id column name as the GPS frame.
    .rename(columns={'CAR_NO': 'CARNO'})
    # Order by vehicle and boarding time, then renumber the rows from 0.
    .sort_values(by=['CARNO', 'GETON_DATE'])
    .reset_index(drop=True)
)
# Ride-hailing GPS records.
# Add nrows=MAX_ROWS to the call to read only a sample of the file.
wycgps2019 = pd.read_csv(
    INPUT_PATH + 'wycGps20190603.csv',
    dtype={
        'LONGITUDE': np.float32,
        'LATITUDE': np.float32,
        'SPEED': np.float16,
    },
).rename(columns={'CAR_NO': 'CARNO'})

# Reverse the column order and order the rows by vehicle and timestamp.
# The index is intentionally left as produced by the sort (no reset here).
wycgps2019 = wycgps2019[wycgps2019.columns[::-1]].sort_values(
    by=['CARNO', 'POSITION_TIME']
)

# Missing values in these two columns become -1 so they can be stored as int8.
for _flag_col in ('BIZ_STATUS', 'ENCRYPT'):
    wycgps2019[_flag_col] = wycgps2019[_flag_col].fillna(-1).astype(np.int8)
# Ride-hailing orders.
# Add nrows=MAX_ROWS to the call to read only a sample of the file.
wycorder2019 = (
    pd.read_csv(
        INPUT_PATH + 'wycOrder20190603.csv',
        dtype={
            'DEP_LONGITUDE': np.float32,
            'DEP_LATITUDE': np.float32,
            'DEST_LONGITUDE': np.float32,
            'DEST_LATITUDE': np.float32,
        },
    )
    # Use the same vehicle-id column name as the other frames.
    .rename(columns={'CAR_NO': 'CARNO'})
    # Order by vehicle and departure time; the index is not reset.
    .sort_values(by=['CARNO', 'DEP_TIME'])
)


# Q1: number of distinct taxis in the cruising-taxi GPS data for 2019-06-03.
print("1.")
print(taxigps2019['CARNO'].nunique())
# Q2: number of distinct vehicles in the ride-hailing GPS data for 2019-06-03.
print("2.")
print(wycgps2019['CARNO'].nunique())
# Q3: maximum and minimum pick-up longitude/latitude in the cruising-taxi
# order data for 2019-06-03.
geton_lon = taxiorder2019['GETON_LONGITUDE']
geton_lat = taxiorder2019['GETON_LATITUDE']
print("3.1")
print(geton_lon.max())
print(geton_lat.max())
print("3.2")
# Only strictly positive coordinates are considered for the minimum
# (presumably to skip zero/placeholder positions -- confirm against the data).
# Series.min() is vectorised, whereas the builtin min() walks the Series one
# element at a time in Python and raises ValueError when nothing passes the
# filter.
print(geton_lon[geton_lon > 0].min())
print(geton_lat[geton_lat > 0].min())
# Q4: most common drop-off location in the ride-hailing order data for
# 2019-06-03.
print("4.")
# NOTE(review): the question is about ride-hailing orders, so this reads the
# DEST_* columns of wycorder2019 rather than the taxi-order GETOFF_* columns.
# Coordinates are rounded to 3 decimals so that nearby points fall into the
# same bucket; DataFrame.round returns a new frame, so the result must be kept.
position = wycorder2019[['DEST_LONGITUDE', 'DEST_LATITUDE']].round(3)
# Count each (longitude, latitude) pair as one location. Counting the two
# columns separately, or stacked into a single Series, would mix longitudes
# with latitudes instead of counting places.
position_counts = (
    position.groupby(['DEST_LONGITUDE', 'DEST_LATITUDE'])
    .size()
    .sort_values(ascending=False)
)
# Most frequent locations come first.
print(position_counts)

第四题最后的结果还是不太对

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值