import pandas as pd
import numpy as np
from datetime import datetime
import os
import warnings
warnings.filterwarnings("ignore")
import plotly.offline as py
py.init_notebook_mode(connected=True)
def runlog(str1):
run_log_time = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
print(run_log_time + " " + str1)
def file_read_to_df(path, file_type, sep=',',header='infer'):
filelist = os.listdir(path)
df = pd.DataFrame()
runlog(f'Load file path {path} start...')
for file in filelist:
if file_type in file:
runlog(f'Load file {file} start...')
if file_type in ['csv', 'txt']:
dftmp = pd.read_csv(f'{path}/{file}', sep=sep, header=header)
elif file_type in ['xls', 'xlsx']:
dftmp = pd.read_excel(f'{path}/{file}', header=header, sheet_name = "",usecols="L:P",nrows=93)
dftmp['日期'] = file[0:9]
else:
raise Exception("Please provide valid file extension")
df = df.append(dftmp,ignore_index = True)
else:
runlog(f'Ingrore file:{file}')
runlog(f'Load file path {path} end.')
return df
df_data = file_read_to_df(path,'xlsx')
df_data = df_data.round({'DWS':2, 'DWD':2,'ODS':2})#保留两位小数
# df1['HW-DWS'].round(decimals=3)
provience.rename(columns={'prov_id':'Prov_id'}, inplace = True)
df = df_data[df_data["Traffic_Type"]=='ALL']
import missingno as msno
#fig = msno.matrix(df)
msno.matrix(df, figsize=(10, 5), width_ratios=(15, 1));
#msno.bar(df);
msno.bar(df, figsize=(10, 5), );
df.describe()
df_matrix = df[["DWS","DWD","ODS"]]
df_matrix["index"] = np.arange(len(df_matrix))
import plotly.figure_factory as ff
import plotly.offline as py
fig = ff.create_scatterplotmatrix(df_matrix, diag='box',
index='index',
colormap_type='cat',
colormap='RdBu',
height=600, width=600
)
fig.update_layout(title_text='散点图和箱线图', title_x=0.5)
py.iplot(fig)
import plotly
import plotly.io as pio
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)
import plotly as py
plotly.offline.init_notebook_mode()
from plotly.offline import init_notebook_mode, iplot, plot
def df_draw(date,value):
trace1 =go.Scatter(
x = date,
y = value["DWS"],
mode = "lines",
name = "DWS",
marker = dict(color = 'rgba(139, 139, 139, 0.9)'),
text= 'DWS')
trace2 =go.Scatter(
x = date,
y = value["DWD"],
mode = "lines",
name = "DWD",
marker = dict(color = 'rgba(255, 215, 0, 0.9)'),
text= 'DWD')
trace3 =go.Scatter(
x = date,
y = value["ODS"],
mode = "lines",
name = "ODS",
marker = dict(color = 'rgba(255, 0, 0, 0.9)'),
text= 'ODS')
data = [trace1, trace2, trace3]
prov_list = ['...']
layout = dict(title = prov_list[i-101]+"xxx变化情况",
title_x=0.5,
#plot_bgcolor='#E6E6FA',
#title_y=0.8,
xaxis= dict(title= 'time',ticklen= 5, zeroline= False),
yaxis= dict(title= 'picture',ticklen= 5,zeroline= False)
)
fig = dict(data = data, layout = layout)
py.offline.iplot(fig)
#pio.write_html(fig, file='D:/Desktop/iris.html')
for i in range(101,132):
#df1 = pd.merge(s,df,how ='inner',on = ['prov_id'])
df1 = df[(df["Traffic_Type"]=='ALL') & (df["Prov_id"]== i)]
df1["Date"] = pd.to_datetime(df1["Date"]) #转换为时间格式
df1 = df1.sort_values(by = 'Date')
#时间格式化,2020-07-20 -> 07/20
dates1 = pd.to_datetime(df1["Date"], format='%Y/%m/%d')
df1["date_value"] = dates1.dt.strftime('%m/%d')
date = df1["date_value"]
value = df1[["DWS","DWD","ODS"]]
df_draw(date,value)
# create trace 1 that is 3d scatter
trace1 = go.Scatter3d(
x=value.DWS,
y=value.DWD,
z=value.ODS,
mode='markers',
marker=dict(
size=10,
color='rgb(255,0,0)', # set color to an array/list of desired values
)
)
data = [trace1]
layout = go.Layout(
margin=dict(
l=0,
r=0,
b=0,
t=0
)
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
import pymysql
from sqlalchemy import create_engine
# 数据库连接
connect = create_engine('mysql+pymysql://root:password@localhost:330/database')
# 保存到数据库
df_data.to_sql('table_name',
con=connect,
if_exists='append',#如果表名存在,追加数据
index=None)#不保存索引
# 从数据库里读数据
sql = 'select * from traffic_monitor_rseult \
where Prov_id="xxx" and Traffic_Type="x" \
order by Date'
pd.read_sql(sql,connect)
# import pymysql
# db_pymysql = pymysql.connect(host='localhost',
# port=330,
# user='name',
# passwd='password',
# db='test',
# use_unicode=True,
# charset="utf8")
# df = pd.read_sql('select Python from score where Python>40 ',con=db_pymysql)
df1["prov_id"].str.split("=").apply(lambda x:x[-1]).value_counts().plot(kind='bar')
Plotly:https://plotly.com/python/
Seaborn:https://seaborn.pydata.org/tutorial.html
matplotlib:https://matplotlib.org/stable/gallery/index.html
PyViz:https://pyviz.org/overviews/index.html
数据分析代码备份
最新推荐文章于 2022-10-19 10:45:00 发布