一、读取csv
import numpy as np # linear algebra
import pandas as pd
# Load the Framingham heart-disease CSV (path is relative to the working directory).
heart_data = pd.read_csv(filepath_or_buffer='kaggle/framingham_heart_disease.csv')
二、plotly用于动态绘图
1、画直方图
参与调查的人群的年龄分布
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
# Distribution plot of the "age" column.
# create_distplot takes a list of datasets plus a matching list of labels,
# so the single age series is wrapped in a one-element list.
group_labels = ['age']
hist_data = [heart_data["age"].values]
fig = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
fig.update_layout(title_text='Age Distribution plot')
fig.show()
2、饼图
性别（male 列）与十年冠心病风险（TenYearCHD）之间的关系
男性（male=1）患病（TenYearCHD=1）的比例高于女性
import plotly.graph_objs as go
# Pie chart of the four sex x ten-year-CHD groups.
# Rows with male == 1 are treated as men, rows with male == 0 as women.
male = heart_data[heart_data["male"] == 1]
female = heart_data[heart_data["male"] == 0]

# Build each mask from the subset being filtered so the mask's index matches
# the frame it is applied to. A mask taken from the full heart_data frame has
# a different index and makes pandas warn that it is reindexing the key.
male_CHD = male[male["TenYearCHD"] == 1]
male_notCHD = male[male["TenYearCHD"] == 0]
female_CHD = female[female["TenYearCHD"] == 1]
female_notCHD = female[female["TenYearCHD"] == 0]

# labels and values are paired by position, so they must be listed in the
# same order: CHD first, then Not CHD, for each sex.
labels = ['Male - CHD', 'Male - Not CHD', "Female - CHD", "Female - Not CHD"]
values = [len(male_CHD), len(male_notCHD), len(female_CHD), len(female_notCHD)]

fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.4)])
fig.update_layout(title_text="Analysis on CHD - Gender")
fig.show()
3、箱形图
得病的人群年纪更大
import plotly.express as px
# Box plot of age for each TenYearCHD value; points="all" also draws every
# individual sample next to its box.
fig = px.box(
    data_frame=heart_data,
    x='TenYearCHD',
    y='age',
    points="all",
)
fig.update_layout(title_text="TenYearCHD wise Age Spread - CHD = 1 nonCHD = 0")
fig.show()
5、折线图
import matplotlib.pyplot as plt
import plotly.graph_objs as go
# Each trace is one line on the chart.
trace1 = go.Scatter(x=[1, 2, 3, 4, 5], y=[2, 1, 3, 5, 2])
trace2 = go.Scatter(x=[1, 2, 3, 4, 5], y=[2, 1, 4, 6, 7])

# Use a plotly Figure (not a matplotlib one) so that fig.show() renders
# exactly these traces.
fig = go.Figure(data=[trace1, trace2])
fig.show()
# Filled-area chart.
# fill="tonexty" shades toward the previous trace; plotly documents that with
# no previous trace it behaves like "tozeroy". fillcolor "#FF0" is yellow.
trace1 = go.Scatter(
    x=[1, 2, 3, 4, 5],
    y=[2, 1, 3, 5, 2],
    fill="tonexty",
    fillcolor="#FF0",
)

# Build a fresh figure for this trace instead of re-showing an earlier one.
fig = go.Figure(data=[trace1])
fig.show()
6、散点图
# Scatter plot: mode='markers' draws the points without a connecting line.
trace1=go.Scatter(x=[1,2,3,4,5],y=[2,1,3,5,2],mode='markers')
# NOTE(review): this reassignment discards the trace built on the previous
# line, and `df` is not defined anywhere in the visible file — confirm which
# DataFrame (and which columns) is intended here.
trace1=go.Scatter(x=df['Attack'],y=df['Defense'],mode='markers')
# NOTE(review): `py` is not imported in the visible file; presumably a plotly
# plotting module (e.g. plotly.offline) — confirm and add the import.
py.plot([trace1],filename='basic-scatter')