回归分析demo

#!/usr/bin/env python
from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm
from sklearn.linear_model import LinearRegression
import scipy, scipy.stats
import matplotlib.pyplot as plt

# Pipe-delimited table: one header row followed by one row per region.
data_str = """Region|Alcohol|Tobacco
North|6.47|4.03
Yorkshire|6.13|3.76
Northeast|6.19|3.77
East Midlands|4.89|3.34
West Midlands|5.63|3.47
East Anglia|4.52|2.92
Southeast|5.89|3.20
Southwest|4.79|2.71
Wales|5.27|3.53
Scotland|6.08|4.51
Northern Ireland|4.02|4.56"""


def _to_number(cell):
    """Return *cell* converted to float when it parses as one, else unchanged."""
    try:
        return float(cell)
    except ValueError:
        # Non-numeric text (header names, region names) stays a string.
        return cell


# Split into rows and cells, converting numeric cells to float as we go.
d = [[_to_number(cell) for cell in line.split('|')]
     for line in data_str.split('\n')]

# First row holds the column names; the remaining rows are the records.
df = DataFrame(d[1:], columns=d[0])
# Scatter plot of Alcohol (y) against Tobacco (x), drawn as hollow,
# half-transparent circles with blue edges.
plt.scatter(df['Tobacco'], df['Alcohol'],
            marker='o',
            edgecolor='b',
            facecolor='none',
            alpha=0.5)
plt.xlabel('Tobacco')
plt.ylabel('Alcohol')
# The savefig keyword for the output type is `format`; `fmt` is not a
# savefig parameter and is rejected by newer matplotlib releases.
plt.savefig('alcohol_v_tobacco.png', format='png', dpi=100)


# The uppercase OLS class lives in statsmodels.api; the formula API bound to
# `sm` above only guarantees the lowercase formula interface (`ols`).
import statsmodels.api as sm_api

# Column of ones: OLS adds no constant on its own, so this column's
# coefficient plays the role of the intercept.
df['Eins'] = np.ones((len(df), ))

# [:-1] leaves the last row (Northern Ireland) out of the fit --
# presumably because it is treated as an outlier; confirm with the author.
Y = df.Alcohol[:-1]
X = df[['Tobacco', 'Eins']][:-1]
result = sm_api.OLS(Y, X).fit()

# Print explicitly: a bare expression shows nothing when run as a script.
print(result.summary())

# The 'Eins' coefficient is the intercept term.
print(result.params)
# Output recorded in the original session:
#   Tobacco    1.005896
#   Eins       2.041223
#   dtype: float64

转载于:https://my.oschina.net/kyo4321/blog/1068537

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值