0.前言
本文于2016年7月编写。
1.介绍
sklearn,Python的机器学习算法库,目前是Python做机器学习最常用的库。它基于NumPy、SciPy和matplotlib等实现的。本文主要用到它的逻辑回归模型和评价报表功能。
pandas,Python的数据解析工具,功能强大使用广泛。它基于NumPy实现。本文主要用到它的csv文件载入功能。
iris,一种花,有较多种类。有人提供了关于这种花的数据用于分类模型的学习,这份数据已经成为非常经典且简单的多分类实验数据。
2.使用sklearn库自带的数据进行逻辑回归分析
使用红色字体标注的为关键代码
# iris.py
# Copyright (c) 2016 WU PENG
"""Train and evaluate a logistic-regression classifier on the built-in iris dataset."""
# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# load_iris() provides 150 labelled records about the iris flower
# (4 numeric features, 3 classes).
print("\nLoading data...")
iris = load_iris()
print("Data shape: ", iris.data.shape, iris.target.shape)

# train_test_split() splits the data.
# By default 75% of the data is used to train, and 25% of the data is used to test.
print("\nSplitting data...")
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)
print("Train data shape: ", X_train.shape, y_train.shape)
print("Test data shape: ", X_test.shape, y_test.shape)

print("\nTraining...")
clf = LogisticRegression()
clf.fit(X_train, y_train)
print("intercept: \n", clf.intercept_)
print("coef: \n", clf.coef_)

# Evaluate on the held-out test split and show per-class precision/recall/F1.
print("\nTesting...")
y_true = y_test
y_pred = clf.predict(X_test)
print("true: ", y_true)
print("pred: ", y_pred)
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(y_true, y_pred, target_names=target_names))

print("\nPredict...")
# predict() expects a 2-D array-like: one row per sample. Passing the bare
# 1-D list raises ValueError in modern scikit-learn.
X_pred = [[5, 3, 5, 2.5]]
y_pred = clf.predict(X_pred)
print(X_pred, " = ", y_pred)
3.使用csv格式数据进行逻辑回归分析
3.1.文件
lr/
lr/data.csv
lr/logistic.py
3.2.数据文件
文件名为data.csv,数据与load_iris()相同。
x1、x2、x3、x4是特征,y是目标。
x1,x2,x3,x4,y
5.1,3.5,1.4,0.2,0
4.9,3.0,1.4,0.2,0
4.7,3.2,1.3,0.2,0
4.6,3.1,1.5,0.2,0
5.0,3.6,1.4,0.2,0
5.4,3.9,1.7,0.4,0
4.6,3.4,1.4,0.3,0
5.0,3.4,1.5,0.2,0
4.4,2.9,1.4,0.2,0
4.9,3.1,1.5,0.1,0
5.4,3.7,1.5,0.2,0
4.8,3.4,1.6,0.2,0
4.8,3.0,1.4,0.1,0
4.3,3.0,1.1,0.1,0
5.8,4.0,1.2,0.2,0
5.7,4.4,1.5,0.4,0
5.4,3.9,1.3,0.4,0
5.1,3.5,1.4,0.3,0
5.7,3.8,1.7,0.3,0
5.1,3.8,1.5,0.3,0
5.4,3.4,1.7,0.2,0
5.1,3.7,1.5,0.4,0
4.6,3.6,1.0,0.2,0
5.1,3.3,1.7,0.5,0
4.8,3.4,1.9,0.2,0
5.0,3.0,1.6,0.2,0
5.0,3.4,1.6,0.4,0
5.2,3.5,1.5,0.2,0
5.2,3.4,1.4,0.2,0
4.7,3.2,1.6,0.2,0
4.8,3.1,1.6,0.2,0
5.4,3.4,1.5,0.4,0
5.2,4.1,1.5,0.1,0
5.5,4.2,1.4,0.2,0
4.9,3.1,1.5,0.1,0
5.0,3.2,1.2,0.2,0
5.5,3.5,1.3,0.2,0
4.9,3.1,1.5,0.1,0
4.4,3.0,1.3,0.2,0
5.1,3.4,1.5,0.2,0
5.0,3.5,1.3,0.3,0
4.5,2.3,1.3,0.3,0
4.4,3.2,1.3,0.2,0
5.0,3.5,1.6,0.6,0
5.1,3.8,1.9,0.4,0
4.8,3.0,1.4,0.3,0
5.1,3.8,1.6,0.2,0
4.6,3.2,1.4,0.2,0
5.3,3.7,1.5,0.2,0
5.0,3.3,1.4,0.2,0
7.0,3.2,4.7,1.4,1
6.4,3.2,4.5,1.5,1
6.9,3.1,4.9,1.5,1
5.5,2.3,4.0,1.3,1
6.5,2.8,4.6,1.5,1
5.7,2.8,4.5,1.3,1
6.3,3.3,4.7,1.6,1
4.9,2.4,3.3,1.0,1
6.6,2.9,4.6,1.3,1
5.2,2.7,3.9,1.4,1
5.0,2.0,3.5,1.0,1
5.9,3.0,4.2,1.5,1
6.0,2.2,4.0,1.0,1
6.1,2.9,4.7,1.4,1
5.6,2.9,3.6,1.3,1
6.7,3.1,4.4,1.4,1
5.6,3.0,4.5,1.5,1
5.8,2.7,4.1,1.0,1
6.2,2.2,4.5,1.5,1
5.6,2.5,3.9,1.1,1
5.9,3.2,4.8,1.8,1
6.1,2.8,4.0,1.3,1
6.3,2.5,4.9,1.5,1
6.1,2.8,4.7,1.2,1
6.4,2.9,4.3,1.3,1
6.6,3.0,4.4,1.4,1
6.8,2.8,4.8,1.4,1
6.7,3.0,5.0,1.7,1
6.0,2.9,4.5,1.5,1
5.7,2.6,3.5,1.0,1
5.5,2.4,3.8,1.1,1
5.5,2.4,3.7,1.0,1
5.8,2.7,3.9,1.2,1
6.0,2.7,5.1,1.6,1
5.4,3.0,4.5,1.5,1
6.0,3.4,4.5,1.6,1
6.7,3.1,4.7,1.5,1
6.3,2.3,4.4,1.3,1
5.6,3.0,4.1,1.3,1
5.5,2.5,4.0,1.3,1
5.5,2.6,4.4,1.2,1
6.1,3.0,4.6,1.4,1
5.8,2.6,4.0,1.2,1
5.0,2.3,3.3,1.0,1
5.6,2.7,4.2,1.3,1
5.7,3.0,4.2,1.2,1
5.7,2.9,4.2,1.3,1
6.2,2.9,4.3,1.3,1
5.1,2.5,3.0,1.1,1
5.7,2.8,4.1,1.3,1
6.3,3.3,6.0,2.5,2
5.8,2.7,5.1,1.9,2
7.1,3.0,5.9,2.1,2
6.3,2.9,5.6,1.8,2
6.5,3.0,5.8,2.2,2
7.6,3.0,6.6,2.1,2
4.9,2.5,4.5,1.7,2
7.3,2.9,6.3,1.8,2
6.7,2.5,5.8,1.8,2
7.2,3.6,6.1,2.5,2
6.5,3.2,5.1,2.0,2
6.4,2.7,5.3,1.9,2
6.8,3.0,5.5,2.1,2
5.7,2.5,5.0,2.0,2
5.8,2.8,5.1,2.4,2
6.4,3.2,5.3,2.3,2
6.5,3.0,5.5,1.8,2
7.7,3.8,6.7,2.2,2
7.7,2.6,6.9,2.3,2
6.0,2.2,5.0,1.5,2
6.9,3.2,5.7,2.3,2
5.6,2.8,4.9,2.0,2
7.7,2.8,6.7,2.0,2
6.3,2.7,4.9,1.8,2
6.7,3.3,5.7,2.1,2
7.2,3.2,6.0,1.8,2
6.2,2.8,4.8,1.8,2
6.1,3.0,4.9,1.8,2
6.4,2.8,5.6,2.1,2
7.2,3.0,5.8,1.6,2
7.4,2.8,6.1,1.9,2
7.9,3.8,6.4,2.0,2
6.4,2.8,5.6,2.2,2
6.3,2.8,5.1,1.5,2
6.1,2.6,5.6,1.4,2
7.7,3.0,6.1,2.3,2
6.3,3.4,5.6,2.4,2
6.4,3.1,5.5,1.8,2
6.0,3.0,4.8,1.8,2
6.9,3.1,5.4,2.1,2
6.7,3.1,5.6,2.4,2
6.9,3.1,5.1,2.3,2
5.8,2.7,5.1,1.9,2
6.8,3.2,5.9,2.3,2
6.7,3.3,5.7,2.5,2
6.7,3.0,5.2,2.3,2
6.3,2.5,5.0,1.9,2
6.5,3.0,5.2,2.0,2
6.2,3.4,5.4,2.3,2
5.9,3.0,5.1,1.8,2
3.3.代码文件
使用红色字体标注的为关键代码
# logistic.py
# Copyright (c) 2016 WU PENG
"""Train and evaluate a logistic-regression classifier on iris data loaded from a CSV file."""
import pandas as pd
# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# data.csv: columns x1..x4 are features, y is the target (same data as load_iris()).
print("\nLoading data...")
data = pd.read_csv('./data.csv', header=0)
feature = ['x1', 'x2', 'x3', 'x4']
target = ['y']

# train_test_split() splits the data (75% train / 25% test by default).
print("\nSplitting data...")
X_train, X_test, y_train, y_test = train_test_split(data[feature], data[target])
# data[target] is a single-column DataFrame, so the split targets come back as
# DataFrames. DataFrame has no ravel(); flatten the underlying numpy array via
# .values.ravel() to a 1-D vector, otherwise fit() raises DataConversionWarning.
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()
print("Train data shape: ", X_train.shape, y_train.shape)
print("Test data shape: ", X_test.shape, y_test.shape)

print("\nTraining...")
clf = LogisticRegression()
clf.fit(X_train, y_train)
print("intercept: \n", clf.intercept_)
print("coef: \n", clf.coef_)

# Evaluate on the held-out test split and show per-class precision/recall/F1.
print("\nTesting...")
y_true = y_test
y_pred = clf.predict(X_test)
print("true: ", y_true)
print("pred: ", y_pred)
target_names = ['class 0', 'class 1', 'class 2']
print(classification_report(y_true, y_pred, target_names=target_names))

print("\nPredict...")
# predict() expects a 2-D array-like: one row per sample. Passing the bare
# 1-D list raises ValueError in modern scikit-learn.
X_pred = [[5, 3, 5, 2.5]]
y_pred = clf.predict(X_pred)
print(X_pred, " = ", y_pred)