版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
</div>
<!--一个博主专栏付费入口-->
<!--一个博主专栏付费入口结束-->
<link rel="stylesheet" href="https://csdnimg.cn/release/phoenix/template/css/ck_htmledit_views-4a3473df85.css">
<link rel="stylesheet" href="https://csdnimg.cn/release/phoenix/template/css/ck_htmledit_views-4a3473df85.css">
<div class="htmledit_views" id="content_views">
<h3><a name="t0"></a>查看对应的版本</h3>
# Download and install Python and SciPy, then confirm the environment by
# printing the version of the interpreter and of each library used below.

# Python version
import sys
print('Python: {}'.format(sys.version))

# scipy
import scipy
print('scipy: {}'.format(scipy.__version__))

# numpy
import numpy
print('numpy: {}'.format(numpy.__version__))

# matplotlib
import matplotlib
print('matplotlib: {}'.format(matplotlib.__version__))

# pandas
import pandas
print('pandas: {}'.format(pandas.__version__))

# scikit-learn
import sklearn
print('sklearn: {}'.format(sklearn.__version__))
新建一个简单的Dataframe(数据帧)
import numpy
import pandas
import matplotlib.pyplot as plt
# Scatter-plot matrix
from pandas.plotting import scatter_matrix
# Feature scaling
from sklearn.preprocessing import StandardScaler
# Cross-validation
from sklearn.model_selection import KFold
# Logistic regression
from sklearn.linear_model import LogisticRegression

# Build a 2x3 array and wrap it in a DataFrame with labelled rows and columns.
myarray = numpy.array([[1, 2, 3], [4, 5, 6]])
rownames = ['a', 'b']
colnames = ['one', 'two', 'three']
mydataframe = pandas.DataFrame(myarray, index=rownames, columns=colnames)
print(mydataframe)
   one  two  three
a    1    2      3
b    4    5      6
从 CSV 加载数据,并输出数据的形状(行数,列数)
import pandas

# Remote CSV of the Pima Indians diabetes dataset; the column names are
# supplied explicitly through `names` rather than taken from the file.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pandas.read_csv(url, names=names)

# (rows, columns)
print(data.shape)
csv数据转换 用描述性统计理解数据
# Summarize every column with descriptive statistics.
# `pandas`, `url` and `names` come from the CSV-loading snippet earlier in this article.
data = pandas.read_csv(url, names=names)
description = data.describe()
print(description)
绘图 用可视化理解数据
# Draw a scatter-plot matrix of the dataset's columns and display it.
# `scatter_matrix`, `plt`, `url` and `names` come from the earlier snippets in this article.
data = pandas.read_csv(url, names=names)
scatter_matrix(data)
plt.show()
通过预处理数据为建模做准备
# `pandas`, `numpy`, `StandardScaler`, `url` and `names` come from the earlier
# snippets in this article.
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values

# Split the array into input and output components:
# the first eight columns are the inputs, the ninth column is the output.
X = array[:, 0:8]
Y = array[:, 8]

# Standardize the inputs by centering and scaling.
scaler = StandardScaler().fit(X)
rescaledX = scaler.transform(X)

# Summarize the transformed data.
# Set print options so floats are shown with three digits of precision.
numpy.set_printoptions(precision=3)
print(rescaledX[0:5, :])
\