机器学习:朴素贝叶斯 GaussianNB 鸢尾花数据集分类

import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import  datasets

iris=datasets.load_iris()
print(iris)
{'DESCR': '.. _iris_dataset:\n\nIris plants dataset\n--------------------\n\n**Data Set Characteristics:**\n\n    :Number of Instances: 150 (50 in each of three classes)\n    :Number of Attributes: 4 numeric, predictive attributes and the class\n    :Attribute Information:\n        - sepal length in cm\n        - sepal width in cm\n        - petal length in cm\n        - petal width in cm\n        - class:\n                - Iris-Setosa\n                - Iris-Versicolour\n                - Iris-Virginica\n                \n    :Summary Statistics:\n\n    ============== ==== ==== ======= ===== ====================\n                    Min  Max   Mean    SD   Class Correlation\n    ============== ==== ==== ======= ===== ====================\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)\n    ============== ==== ==== ======= ===== ====================\n\n    :Missing Attribute Values: None\n    :Class Distribution: 33.3% for each of 3 classes.\n    :Creator: R.A. Fisher\n    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n    :Date: July, 1988\n\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\nfrom Fisher\'s paper. Note that it\'s the same as in R, but not as in the UCI\nMachine Learning Repository, which has two wrong data points.\n\nThis is perhaps the best known database to be found in the\npattern recognition literature.  Fisher\'s paper is a classic in the field and\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant.  One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\n.. topic:: References\n\n   - Fisher, R.A. 
"The use of multiple measurements in taxonomic problems"\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n     Mathematical Statistics" (John Wiley, NY, 1950).\n   - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n   - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n     Structure and Classification Rule for Recognition in Partially Exposed\n     Environments".  IEEE Transactions on Pattern Analysis and Machine\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n   - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE Transactions\n     on Information Theory, May 1972, 431-433.\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al"s AUTOCLASS II\n     conceptual clustering system finds 3 classes in the data.\n   - Many, many more ...', 'filename': 'e:\\application\\python\\lib\\site-packages\\sklearn\\datasets\\data\\iris.csv', 'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.6, 1.4, 0.1],
       [4.4, 3. , 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [4.5, 2.3, 1.3, 0.3],
       [4.4, 3.2, 1.3, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [5.1, 3.8, 1.9, 0.4],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.6, 0.2],
       [4.6, 3.2, 1.4, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.3, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3. , 4.5, 1.5],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [6.3, 2.3, 4.4, 1.3],
       [5.6, 3. , 4.1, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [5.5, 2.6, 4.4, 1.2],
       [6.1, 3. , 4.6, 1.4],
       [5.8, 2.6, 4. , 1.2],
       [5. , 2.3, 3.3, 1. ],
       [5.6, 2.7, 4.2, 1.3],
       [5.7, 3. , 4.2, 1.2],
       [5.7, 2.9, 4.2, 1.3],
       [6.2, 2.9, 4.3, 1.3],
       [5.1, 2.5, 3. , 1.1],
       [5.7, 2.8, 4.1, 1.3],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 2.7, 5.1, 1.9],
       [7.1, 3. , 5.9, 2.1],
       [6.3, 2.9, 5.6, 1.8],
       [6.5, 3. , 5.8, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [4.9, 2.5, 4.5, 1.7],
       [7.3, 2.9, 6.3, 1.8],
       [6.7, 2.5, 5.8, 1.8],
       [7.2, 3.6, 6.1, 2.5],
       [6.5, 3.2, 5.1, 2. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.8, 3. , 5.5, 2.1],
       [5.7, 2.5, 5. , 2. ],
       [5.8, 2.8, 5.1, 2.4],
       [6.4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.2, 5. , 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [5.6, 2.8, 4.9, 2. ],
       [7.7, 2.8, 6.7, 2. ],
       [6.3, 2.7, 4.9, 1.8],
       [6.7, 3.3, 5.7, 2.1],
       [7.2, 3.2, 6. , 1.8],
       [6.2, 2.8, 4.8, 1.8],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [7.2, 3. , 5.8, 1.6],
       [7.4, 2.8, 6.1, 1.9],
       [7.9, 3.8, 6.4, 2. ],
       [6.4, 2.8, 5.6, 2.2],
       [6.3, 2.8, 5.1, 1.5],
       [6.1, 2.6, 5.6, 1.4],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.4, 5.6, 2.4],
       [6.4, 3.1, 5.5, 1.8],
       [6. , 3. , 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.9, 3.1, 5.1, 2.3],
       [5.8, 2.7, 5.1, 1.9],
       [6.8, 3.2, 5.9, 2.3],
       [6.7, 3.3, 5.7, 2.5],
       [6.7, 3. , 5.2, 2.3],
       [6.3, 2.5, 5. , 1.9],
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), 'feature_names': ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'], 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10')}
X_train,Xtest,y_train,y_test=train_test_split(iris.data,iris.target,
                                              random_state=12)

print(X_train.shape)
print(Xtest.shape)
(112, 4)
(38, 4)
clf=GaussianNB()
clf.fit(X_train,y_train)
GaussianNB(priors=None, var_smoothing=1e-09)
clf.predict(Xtest)
array([0, 2, 0, 1, 2, 2, 2, 0, 2, 0, 1, 0, 0, 0, 1, 2, 2, 1, 0, 1, 0, 1,
       2, 1, 0, 2, 2, 1, 0, 0, 0, 1, 2, 0, 2, 0, 1, 1])
clf.predict_proba(Xtest)
array([[1.00000000e+000, 2.32926069e-017, 1.81656357e-023],
       [4.28952299e-154, 2.48576754e-002, 9.75142325e-001],
       [1.00000000e+000, 7.45528845e-018, 3.79800436e-024],
       [3.59748710e-076, 9.99751806e-001, 2.48194200e-004],
       [2.20411871e-239, 4.45798016e-009, 9.99999996e-001],
       [1.23795145e-173, 1.95814902e-003, 9.98041851e-001],
       [2.45866589e-206, 2.34481513e-007, 9.99999766e-001],
       [1.00000000e+000, 2.61810906e-017, 2.67446831e-023],
       [3.07448595e-259, 9.07196639e-011, 1.00000000e+000],
       [1.00000000e+000, 1.14549667e-010, 3.00314173e-017],
       [1.64566141e-101, 9.87428016e-001, 1.25719837e-002],
       [1.00000000e+000, 5.62770009e-016, 8.77233124e-022],
       [1.00000000e+000, 9.78098062e-014, 4.81247272e-020],
       [1.00000000e+000, 3.96616431e-015, 3.17162008e-021],
       [2.58159395e-110, 7.85918892e-001, 2.14081108e-001],
       [8.01004975e-208, 8.36611920e-006, 9.99991634e-001],
       [2.27845999e-193, 5.52863568e-004, 9.99447136e-001],
       [2.52133012e-090, 9.94597495e-001, 5.40250471e-003],
       [1.00000000e+000, 4.06675976e-017, 2.53312064e-023],
       [3.29537129e-123, 9.22312452e-001, 7.76875484e-002],
       [1.00000000e+000, 4.66765440e-017, 1.99662820e-023],
       [7.54708431e-074, 9.99690656e-001, 3.09343577e-004],
       [6.27117035e-136, 1.83265786e-001, 8.16734214e-001],
       [4.68960290e-103, 9.82756006e-001, 1.72439943e-002],
       [1.00000000e+000, 2.15636250e-014, 2.25086772e-020],
       [5.92924136e-199, 5.41122729e-007, 9.99999459e-001],
       [4.07679795e-141, 7.38689632e-002, 9.26131037e-001],
       [2.77929930e-083, 9.99806458e-001, 1.93541791e-004],
       [1.00000000e+000, 4.48465501e-017, 4.36464333e-023],
       [1.00000000e+000, 1.64440161e-014, 1.13341951e-021],
       [1.00000000e+000, 8.68192867e-017, 6.71630735e-023],
       [7.15007036e-050, 9.99997055e-001, 2.94492877e-006],
       [1.73414331e-178, 2.06441448e-003, 9.97935586e-001],
       [1.00000000e+000, 4.90168069e-019, 3.86471595e-024],
       [1.35600871e-156, 2.28929843e-002, 9.77107016e-001],
       [1.00000000e+000, 1.78544881e-015, 1.09390819e-020],
       [1.86074590e-058, 9.99948860e-001, 5.11400371e-005],
       [3.69548269e-057, 9.99992986e-001, 7.01435008e-006]])
accuracy_score(y_test,clf.predict(Xtest))
0.9736842105263158


import numpy as np 
import pandas as pd
import random
dataSet=pd.read_csv('iris.txt',header=None)
dataSet.head(10)
01234
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
55.43.91.70.4Iris-setosa
64.63.41.40.3Iris-setosa
75.03.41.50.2Iris-setosa
84.42.91.40.2Iris-setosa
94.93.11.50.1Iris-setosa
dataSet.shape
dataSet.index
list(dataSet.index)

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149]
import random
def randSplit(dataSet, rate):
    """Randomly split *dataSet* into a training set and a test set.

    Parameters
    ----------
    dataSet : pd.DataFrame
        Full dataset. It is NOT modified (the original implementation
        temporarily reassigned ``dataSet.index`` — a side effect that
        left the caller's DataFrame shuffled if an error interrupted).
    rate : float
        Fraction of rows (0..1) assigned to the training set.

    Returns
    -------
    (train, test) : tuple of DataFrames
        Disjoint random row subsets, each re-indexed from 0.
    """
    n = dataSet.shape[0]
    m = int(n * rate)
    # Shuffle row *positions* instead of mutating dataSet.index, so the
    # caller's DataFrame stays untouched.
    order = list(range(n))
    random.shuffle(order)
    train = dataSet.iloc[order[:m]].reset_index(drop=True)
    test = dataSet.iloc[order[m:]].reset_index(drop=True)
    return train, test
    
x_train,x_test=randSplit(dataSet,0.8)
x_train
01234
05.03.51.30.3Iris-setosa
15.62.74.21.3Iris-versicolor
26.33.34.71.6Iris-versicolor
34.43.01.30.2Iris-setosa
45.52.64.41.2Iris-versicolor
56.43.15.51.8Iris-virginica
64.92.43.31.0Iris-versicolor
75.43.91.30.4Iris-setosa
86.03.44.51.6Iris-versicolor
96.42.85.62.2Iris-virginica
105.03.51.60.6Iris-setosa
116.02.75.11.6Iris-versicolor
125.02.03.51.0Iris-versicolor
134.93.01.40.2Iris-setosa
145.13.31.70.5Iris-setosa
156.32.54.91.5Iris-versicolor
165.62.93.61.3Iris-versicolor
175.03.31.40.2Iris-setosa
187.32.96.31.8Iris-virginica
194.63.21.40.2Iris-setosa
205.84.01.20.2Iris-setosa
216.53.05.22.0Iris-virginica
225.52.34.01.3Iris-versicolor
235.13.81.90.4Iris-setosa
246.32.95.61.8Iris-virginica
255.22.73.91.4Iris-versicolor
266.72.55.81.8Iris-virginica
274.92.54.51.7Iris-virginica
286.73.05.22.3Iris-virginica
297.13.05.92.1Iris-virginica
..................
907.23.26.01.8Iris-virginica
917.03.24.71.4Iris-versicolor
925.43.41.70.2Iris-setosa
935.82.74.11.0Iris-versicolor
946.83.05.52.1Iris-virginica
955.13.71.50.4Iris-setosa
965.63.04.11.3Iris-versicolor
975.72.94.21.3Iris-versicolor
986.02.24.01.0Iris-versicolor
997.63.06.62.1Iris-virginica
1006.53.25.12.0Iris-virginica
1014.52.31.30.3Iris-setosa
1025.72.55.02.0Iris-virginica
1035.43.41.50.4Iris-setosa
1047.42.86.11.9Iris-virginica
1055.82.75.11.9Iris-virginica
1066.23.45.42.3Iris-virginica
1074.63.61.00.2Iris-setosa
1086.13.04.91.8Iris-virginica
1095.03.61.40.2Iris-setosa
1105.52.54.01.3Iris-versicolor
1116.22.84.81.8Iris-virginica
1126.53.05.82.2Iris-virginica
1136.73.14.41.4Iris-versicolor
1144.83.01.40.1Iris-setosa
1156.12.94.71.4Iris-versicolor
1165.93.24.81.8Iris-versicolor
1174.93.11.50.1Iris-setosa
1186.33.45.62.4Iris-virginica
1195.82.73.91.2Iris-versicolor

120 rows × 5 columns

x_test
01234
05.74.41.50.4Iris-setosa
16.42.75.31.9Iris-virginica
26.03.04.81.8Iris-virginica
35.13.81.50.3Iris-setosa
44.83.41.60.2Iris-setosa
54.63.11.50.2Iris-setosa
66.53.05.51.8Iris-virginica
74.93.11.50.1Iris-setosa
86.32.55.01.9Iris-virginica
95.43.91.70.4Iris-setosa
105.13.41.50.2Iris-setosa
115.13.51.40.2Iris-setosa
124.83.01.40.3Iris-setosa
136.62.94.61.3Iris-versicolor
145.93.05.11.8Iris-virginica
155.23.41.40.2Iris-setosa
167.72.66.92.3Iris-virginica
175.43.04.51.5Iris-versicolor
185.82.75.11.9Iris-virginica
196.73.05.01.7Iris-versicolor
205.82.64.01.2Iris-versicolor
214.73.21.60.2Iris-setosa
226.33.36.02.5Iris-virginica
235.02.33.31.0Iris-versicolor
245.33.71.50.2Iris-setosa
255.73.81.70.3Iris-setosa
266.73.14.71.5Iris-versicolor
277.93.86.42.0Iris-virginica
285.12.53.01.1Iris-versicolor
296.22.94.31.3Iris-versicolor
labels=x_train.loc[:,4]#标签索引
labels=x_train.iloc[:,-1]#位置索引
labels
0          Iris-setosa
1      Iris-versicolor
2      Iris-versicolor
3          Iris-setosa
4      Iris-versicolor
5       Iris-virginica
6      Iris-versicolor
7          Iris-setosa
8      Iris-versicolor
9       Iris-virginica
10         Iris-setosa
11     Iris-versicolor
12     Iris-versicolor
13         Iris-setosa
14         Iris-setosa
15     Iris-versicolor
16     Iris-versicolor
17         Iris-setosa
18      Iris-virginica
19         Iris-setosa
20         Iris-setosa
21      Iris-virginica
22     Iris-versicolor
23         Iris-setosa
24      Iris-virginica
25     Iris-versicolor
26      Iris-virginica
27      Iris-virginica
28      Iris-virginica
29      Iris-virginica
            ...       
90      Iris-virginica
91     Iris-versicolor
92         Iris-setosa
93     Iris-versicolor
94      Iris-virginica
95         Iris-setosa
96     Iris-versicolor
97     Iris-versicolor
98     Iris-versicolor
99      Iris-virginica
100     Iris-virginica
101        Iris-setosa
102     Iris-virginica
103        Iris-setosa
104     Iris-virginica
105     Iris-virginica
106     Iris-virginica
107        Iris-setosa
108     Iris-virginica
109        Iris-setosa
110    Iris-versicolor
111     Iris-virginica
112     Iris-virginica
113    Iris-versicolor
114        Iris-setosa
115    Iris-versicolor
116    Iris-versicolor
117        Iris-setosa
118     Iris-virginica
119    Iris-versicolor
Name: 4, Length: 120, dtype: object
labels=x_train.iloc[:,-1].value_counts()
labels
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
labels=x_train.iloc[:,-1].value_counts().index
labels
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
### 计算方差与均值
mean=[]
std=[]

for i in labels:

    item=x_train.loc[x_train.iloc[:,-1]==i,:]
    m=item.iloc[:,:-1]
    
item
01234
06.02.25.01.5Iris-virginica
26.03.04.81.8Iris-virginica
35.82.75.11.9Iris-virginica
67.23.66.12.5Iris-virginica
116.33.36.02.5Iris-virginica
136.73.05.22.3Iris-virginica
186.83.05.52.1Iris-virginica
227.42.86.11.9Iris-virginica
256.33.45.62.4Iris-virginica
266.43.15.51.8Iris-virginica
316.13.04.91.8Iris-virginica
367.23.05.81.6Iris-virginica
376.32.95.61.8Iris-virginica
406.12.65.61.4Iris-virginica
467.32.96.31.8Iris-virginica
526.32.55.01.9Iris-virginica
637.93.86.42.0Iris-virginica
647.73.86.72.2Iris-virginica
656.23.45.42.3Iris-virginica
694.92.54.51.7Iris-virginica
726.73.35.72.1Iris-virginica
736.53.05.22.0Iris-virginica
746.93.25.72.3Iris-virginica
816.32.74.91.8Iris-virginica
826.93.15.42.1Iris-virginica
836.42.75.31.9Iris-virginica
847.13.05.92.1Iris-virginica
867.72.66.92.3Iris-virginica
907.73.06.12.3Iris-virginica
935.62.84.92.0Iris-virginica
946.53.05.82.2Iris-virginica
976.42.85.62.1Iris-virginica
1016.32.85.11.5Iris-virginica
1026.42.85.62.2Iris-virginica
1096.83.25.92.3Iris-virginica
1117.23.26.01.8Iris-virginica
1126.93.15.12.3Iris-virginica
m
0    4.970270
1    3.383784
2    1.443243
3    0.243243
dtype: float64
mean=[]
std=[]

for i in labels:

    item=x_train.loc[x_train.iloc[:,-1]==i,:]
    m=item.iloc[:,:-1].mean()
    
m
0    4.970270
1    3.383784
2    1.443243
3    0.243243
dtype: float64
mean=[]
std=[]

for i in labels:

    item=x_train.loc[x_train.iloc[:,-1]==i,:]
    m=item.iloc[:,:-1].mean()
    
    s=np.sum((item.iloc[:,:-1]-m)**2)/item.shape[0]
(item.iloc[:,:-1]-m)**2
0123
00.0008840.0135060.0205190.003221
30.3252080.1472900.0205190.001870
70.1846680.2664790.0205190.024573
100.0008840.0135060.0245730.127275
130.0049380.1472900.0018700.001870
140.0168300.0070200.0659240.065924
170.0008840.0070200.0018700.001870
190.1371000.0337760.0018700.001870
200.6884510.3797220.0591670.001870
230.0168300.1732360.2086270.024573
370.3252080.2340470.0018700.001870
400.0527760.0135060.0032210.001870
430.0008840.0337760.0591670.001870
440.1371000.0002630.0018700.003221
450.0008840.1472900.0245730.001870
490.1846680.0999930.0032210.001870
560.0289920.0002630.2086270.001870
590.4492620.1472900.1178160.020519
650.0168300.1732360.0245730.001870
710.0168300.0135060.0018700.003221
740.0730460.0337760.0205190.001870
750.2806140.6662090.0018700.001870
760.3252080.0337760.0205190.001870
770.0049380.0805330.0032210.020519
800.0527760.5129660.0032210.020519
810.0008840.0002630.0032210.001870
840.0008840.0002630.0245730.024573
860.2806140.0135060.0205190.001870
870.0289920.0805330.0245730.001870
920.1846680.0002630.0659240.001870
950.0168300.0999930.0032210.024573
1010.2211541.1745870.0205190.003221
1030.1846680.0002630.0032210.024573
1070.1371000.0467490.1964650.001870
1090.0008840.0467490.0018700.001870
1140.0289920.1472900.0018700.020519
1170.0049380.0805330.0032210.020519
s
0    0.119386
1    0.137034
2    0.034887
3    0.012725
dtype: float64
mean=[]
std=[]

for i in labels:

    item=x_train.loc[x_train.iloc[:,-1]==i,:]
    m=item.iloc[:,:-1].mean()
    
    s=np.sum((item.iloc[:,:-1]-m)**2)/item.shape[0]
    mean.append(m)
    std.append(s)
means=pd.DataFrame(mean,index=labels)
stds=pd.DataFrame(std,index=labels)
mean
[0    5.935714
 1    2.766667
 2    4.276190
 3    1.326190
 dtype: float64, 0    6.600000
 1    2.978049
 2    5.548780
 3    2.034146
 dtype: float64, 0    4.970270
 1    3.383784
 2    1.443243
 3    0.243243
 dtype: float64]
std
[0    0.226105
 1    0.101270
 2    0.174195
 3    0.036695
 dtype: float64, 0    0.370732
 1    0.092445
 2    0.264450
 3    0.077858
 dtype: float64, 0    0.119386
 1    0.137034
 2    0.034887
 3    0.012725
 dtype: float64]
means
0123
Iris-versicolor5.9357142.7666674.2761901.326190
Iris-virginica6.6000002.9780495.5487802.034146
Iris-setosa4.9702703.3837841.4432430.243243
stds
0123
Iris-versicolor0.2261050.1012700.1741950.036695
Iris-virginica0.3707320.0924450.2644500.077858
Iris-setosa0.1193860.1370340.0348870.012725
for j in range(x_test.shape[0]):
    iset=x_test.iloc[j,:-1]
iset
0    6.2
1    2.9
2    4.3
3    1.3
Name: 29, dtype: object
for j in range(x_test.shape[0]):
    iset=x_test.iloc[j,:-1].tolist()
iset
[6.2, 2.9, 4.3, 1.3]
for j in range(x_test.shape[0]):
    iset=x_test.iloc[j,:-1].tolist()
    iprob=np.exp(-1*(iset-means)**2/(stds*2))/np.sqrt(2*np.pi*stds)
iset-means
0123
Iris-versicolor0.2642860.1333330.023810-0.026190
Iris-virginica-0.400000-0.078049-1.248780-0.734146
Iris-setosa1.229730-0.4837842.8567571.056757
iprob

0123
Iris-versicolor0.7189111.1482869.543013e-012.063229e+00
Iris-virginica0.5280371.2695794.066561e-024.488144e-02
Iris-setosa0.0020510.4587973.406877e-513.100002e-19
iprob[0]
Iris-versicolor    0.718911
Iris-virginica     0.528037
Iris-setosa        0.002051
Name: 0, dtype: float64
for j in range(x_test.shape[0]):
    iset=x_test.iloc[j,:-1].tolist()
    iprob=np.exp(-1*(iset-means)**2/(stds*2))/np.sqrt(2*np.pi*stds)
    prob=1
    for k in range(x_test.shape[1]-1):
        prob*=iprob[k]
prob
Iris-versicolor    1.625391e+00
Iris-virginica     1.223539e-03
Iris-setosa        9.936339e-73
Name: 0, dtype: float64
prob.values
array([1.62539149e+00, 1.22353864e-03, 9.93633857e-73])
prob.index
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')

np.argmax(prob.values)
0
result=[]
for j in range(x_test.shape[0]):
    iset=x_test.iloc[j,:-1].tolist()
    iprob=np.exp(-1*(iset-means)**2/(stds*2))/np.sqrt(2*np.pi*stds)
    prob=1
    for k in range(x_test.shape[1]-1):
        prob*=iprob[k]
    cla=prob.index[(np.argmax(prob.values))]
    result.append(cla)
x_test['predict']=result
    acc=(x_test.iloc[:,-1]==x_test.iloc[:,-2]).mean()
    print("accuracy rate is",acc)

  File "<ipython-input-374-38f3e0c25417>", line 11
    acc=(x_test.iloc[:,-1]==x_test.iloc[:,-2]).mean()
    ^
IndentationError: unexpected indent
result
x_test
cla

def gnb_classify(x_train, x_test):
    """Hand-rolled Gaussian naive-Bayes classifier.

    Parameters
    ----------
    x_train : pd.DataFrame
        Features in every column but the last; class label in the last
        column. Feature columns must be integer-labelled 0..d-1 (as when
        read with ``pd.read_csv(..., header=None)``).
    x_test : pd.DataFrame
        Same layout as *x_train*. A ``'predict'`` column with the
        predicted class of each row is appended in place.

    Returns
    -------
    pd.DataFrame
        *x_test* with the added ``'predict'`` column.
    """
    # BUG FIX: the original iterated value_counts() itself, i.e. the class
    # *counts* (integers), not the class names, so every `==i` mask was
    # empty and the model was fit on NaNs. `.index` yields the labels.
    labels = x_train.iloc[:, -1].value_counts().index
    mean = []
    std = []
    result = []

    # Per-class mean and (population) variance of every feature.
    for i in labels:
        item = x_train.loc[x_train.iloc[:, -1] == i, :]
        m = item.iloc[:, :-1].mean()
        # Divide by n (not n-1): population variance, matching the
        # exploratory cells earlier in the script.
        s = np.sum((item.iloc[:, :-1] - m) ** 2) / item.shape[0]
        mean.append(m)
        std.append(s)
    means = pd.DataFrame(mean, index=labels)
    stds = pd.DataFrame(std, index=labels)

    for j in range(x_test.shape[0]):
        iset = x_test.iloc[j, :-1].tolist()
        # Gaussian density of each feature value under each class.
        iprob = np.exp(-1 * (iset - means) ** 2 / (stds * 2)) / np.sqrt(2 * np.pi * stds)
        # Naive-Bayes independence assumption: multiply per-feature densities.
        prob = 1
        for k in range(x_test.shape[1] - 1):
            prob *= iprob[k]
        cla = prob.index[(np.argmax(prob.values))]
        result.append(cla)
    x_test['predict'] = result
#     acc=(x_test.iloc[:,-1]==x_test.iloc[:,-2]).mean()
#     print("accuracy rate is",acc)
    return x_test
gnb_classify(x_train,x_test)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值