import pandas as pd
from sklearn. naive_bayes import GaussianNB
from sklearn. model_selection import train_test_split
from sklearn. metrics import accuracy_score
from sklearn import datasets
iris= datasets. load_iris( )
print ( iris)
{'DESCR': '.. _iris_dataset:\n\nIris plants dataset\n--------------------\n\n**Data Set Characteristics:**\n\n :Number of Instances: 150 (50 in each of three classes)\n :Number of Attributes: 4 numeric, predictive attributes and the class\n :Attribute Information:\n - sepal length in cm\n - sepal width in cm\n - petal length in cm\n - petal width in cm\n - class:\n - Iris-Setosa\n - Iris-Versicolour\n - Iris-Virginica\n \n :Summary Statistics:\n\n ============== ==== ==== ======= ===== ====================\n Min Max Mean SD Class Correlation\n ============== ==== ==== ======= ===== ====================\n sepal length: 4.3 7.9 5.84 0.83 0.7826\n sepal width: 2.0 4.4 3.05 0.43 -0.4194\n petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n ============== ==== ==== ======= ===== ====================\n\n :Missing Attribute Values: None\n :Class Distribution: 33.3% for each of 3 classes.\n :Creator: R.A. Fisher\n :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n :Date: July, 1988\n\nThe famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\nfrom Fisher\'s paper. Note that it\'s the same as in R, but not as in the UCI\nMachine Learning Repository, which has two wrong data points.\n\nThis is perhaps the best known database to be found in the\npattern recognition literature. Fisher\'s paper is a classic in the field and\nis referenced frequently to this day. (See Duda & Hart, for example.) The\ndata set contains 3 classes of 50 instances each, where each class refers to a\ntype of iris plant. One class is linearly separable from the other 2; the\nlatter are NOT linearly separable from each other.\n\n.. topic:: References\n\n - Fisher, R.A. "The use of multiple measurements in taxonomic problems"\n Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to\n Mathematical Statistics" (John Wiley, NY, 1950).\n - Duda, R.O., & Hart, P.E. 
(1973) Pattern Classification and Scene Analysis.\n (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System\n Structure and Classification Rule for Recognition in Partially Exposed\n Environments". IEEE Transactions on Pattern Analysis and Machine\n Intelligence, Vol. PAMI-2, No. 1, 67-71.\n - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions\n on Information Theory, May 1972, 431-433.\n - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II\n conceptual clustering system finds 3 classes in the data.\n - Many, many more ...', 'filename': 'e:\\application\\python\\lib\\site-packages\\sklearn\\datasets\\data\\iris.csv', 'data': array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
[5.4, 3.9, 1.7, 0.4],
[4.6, 3.4, 1.4, 0.3],
[5. , 3.4, 1.5, 0.2],
[4.4, 2.9, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.1],
[5.4, 3.7, 1.5, 0.2],
[4.8, 3.4, 1.6, 0.2],
[4.8, 3. , 1.4, 0.1],
[4.3, 3. , 1.1, 0.1],
[5.8, 4. , 1.2, 0.2],
[5.7, 4.4, 1.5, 0.4],
[5.4, 3.9, 1.3, 0.4],
[5.1, 3.5, 1.4, 0.3],
[5.7, 3.8, 1.7, 0.3],
[5.1, 3.8, 1.5, 0.3],
[5.4, 3.4, 1.7, 0.2],
[5.1, 3.7, 1.5, 0.4],
[4.6, 3.6, 1. , 0.2],
[5.1, 3.3, 1.7, 0.5],
[4.8, 3.4, 1.9, 0.2],
[5. , 3. , 1.6, 0.2],
[5. , 3.4, 1.6, 0.4],
[5.2, 3.5, 1.5, 0.2],
[5.2, 3.4, 1.4, 0.2],
[4.7, 3.2, 1.6, 0.2],
[4.8, 3.1, 1.6, 0.2],
[5.4, 3.4, 1.5, 0.4],
[5.2, 4.1, 1.5, 0.1],
[5.5, 4.2, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.2],
[5. , 3.2, 1.2, 0.2],
[5.5, 3.5, 1.3, 0.2],
[4.9, 3.6, 1.4, 0.1],
[4.4, 3. , 1.3, 0.2],
[5.1, 3.4, 1.5, 0.2],
[5. , 3.5, 1.3, 0.3],
[4.5, 2.3, 1.3, 0.3],
[4.4, 3.2, 1.3, 0.2],
[5. , 3.5, 1.6, 0.6],
[5.1, 3.8, 1.9, 0.4],
[4.8, 3. , 1.4, 0.3],
[5.1, 3.8, 1.6, 0.2],
[4.6, 3.2, 1.4, 0.2],
[5.3, 3.7, 1.5, 0.2],
[5. , 3.3, 1.4, 0.2],
[7. , 3.2, 4.7, 1.4],
[6.4, 3.2, 4.5, 1.5],
[6.9, 3.1, 4.9, 1.5],
[5.5, 2.3, 4. , 1.3],
[6.5, 2.8, 4.6, 1.5],
[5.7, 2.8, 4.5, 1.3],
[6.3, 3.3, 4.7, 1.6],
[4.9, 2.4, 3.3, 1. ],
[6.6, 2.9, 4.6, 1.3],
[5.2, 2.7, 3.9, 1.4],
[5. , 2. , 3.5, 1. ],
[5.9, 3. , 4.2, 1.5],
[6. , 2.2, 4. , 1. ],
[6.1, 2.9, 4.7, 1.4],
[5.6, 2.9, 3.6, 1.3],
[6.7, 3.1, 4.4, 1.4],
[5.6, 3. , 4.5, 1.5],
[5.8, 2.7, 4.1, 1. ],
[6.2, 2.2, 4.5, 1.5],
[5.6, 2.5, 3.9, 1.1],
[5.9, 3.2, 4.8, 1.8],
[6.1, 2.8, 4. , 1.3],
[6.3, 2.5, 4.9, 1.5],
[6.1, 2.8, 4.7, 1.2],
[6.4, 2.9, 4.3, 1.3],
[6.6, 3. , 4.4, 1.4],
[6.8, 2.8, 4.8, 1.4],
[6.7, 3. , 5. , 1.7],
[6. , 2.9, 4.5, 1.5],
[5.7, 2.6, 3.5, 1. ],
[5.5, 2.4, 3.8, 1.1],
[5.5, 2.4, 3.7, 1. ],
[5.8, 2.7, 3.9, 1.2],
[6. , 2.7, 5.1, 1.6],
[5.4, 3. , 4.5, 1.5],
[6. , 3.4, 4.5, 1.6],
[6.7, 3.1, 4.7, 1.5],
[6.3, 2.3, 4.4, 1.3],
[5.6, 3. , 4.1, 1.3],
[5.5, 2.5, 4. , 1.3],
[5.5, 2.6, 4.4, 1.2],
[6.1, 3. , 4.6, 1.4],
[5.8, 2.6, 4. , 1.2],
[5. , 2.3, 3.3, 1. ],
[5.6, 2.7, 4.2, 1.3],
[5.7, 3. , 4.2, 1.2],
[5.7, 2.9, 4.2, 1.3],
[6.2, 2.9, 4.3, 1.3],
[5.1, 2.5, 3. , 1.1],
[5.7, 2.8, 4.1, 1.3],
[6.3, 3.3, 6. , 2.5],
[5.8, 2.7, 5.1, 1.9],
[7.1, 3. , 5.9, 2.1],
[6.3, 2.9, 5.6, 1.8],
[6.5, 3. , 5.8, 2.2],
[7.6, 3. , 6.6, 2.1],
[4.9, 2.5, 4.5, 1.7],
[7.3, 2.9, 6.3, 1.8],
[6.7, 2.5, 5.8, 1.8],
[7.2, 3.6, 6.1, 2.5],
[6.5, 3.2, 5.1, 2. ],
[6.4, 2.7, 5.3, 1.9],
[6.8, 3. , 5.5, 2.1],
[5.7, 2.5, 5. , 2. ],
[5.8, 2.8, 5.1, 2.4],
[6.4, 3.2, 5.3, 2.3],
[6.5, 3. , 5.5, 1.8],
[7.7, 3.8, 6.7, 2.2],
[7.7, 2.6, 6.9, 2.3],
[6. , 2.2, 5. , 1.5],
[6.9, 3.2, 5.7, 2.3],
[5.6, 2.8, 4.9, 2. ],
[7.7, 2.8, 6.7, 2. ],
[6.3, 2.7, 4.9, 1.8],
[6.7, 3.3, 5.7, 2.1],
[7.2, 3.2, 6. , 1.8],
[6.2, 2.8, 4.8, 1.8],
[6.1, 3. , 4.9, 1.8],
[6.4, 2.8, 5.6, 2.1],
[7.2, 3. , 5.8, 1.6],
[7.4, 2.8, 6.1, 1.9],
[7.9, 3.8, 6.4, 2. ],
[6.4, 2.8, 5.6, 2.2],
[6.3, 2.8, 5.1, 1.5],
[6.1, 2.6, 5.6, 1.4],
[7.7, 3. , 6.1, 2.3],
[6.3, 3.4, 5.6, 2.4],
[6.4, 3.1, 5.5, 1.8],
[6. , 3. , 4.8, 1.8],
[6.9, 3.1, 5.4, 2.1],
[6.7, 3.1, 5.6, 2.4],
[6.9, 3.1, 5.1, 2.3],
[5.8, 2.7, 5.1, 1.9],
[6.8, 3.2, 5.9, 2.3],
[6.7, 3.3, 5.7, 2.5],
[6.7, 3. , 5.2, 2.3],
[6.3, 2.5, 5. , 1.9],
[6.5, 3. , 5.2, 2. ],
[6.2, 3.4, 5.4, 2.3],
[5.9, 3. , 5.1, 1.8]]), 'feature_names': ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'], 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), 'target_names': array(['setosa', 'versicolor', 'virginica'], dtype='<U10')}
# Partition the iris features/targets into a training and a held-out part.
# No test_size is passed, so the library default split is used; random_state
# pins the shuffle so the partition is the same on every run.
X_train, Xtest, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=12,
)
# Show how many rows (and feature columns) ended up in each part.
print(X_train.shape, Xtest.shape, sep="\n")
(112, 4)
(38, 4)
clf= GaussianNB( )
clf. fit( X_train, y_train)
GaussianNB(priors=None, var_smoothing=1e-09)
clf. predict( Xtest)
array([0, 2, 0, 1, 2, 2, 2, 0, 2, 0, 1, 0, 0, 0, 1, 2, 2, 1, 0, 1, 0, 1,
2, 1, 0, 2, 2, 1, 0, 0, 0, 1, 2, 0, 2, 0, 1, 1])
clf. predict_proba( Xtest)
array([[1.00000000e+000, 2.32926069e-017, 1.81656357e-023],
[4.28952299e-154, 2.48576754e-002, 9.75142325e-001],
[1.00000000e+000, 7.45528845e-018, 3.79800436e-024],
[3.59748710e-076, 9.99751806e-001, 2.48194200e-004],
[2.20411871e-239, 4.45798016e-009, 9.99999996e-001],
[1.23795145e-173, 1.95814902e-003, 9.98041851e-001],
[2.45866589e-206, 2.34481513e-007, 9.99999766e-001],
[1.00000000e+000, 2.61810906e-017, 2.67446831e-023],
[3.07448595e-259, 9.07196639e-011, 1.00000000e+000],
[1.00000000e+000, 1.14549667e-010, 3.00314173e-017],
[1.64566141e-101, 9.87428016e-001, 1.25719837e-002],
[1.00000000e+000, 5.62770009e-016, 8.77233124e-022],
[1.00000000e+000, 9.78098062e-014, 4.81247272e-020],
[1.00000000e+000, 3.96616431e-015, 3.17162008e-021],
[2.58159395e-110, 7.85918892e-001, 2.14081108e-001],
[8.01004975e-208, 8.36611920e-006, 9.99991634e-001],
[2.27845999e-193, 5.52863568e-004, 9.99447136e-001],
[2.52133012e-090, 9.94597495e-001, 5.40250471e-003],
[1.00000000e+000, 4.06675976e-017, 2.53312064e-023],
[3.29537129e-123, 9.22312452e-001, 7.76875484e-002],
[1.00000000e+000, 4.66765440e-017, 1.99662820e-023],
[7.54708431e-074, 9.99690656e-001, 3.09343577e-004],
[6.27117035e-136, 1.83265786e-001, 8.16734214e-001],
[4.68960290e-103, 9.82756006e-001, 1.72439943e-002],
[1.00000000e+000, 2.15636250e-014, 2.25086772e-020],
[5.92924136e-199, 5.41122729e-007, 9.99999459e-001],
[4.07679795e-141, 7.38689632e-002, 9.26131037e-001],
[2.77929930e-083, 9.99806458e-001, 1.93541791e-004],
[1.00000000e+000, 4.48465501e-017, 4.36464333e-023],
[1.00000000e+000, 1.64440161e-014, 1.13341951e-021],
[1.00000000e+000, 8.68192867e-017, 6.71630735e-023],
[7.15007036e-050, 9.99997055e-001, 2.94492877e-006],
[1.73414331e-178, 2.06441448e-003, 9.97935586e-001],
[1.00000000e+000, 4.90168069e-019, 3.86471595e-024],
[1.35600871e-156, 2.28929843e-002, 9.77107016e-001],
[1.00000000e+000, 1.78544881e-015, 1.09390819e-020],
[1.86074590e-058, 9.99948860e-001, 5.11400371e-005],
[3.69548269e-057, 9.99992986e-001, 7.01435008e-006]])
accuracy_score( y_test, clf. predict( Xtest) )
0.9736842105263158
import numpy as np
import pandas as pd
import random
dataSet= pd. read_csv( 'iris.txt' , header= None )
dataSet. head( 10 )
0 1 2 3 4 0 5.1 3.5 1.4 0.2 Iris-setosa 1 4.9 3.0 1.4 0.2 Iris-setosa 2 4.7 3.2 1.3 0.2 Iris-setosa 3 4.6 3.1 1.5 0.2 Iris-setosa 4 5.0 3.6 1.4 0.2 Iris-setosa 5 5.4 3.9 1.7 0.4 Iris-setosa 6 4.6 3.4 1.4 0.3 Iris-setosa 7 5.0 3.4 1.5 0.2 Iris-setosa 8 4.4 2.9 1.4 0.2 Iris-setosa 9 4.9 3.1 1.5 0.1 Iris-setosa
dataSet. shape
dataSet. index
list ( dataSet. index)
[0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
113,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
126,
127,
128,
129,
130,
131,
132,
133,
134,
135,
136,
137,
138,
139,
140,
141,
142,
143,
144,
145,
146,
147,
148,
149]
import random
def randSplit(dataSet, rate, seed=None):
    """Randomly split a DataFrame into a training part and a test part.

    The rows are shuffled by position, so the function works for any index
    (not only 0..n-1) and never modifies ``dataSet`` itself.

    Parameters
    ----------
    dataSet : pandas.DataFrame
        Data to split; left untouched.
    rate : float
        Fraction of rows (between 0 and 1) that goes into the training part.
        The training size is ``int(n_rows * rate)``, i.e. rounded down.
    seed : int, optional
        Seed for a private random generator, for reproducible splits. When
        omitted, the module-level ``random`` state is used.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
        Both parts are re-indexed 0..len-1.

    Raises
    ------
    ValueError
        If ``rate`` is not within [0, 1].
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be between 0 and 1, got %r" % (rate,))

    n = dataSet.shape[0]
    m = int(n * rate)

    # Shuffle row *positions* instead of rewriting the caller's index labels.
    rng = random if seed is None else random.Random(seed)
    order = list(range(n))
    rng.shuffle(order)

    train = dataSet.iloc[order[:m], :].reset_index(drop=True)
    test = dataSet.iloc[order[m:], :].reset_index(drop=True)
    return train, test
x_train, x_test= randSplit( dataSet, 0.8 )
x_train
0 1 2 3 4 0 5.0 3.5 1.3 0.3 Iris-setosa 1 5.6 2.7 4.2 1.3 Iris-versicolor 2 6.3 3.3 4.7 1.6 Iris-versicolor 3 4.4 3.0 1.3 0.2 Iris-setosa 4 5.5 2.6 4.4 1.2 Iris-versicolor 5 6.4 3.1 5.5 1.8 Iris-virginica 6 4.9 2.4 3.3 1.0 Iris-versicolor 7 5.4 3.9 1.3 0.4 Iris-setosa 8 6.0 3.4 4.5 1.6 Iris-versicolor 9 6.4 2.8 5.6 2.2 Iris-virginica 10 5.0 3.5 1.6 0.6 Iris-setosa 11 6.0 2.7 5.1 1.6 Iris-versicolor 12 5.0 2.0 3.5 1.0 Iris-versicolor 13 4.9 3.0 1.4 0.2 Iris-setosa 14 5.1 3.3 1.7 0.5 Iris-setosa 15 6.3 2.5 4.9 1.5 Iris-versicolor 16 5.6 2.9 3.6 1.3 Iris-versicolor 17 5.0 3.3 1.4 0.2 Iris-setosa 18 7.3 2.9 6.3 1.8 Iris-virginica 19 4.6 3.2 1.4 0.2 Iris-setosa 20 5.8 4.0 1.2 0.2 Iris-setosa 21 6.5 3.0 5.2 2.0 Iris-virginica 22 5.5 2.3 4.0 1.3 Iris-versicolor 23 5.1 3.8 1.9 0.4 Iris-setosa 24 6.3 2.9 5.6 1.8 Iris-virginica 25 5.2 2.7 3.9 1.4 Iris-versicolor 26 6.7 2.5 5.8 1.8 Iris-virginica 27 4.9 2.5 4.5 1.7 Iris-virginica 28 6.7 3.0 5.2 2.3 Iris-virginica 29 7.1 3.0 5.9 2.1 Iris-virginica ... ... ... ... ... ... 
90 7.2 3.2 6.0 1.8 Iris-virginica 91 7.0 3.2 4.7 1.4 Iris-versicolor 92 5.4 3.4 1.7 0.2 Iris-setosa 93 5.8 2.7 4.1 1.0 Iris-versicolor 94 6.8 3.0 5.5 2.1 Iris-virginica 95 5.1 3.7 1.5 0.4 Iris-setosa 96 5.6 3.0 4.1 1.3 Iris-versicolor 97 5.7 2.9 4.2 1.3 Iris-versicolor 98 6.0 2.2 4.0 1.0 Iris-versicolor 99 7.6 3.0 6.6 2.1 Iris-virginica 100 6.5 3.2 5.1 2.0 Iris-virginica 101 4.5 2.3 1.3 0.3 Iris-setosa 102 5.7 2.5 5.0 2.0 Iris-virginica 103 5.4 3.4 1.5 0.4 Iris-setosa 104 7.4 2.8 6.1 1.9 Iris-virginica 105 5.8 2.7 5.1 1.9 Iris-virginica 106 6.2 3.4 5.4 2.3 Iris-virginica 107 4.6 3.6 1.0 0.2 Iris-setosa 108 6.1 3.0 4.9 1.8 Iris-virginica 109 5.0 3.6 1.4 0.2 Iris-setosa 110 5.5 2.5 4.0 1.3 Iris-versicolor 111 6.2 2.8 4.8 1.8 Iris-virginica 112 6.5 3.0 5.8 2.2 Iris-virginica 113 6.7 3.1 4.4 1.4 Iris-versicolor 114 4.8 3.0 1.4 0.1 Iris-setosa 115 6.1 2.9 4.7 1.4 Iris-versicolor 116 5.9 3.2 4.8 1.8 Iris-versicolor 117 4.9 3.1 1.5 0.1 Iris-setosa 118 6.3 3.4 5.6 2.4 Iris-virginica 119 5.8 2.7 3.9 1.2 Iris-versicolor
120 rows × 5 columns
x_test
0 1 2 3 4 0 5.7 4.4 1.5 0.4 Iris-setosa 1 6.4 2.7 5.3 1.9 Iris-virginica 2 6.0 3.0 4.8 1.8 Iris-virginica 3 5.1 3.8 1.5 0.3 Iris-setosa 4 4.8 3.4 1.6 0.2 Iris-setosa 5 4.6 3.1 1.5 0.2 Iris-setosa 6 6.5 3.0 5.5 1.8 Iris-virginica 7 4.9 3.1 1.5 0.1 Iris-setosa 8 6.3 2.5 5.0 1.9 Iris-virginica 9 5.4 3.9 1.7 0.4 Iris-setosa 10 5.1 3.4 1.5 0.2 Iris-setosa 11 5.1 3.5 1.4 0.2 Iris-setosa 12 4.8 3.0 1.4 0.3 Iris-setosa 13 6.6 2.9 4.6 1.3 Iris-versicolor 14 5.9 3.0 5.1 1.8 Iris-virginica 15 5.2 3.4 1.4 0.2 Iris-setosa 16 7.7 2.6 6.9 2.3 Iris-virginica 17 5.4 3.0 4.5 1.5 Iris-versicolor 18 5.8 2.7 5.1 1.9 Iris-virginica 19 6.7 3.0 5.0 1.7 Iris-versicolor 20 5.8 2.6 4.0 1.2 Iris-versicolor 21 4.7 3.2 1.6 0.2 Iris-setosa 22 6.3 3.3 6.0 2.5 Iris-virginica 23 5.0 2.3 3.3 1.0 Iris-versicolor 24 5.3 3.7 1.5 0.2 Iris-setosa 25 5.7 3.8 1.7 0.3 Iris-setosa 26 6.7 3.1 4.7 1.5 Iris-versicolor 27 7.9 3.8 6.4 2.0 Iris-virginica 28 5.1 2.5 3.0 1.1 Iris-versicolor 29 6.2 2.9 4.3 1.3 Iris-versicolor
labels= x_train. loc[ : , 4 ]
labels= x_train. iloc[ : , - 1 ]
labels
0 Iris-setosa
1 Iris-versicolor
2 Iris-versicolor
3 Iris-setosa
4 Iris-versicolor
5 Iris-virginica
6 Iris-versicolor
7 Iris-setosa
8 Iris-versicolor
9 Iris-virginica
10 Iris-setosa
11 Iris-versicolor
12 Iris-versicolor
13 Iris-setosa
14 Iris-setosa
15 Iris-versicolor
16 Iris-versicolor
17 Iris-setosa
18 Iris-virginica
19 Iris-setosa
20 Iris-setosa
21 Iris-virginica
22 Iris-versicolor
23 Iris-setosa
24 Iris-virginica
25 Iris-versicolor
26 Iris-virginica
27 Iris-virginica
28 Iris-virginica
29 Iris-virginica
...
90 Iris-virginica
91 Iris-versicolor
92 Iris-setosa
93 Iris-versicolor
94 Iris-virginica
95 Iris-setosa
96 Iris-versicolor
97 Iris-versicolor
98 Iris-versicolor
99 Iris-virginica
100 Iris-virginica
101 Iris-setosa
102 Iris-virginica
103 Iris-setosa
104 Iris-virginica
105 Iris-virginica
106 Iris-virginica
107 Iris-setosa
108 Iris-virginica
109 Iris-setosa
110 Iris-versicolor
111 Iris-virginica
112 Iris-virginica
113 Iris-versicolor
114 Iris-setosa
115 Iris-versicolor
116 Iris-versicolor
117 Iris-setosa
118 Iris-virginica
119 Iris-versicolor
Name: 4, Length: 120, dtype: object
labels= x_train. iloc[ : , - 1 ] . value_counts( )
labels
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
labels= x_train. iloc[ : , - 1 ] . value_counts( ) . index
labels
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
mean= [ ]
std= [ ]
for i in labels:
item= x_train. loc[ x_train. iloc[ : , - 1 ] == i, : ]
m= item. iloc[ : , : - 1 ]
item
0 1 2 3 4 0 6.0 2.2 5.0 1.5 Iris-virginica 2 6.0 3.0 4.8 1.8 Iris-virginica 3 5.8 2.7 5.1 1.9 Iris-virginica 6 7.2 3.6 6.1 2.5 Iris-virginica 11 6.3 3.3 6.0 2.5 Iris-virginica 13 6.7 3.0 5.2 2.3 Iris-virginica 18 6.8 3.0 5.5 2.1 Iris-virginica 22 7.4 2.8 6.1 1.9 Iris-virginica 25 6.3 3.4 5.6 2.4 Iris-virginica 26 6.4 3.1 5.5 1.8 Iris-virginica 31 6.1 3.0 4.9 1.8 Iris-virginica 36 7.2 3.0 5.8 1.6 Iris-virginica 37 6.3 2.9 5.6 1.8 Iris-virginica 40 6.1 2.6 5.6 1.4 Iris-virginica 46 7.3 2.9 6.3 1.8 Iris-virginica 52 6.3 2.5 5.0 1.9 Iris-virginica 63 7.9 3.8 6.4 2.0 Iris-virginica 64 7.7 3.8 6.7 2.2 Iris-virginica 65 6.2 3.4 5.4 2.3 Iris-virginica 69 4.9 2.5 4.5 1.7 Iris-virginica 72 6.7 3.3 5.7 2.1 Iris-virginica 73 6.5 3.0 5.2 2.0 Iris-virginica 74 6.9 3.2 5.7 2.3 Iris-virginica 81 6.3 2.7 4.9 1.8 Iris-virginica 82 6.9 3.1 5.4 2.1 Iris-virginica 83 6.4 2.7 5.3 1.9 Iris-virginica 84 7.1 3.0 5.9 2.1 Iris-virginica 86 7.7 2.6 6.9 2.3 Iris-virginica 90 7.7 3.0 6.1 2.3 Iris-virginica 93 5.6 2.8 4.9 2.0 Iris-virginica 94 6.5 3.0 5.8 2.2 Iris-virginica 97 6.4 2.8 5.6 2.1 Iris-virginica 101 6.3 2.8 5.1 1.5 Iris-virginica 102 6.4 2.8 5.6 2.2 Iris-virginica 109 6.8 3.2 5.9 2.3 Iris-virginica 111 7.2 3.2 6.0 1.8 Iris-virginica 112 6.9 3.1 5.1 2.3 Iris-virginica
m
0 4.970270
1 3.383784
2 1.443243
3 0.243243
dtype: float64
mean= [ ]
std= [ ]
for i in labels:
item= x_train. loc[ x_train. iloc[ : , - 1 ] == i, : ]
m= item. iloc[ : , : - 1 ] . mean( )
m
0 4.970270
1 3.383784
2 1.443243
3 0.243243
dtype: float64
mean= [ ]
std= [ ]
for i in labels:
item= x_train. loc[ x_train. iloc[ : , - 1 ] == i, : ]
m= item. iloc[ : , : - 1 ] . mean( )
s= np. sum ( ( item. iloc[ : , : - 1 ] - m) ** 2 ) / item. shape[ 0 ]
( item. iloc[ : , : - 1 ] - m) ** 2
0 1 2 3 0 0.000884 0.013506 0.020519 0.003221 3 0.325208 0.147290 0.020519 0.001870 7 0.184668 0.266479 0.020519 0.024573 10 0.000884 0.013506 0.024573 0.127275 13 0.004938 0.147290 0.001870 0.001870 14 0.016830 0.007020 0.065924 0.065924 17 0.000884 0.007020 0.001870 0.001870 19 0.137100 0.033776 0.001870 0.001870 20 0.688451 0.379722 0.059167 0.001870 23 0.016830 0.173236 0.208627 0.024573 37 0.325208 0.234047 0.001870 0.001870 40 0.052776 0.013506 0.003221 0.001870 43 0.000884 0.033776 0.059167 0.001870 44 0.137100 0.000263 0.001870 0.003221 45 0.000884 0.147290 0.024573 0.001870 49 0.184668 0.099993 0.003221 0.001870 56 0.028992 0.000263 0.208627 0.001870 59 0.449262 0.147290 0.117816 0.020519 65 0.016830 0.173236 0.024573 0.001870 71 0.016830 0.013506 0.001870 0.003221 74 0.073046 0.033776 0.020519 0.001870 75 0.280614 0.666209 0.001870 0.001870 76 0.325208 0.033776 0.020519 0.001870 77 0.004938 0.080533 0.003221 0.020519 80 0.052776 0.512966 0.003221 0.020519 81 0.000884 0.000263 0.003221 0.001870 84 0.000884 0.000263 0.024573 0.024573 86 0.280614 0.013506 0.020519 0.001870 87 0.028992 0.080533 0.024573 0.001870 92 0.184668 0.000263 0.065924 0.001870 95 0.016830 0.099993 0.003221 0.024573 101 0.221154 1.174587 0.020519 0.003221 103 0.184668 0.000263 0.003221 0.024573 107 0.137100 0.046749 0.196465 0.001870 109 0.000884 0.046749 0.001870 0.001870 114 0.028992 0.147290 0.001870 0.020519 117 0.004938 0.080533 0.003221 0.020519
s
0 0.119386
1 0.137034
2 0.034887
3 0.012725
dtype: float64
# Estimate one Gaussian per class from the training split.
# NOTE: despite the name, `std` / `stds` hold the per-feature mean squared
# deviation (sum of squared deviations divided by the class size), which the
# density formula further down uses as the variance term.
mean = []
std = []
for i in labels:
    # Training rows whose last column (the class label) equals this label.
    item = x_train[x_train.iloc[:, -1] == i]
    m = item.iloc[:, :-1].mean()
    mean.append(m)
    s = np.sum((item.iloc[:, :-1] - m) ** 2) / len(item)
    std.append(s)
# One row per class label, one column per feature.
means = pd.DataFrame(mean, index=labels)
stds = pd.DataFrame(std, index=labels)
mean
[0 5.935714
1 2.766667
2 4.276190
3 1.326190
dtype: float64, 0 6.600000
1 2.978049
2 5.548780
3 2.034146
dtype: float64, 0 4.970270
1 3.383784
2 1.443243
3 0.243243
dtype: float64]
std
[0 0.226105
1 0.101270
2 0.174195
3 0.036695
dtype: float64, 0 0.370732
1 0.092445
2 0.264450
3 0.077858
dtype: float64, 0 0.119386
1 0.137034
2 0.034887
3 0.012725
dtype: float64]
means
0 1 2 3 Iris-versicolor 5.935714 2.766667 4.276190 1.326190 Iris-virginica 6.600000 2.978049 5.548780 2.034146 Iris-setosa 4.970270 3.383784 1.443243 0.243243
stds
0 1 2 3 Iris-versicolor 0.226105 0.101270 0.174195 0.036695 Iris-virginica 0.370732 0.092445 0.264450 0.077858 Iris-setosa 0.119386 0.137034 0.034887 0.012725
for j in range ( x_test. shape[ 0 ] ) :
iset= x_test. iloc[ j, : - 1 ]
iset
0 6.2
1 2.9
2 4.3
3 1.3
Name: 29, dtype: object
for j in range ( x_test. shape[ 0 ] ) :
iset= x_test. iloc[ j, : - 1 ] . tolist( )
iset
[6.2, 2.9, 4.3, 1.3]
for j in range ( x_test. shape[ 0 ] ) :
iset= x_test. iloc[ j, : - 1 ] . tolist( )
iprob= np. exp( - 1 * ( iset- means) ** 2 / ( stds* 2 ) ) / np. sqrt( 2 * np. pi* stds)
iset- means
0 1 2 3 Iris-versicolor 0.264286 0.133333 0.023810 -0.026190 Iris-virginica -0.400000 -0.078049 -1.248780 -0.734146 Iris-setosa 1.229730 -0.483784 2.856757 1.056757
iprob
0 1 2 3 Iris-versicolor 0.718911 1.148286 9.543013e-01 2.063229e+00 Iris-virginica 0.528037 1.269579 4.066561e-02 4.488144e-02 Iris-setosa 0.002051 0.458797 3.406877e-51 3.100002e-19
iprob[ 0 ]
Iris-versicolor 0.718911
Iris-virginica 0.528037
Iris-setosa 0.002051
Name: 0, dtype: float64
for j in range ( x_test. shape[ 0 ] ) :
iset= x_test. iloc[ j, : - 1 ] . tolist( )
iprob= np. exp( - 1 * ( iset- means) ** 2 / ( stds* 2 ) ) / np. sqrt( 2 * np. pi* stds)
prob= 1
for k in range ( x_test. shape[ 1 ] - 1 ) :
prob*= iprob[ k]
prob
Iris-versicolor 1.625391e+00
Iris-virginica 1.223539e-03
Iris-setosa 9.936339e-73
Name: 0, dtype: float64
prob. values
array([1.62539149e+00, 1.22353864e-03, 9.93633857e-73])
prob. index
Index(['Iris-versicolor', 'Iris-virginica', 'Iris-setosa'], dtype='object')
np. argmax( prob. values)
0
# Classify every test row with the per-class Gaussians held in `means` /
# `stds`, then score the predictions against the true labels.
# The recorded run of this cell stopped with "IndentationError: unexpected
# indent" on the accuracy line; everything after the loop is at top level here.
result = []
# Slice the features by count instead of ":-1": once 'predict' has been added
# to x_test, ":-1" would also pick up the label column on a re-run.
n_features = means.shape[1]
for j in range(x_test.shape[0]):
    iset = x_test.iloc[j, :n_features].tolist()
    # Normal density of each feature under each class; `stds` is used as the
    # variance term of the formula.
    iprob = np.exp(-1 * (iset - means) ** 2 / (stds * 2)) / np.sqrt(2 * np.pi * stds)
    # Multiply the per-feature densities of each class (one value per class).
    prob = iprob.prod(axis=1)
    cla = prob.index[np.argmax(prob.values)]
    result.append(cla)
x_test['predict'] = result
# 'predict' is the last column and the true label the one before it.
acc = (x_test.iloc[:, -1] == x_test.iloc[:, -2]).mean()
print("accuracy rate is", acc)
File "<ipython-input-374-38f3e0c25417>", line 11
acc=(x_test.iloc[:,-1]==x_test.iloc[:,-2]).mean()
^
IndentationError: unexpected indent
result
x_test
cla
def gnb_classify(x_train, x_test):
    """Classify the rows of ``x_test`` with a Gaussian naive Bayes model.

    A normal distribution is fitted per class and per feature on ``x_train``.
    Each test row is assigned the class with the highest log-posterior
    (log prior from the training class frequencies plus the summed
    per-feature log densities).

    Parameters
    ----------
    x_train : pandas.DataFrame
        Training data: numeric feature columns followed by the class label in
        the last column.
    x_test : pandas.DataFrame
        Rows to classify. Its leading columns must be the same features, in
        the same order as in ``x_train``; any further columns (true label, an
        earlier 'predict' column) are ignored.

    Returns
    -------
    pandas.DataFrame
        ``x_test`` itself, modified in place: a 'predict' column holding the
        predicted label is added (or overwritten).

    Raises
    ------
    ValueError
        If ``x_train`` has no rows.
    """
    if x_train.shape[0] == 0:
        raise ValueError("x_train must contain at least one row")

    n_features = x_train.shape[1] - 1
    targets = x_train.iloc[:, -1]
    # value_counts() yields the counts when iterated; the class labels are
    # its index.
    labels = targets.value_counts().index

    train_values = np.asarray(x_train.iloc[:, :n_features], dtype=float)
    means = np.empty((len(labels), n_features))
    variances = np.empty((len(labels), n_features))
    log_priors = np.empty(len(labels))
    for row, label in enumerate(labels):
        item = train_values[(targets == label).values]
        means[row] = item.mean(axis=0)
        # Population variance: sum of squared deviations / class size.
        variances[row] = item.var(axis=0)
        log_priors[row] = np.log(item.shape[0] / train_values.shape[0])

    # Keep every variance strictly positive so a constant feature does not
    # cause a division by zero (same idea as scikit-learn's var_smoothing).
    spread = train_values.var(axis=0).max() if n_features else 0.0
    variances += (1e-9 * spread) if spread > 0 else 1e-9

    # Use the first n_features columns, so a label or 'predict' column that
    # is already present in x_test never enters the arithmetic.
    test_values = np.asarray(x_test.iloc[:, :n_features], dtype=float)

    # Shape (n_test, n_classes, n_features). Working in log space avoids the
    # underflow that multiplying many small densities runs into.
    diff = test_values[:, np.newaxis, :] - means[np.newaxis, :, :]
    log_density = -0.5 * (np.log(2 * np.pi * variances) + diff ** 2 / variances)
    scores = log_density.sum(axis=2) + log_priors

    x_test['predict'] = [labels[k] for k in scores.argmax(axis=1)]
    return x_test
gnb_classify( x_train, x_test)