#DATASET: https://archive.ics.uci.edu/ml/datasets/Computer+Hardware
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import sklearn.preprocessing as pre
# Load the dataset into a DataFrame and preview the first rows.
# The path is a raw string so the Windows-style backslashes stay literal
# instead of depending on unrecognised escape sequences such as "\m"
# (a SyntaxWarning on recent Python versions). The resulting path is unchanged.
df = pd.read_csv(r'data\machinei\machine.data')
df.head()
| | vendor | name | MYCT | MMIN | MMAX | CACH | CHMIN | CHMAX | PRP | ERP |
|---|---|---|---|---|---|---|---|---|---|---|
0 | adviser | 32/60 | 125 | 256 | 6000 | 256 | 16 | 128 | 198 | 199 |
1 | amdahl | 470v/7 | 29 | 8000 | 32000 | 32 | 8 | 32 | 269 | 253 |
2 | amdahl | 470v/7a | 29 | 8000 | 32000 | 32 | 8 | 32 | 220 | 253 |
3 | amdahl | 470v/7b | 29 | 8000 | 32000 | 32 | 8 | 32 | 172 | 253 |
4 | amdahl | 470v/7c | 29 | 8000 | 16000 | 32 | 8 | 16 | 132 | 132 |
#convert string to num
#X includes the first 8 attributes, Y is the target prediction
# The two leading columns hold strings, so each one is mapped to integer
# labels before being placed in the numeric feature matrix.
x1 = pre.LabelEncoder().fit_transform(df.iloc[:, 0])
x2 = pre.LabelEncoder().fit_transform(df.iloc[:, 1])

# Every column except the last two is a feature; the width is derived from the
# frame so it always agrees with the `2:-2` slice used to fill it.
X = np.zeros((df.shape[0], df.shape[1] - 2))
X[:, 0] = x1
X[:, 1] = x2
X[:, 2:] = df.iloc[:, 2:-2]

# Second-to-last column is the regression target.
Y = df.iloc[:, -2]  #goal
#compute loss, train parameters
# Batch gradient descent for the linear model predict_y = X.W + b, minimising
# the squared error divided by 2*m. Training stops once that loss is below 2500.
# NOTE(review): W, b, m and alpha are not assigned anywhere in the visible
# file; they must be initialised before this loop runs. m has to match the
# number of rows of X for the np.ones(shape=[m,]) product below to be valid.
count = 0
while True:
    count += 1
    predict_y = np.dot(X, W.T) + b
    # Residual is computed once and reused by the loss and both gradients.
    residual = Y - predict_y
    rloss = np.dot(residual.T, residual) / (2 * m)
    # A NaN/inf loss can never satisfy the stopping test below, so a diverged
    # run would otherwise loop forever; fail loudly instead.
    if not np.isfinite(rloss):
        raise FloatingPointError(
            'loss became non-finite after {} iterations; '
            'reduce alpha or scale the features'.format(count)
        )
    w_gradient = np.dot(residual.T, X) * (-1.0 / m)
    b_gradient = np.dot(residual.T, np.ones(shape=[m, ])) * (-1.0 / m)
    #gradient descent
    W -= w_gradient * alpha
    b -= b_gradient * alpha
    # Progress report every 10000 iterations.
    if count % 10000 == 0:
        print(rloss)
    # The reported loss is the one measured before this iteration's update.
    if rloss < 2500:
        print('MSEloss={}'.format(rloss))
        break
MSEloss=2456.133718645051
array([-0.02520069, -0.17961688, -0.00338619, 0.01356295, 0.00726707,
0.16543731, 0.01494539, 0.13264917])