1.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LR
# Multiple linear regression: columns 1-5 of the sheet are the regressors,
# column 6 is the response.
data = pd.read_excel('1.xlsx')
x = data.iloc[:, 1:6].values
y = data.iloc[:, 6].values

lr = LR()
lr.fit(x, y)
Slr = lr.score(x, y)   # coefficient of determination (R^2) on the training data
c_x = lr.coef_         # one coefficient per regressor
c_b = lr.intercept_    # constant term

# Predict a single new sample (shape (1, 5): one row, five regressors).
x1 = np.array([4, 1.5, 10, 17, 9]).reshape(1, 5)
Y = lr.predict(x1)

# Reproduce the prediction by hand: y = sum(x_i * coef_i) + intercept.
# The intercept c_b must be added here; adding the coefficient vector c_x
# (as the code previously did) yields a 5-element array that is not a
# prediction at all.  The result should match lr.predict(x1)[0].
r1 = x1 * c_x
Y = r1.sum() + c_b

print('x回归系数为:', c_x)
print('回归系数常数项:', c_b)
print('判定系数:', Slr)
print('样本预测值:', Y)
2.
# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn import svm
from sklearn.neural_network import MLPClassifier

# The first 20 rows are the labelled training set (features in columns 1-3,
# label in column 4); the remaining rows are the samples to classify.
data = pd.read_excel('2.xlsx')
features = data.iloc[:, 1:4]
x_train = features.iloc[:20]
x_test = features.iloc[20:]
y_train = data.iloc[:20, 4]

# Each entry: (label printed with the training accuracy,
#              label printed with the predictions,
#              unfitted estimator).
candidates = [
    ('逻辑回归模型拟合准确率: ', '逻辑回归模型评估结果: ', LR()),
    ('支持向量机模型拟合准确率: ', '支持向量机评估结果: ', svm.SVC(kernel='rbf')),
    ('神经网络模型拟合准确率: ', '神经网络评估结果: ',
     MLPClassifier(solver='lbfgs', alpha=1e-5,
                   hidden_layer_sizes=(5, 2), random_state=1)),
]

# Fit every model on the same training data, report its accuracy on that
# training data, and print its predictions for the unlabelled rows.
for fit_label, predict_label, clf in candidates:
    clf.fit(x_train, y_train)
    rv = clf.score(x_train, y_train)
    R = clf.predict(x_test)
    print(fit_label, rv)
    print(predict_label, R)
3.
# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# 1. Load the data; every column after the first is used as a feature.
data = pd.read_excel('3.xlsx')
X = data.iloc[:, 1:]

# Principal component analysis.  A fractional n_components asks PCA to keep
# as many components as are needed to explain that share (90%) of the variance.
pca = PCA(n_components=0.90)
f = pd.DataFrame(pca.fit(X).transform(X))  # principal component scores
tzxl = pca.components_                     # eigenvectors (principal axes)
tz = pca.explained_variance_               # eigenvalues
gxl = pca.explained_variance_ratio_        # contribution (explained-variance) ratios

# Standardise the component scores before clustering.
scaler = StandardScaler()
XZ = scaler.fit_transform(f)

# K-means with 4 clusters, a fixed seed and at most 500 iterations.
model = KMeans(n_clusters=4, random_state=0, max_iter=500)
c = model.fit(XZ).labels_

# Cluster label of every region, ordered by cluster number.
Fs = pd.Series(c, index=data['地区']).sort_values(ascending=True)
print(Fs)
4.
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor as MP
# 1. Load the data: columns 1-3 are the network inputs, columns 4-5 are the
# two regression targets.
data = pd.read_excel('4.xlsx')
x_train = data.iloc[:, 1:4].values
y_train = data.iloc[:, 4:6].values

# Multi-output MLP regressor with a single hidden layer of 8 units.
clf = MP(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=8, random_state=1)
clf.fit(x_train, y_train)

# Input rows for the two years to forecast (2010 and 2011).
a = np.array([
    [73.39, 3.9635, 0.9880],
    [75.55, 4.0975, 1.0268],
])
y1 = pd.DataFrame(clf.predict(a))

# Put the year in front of the two predicted columns and label the result.
s = pd.DataFrame([2010, 2011])
yy = pd.concat([s, y1], axis=1)
yy.columns = ['时间', '公路客流量', '公路货运量']
print(yy)
5.
import pandas as pd
import numpy as np
# Question 1
# Turn the transaction sheet into a 0/1 indicator matrix: one row per row of
# the sheet, one column per item, 1.0 where the item appears in any column of
# that row and 0.0 otherwise.
data = pd.read_excel('5.xlsx')
tiem = ['西红柿', '茄子', '水果刀', '香蕉', '袜子', '毛巾', '肥皂', '排骨', '酸奶', '鸡蛋', '鞋子', '土豆']

# (data == name) compares every cell with the item name; any(axis=1) collapses
# that to "the row contains the item".  Taking .values gives a positional
# mask, so the result does not depend on the frame's index labels and an
# empty sheet simply produces empty columns.
D = {
    name: (data == name).any(axis=1).values.astype(float)
    for name in tiem
}
Data = pd.DataFrame(D)
print(Data)
# Question 2
# Mine one-to-one association rules "A--B" from the 0/1 indicator matrix
# `Data`:
#   support    = (#rows containing both A and B) / (#rows)
#   confidence = (#rows containing both A and B) / (#rows containing A)
# A rule is kept when both values reach their thresholds.
c = list(Data.columns)
c0 = 0.4  # minimum confidence
s0 = 0.2  # minimum support
list1 = []  # rule labels
list2 = []  # support of each kept rule
list3 = []  # confidence of each kept rule

n_rows = len(Data)
# Boolean "row contains item" mask per item, computed once instead of once
# per ordered pair.
contains = {name: Data[name].values == 1 for name in c}

for lhs in c:
    has_lhs = contains[lhs]
    lhs_count = has_lhs.sum()
    if lhs_count == 0:
        # Confidence is undefined (0/0) for an item that never occurs, so no
        # rule can start from it.  This also covers an empty table.
        continue
    for rhs in c:
        if lhs == rhs:
            continue
        both = (has_lhs & contains[rhs]).sum()
        sp = both / n_rows
        co = both / lhs_count
        if co >= c0 and sp >= s0:
            list1.append(lhs + '--' + rhs)
            list2.append(sp)
            list3.append(co)

R = {'rule': list1, 'support': list2, 'confidence': list3}
R = pd.DataFrame(R)
R.to_excel('导出数据.xlsx')