大数据挖掘技术代码

第一章

# 1. (1) Create the source tuple and an empty list.
t1 = (1, 2, 'R', 'py', 'Matlab')
list1 = []

# (2) Copy the tuple's elements into the list one index at a time.
i = 0
while i < len(t1):
    list1.append(t1[i])
    i = i + 1
# A bare `list1` expression only echoes in an interactive session;
# print it so the result is also visible when run as a script.
print(list1)

# (3) Create an empty dictionary.
dict1 = {}

# (4) Pair each key in Li2 with the value at the same position in Li.
Li = ['k', [3, 4, 5], (1, 2, 6), 18, 50]
Li2 = ['a', 'b', 'c', 'd', 'e']
for key, value in zip(Li2, Li):
    # setdefault keeps the first value seen for a key.
    dict1.setdefault(key, value)
print(dict1)

2.

(1)

def comput(r, h):
    """Return the surface area and volume of a cylinder.

    Args:
        r: Radius of the cylinder's base.
        h: Height of the cylinder.

    Returns:
        A ``(surface_area, volume)`` tuple.
    """
    from math import pi

    # Total surface area: the two end caps plus the lateral surface.
    surface_area = 2 * pi * r * (r + h)
    # Volume: base area times height.
    volume = pi * r ** 2 * h
    return surface_area, volume

(2)

# Evaluate the helper for radius 10 and height 11, then unpack the
# returned pair into surface area and volume.
d = comput(10, 11)
s, v = d

print('半径为10,高为11的圆柱体表面积:', s)
print('半径为10,高为11的圆柱体体积:', v)

第二章

1.

import numpy as np

(1)

# Build a 1-D array from a list.
list1 = [1, 2, 4, 6, 7, 8]
N1 = np.asarray(list1)
print(N1)

# (2) Build a 1-D array from a tuple.
tup1 = (1, 2, 3, 4, 5, 6)
N2 = np.asarray(tup1)
print(N2)

# (3) Build a 1-D array of six ones.
N3 = np.array([1] * 6)
print(N3)

# (4) Stack the three arrays as the rows of one 2-D array.
N4 = np.vstack([N1, N2, N3])
print(N4)

# (5) Save the stacked array (np.save appends the ".npy" suffix) and
# read it back from disk.
np.save('data', N4)
np.load('data.npy')

2.

(1)

import numpy as np

import test1

# Show the N4 object exposed by the test1 module.
print(test1.N4)

# (2) Collect four entries of N4, addressed as (row, column) pairs.
picked_cells = ((0, 1), (0, 3), (2, 0), (2, 4))
N5 = np.array([test1.N4[row][col] for row, col in picked_cells])
print(N5)

# (3) Join N5 and test1.N1 end to end.
N6 = np.hstack([N5, test1.N1])
print(N6)

3.

(1)

import numpy as np

# Plain ndarrays with the `@` operator replace np.matrix / np.mat:
# np.mat was removed in NumPy 2.0 and np.matrix is no longer recommended.
arr1 = np.array([[1, 5], [5, 6]])
arr2 = np.array([[6, 5], [8, 2]])
arr3 = arr1 @ arr2  # matrix product, not element-wise multiplication
print(arr3)

# (2) Eigenvalues and eigenvectors of a 2x2 matrix.
A = np.array([[3, -1], [-1, 3]])
print(A)
A_value, A_vector = np.linalg.eig(A)
print(A_value, A_vector)

# (3) Reduced singular value decomposition of a 2x3 matrix.
B = np.array([[4, 11, 14], [8, 7, -2]])
print(B)
C = np.linalg.svd(B, full_matrices=False)
print(C)

# (4) Transpose a 3x3 matrix and compute the determinant of the matrix
# and of its transpose.
D = np.array([[4, 6, 8], [4, 6, 9], [5, 6, 8]])
print(D)
E = D.T
print(E)
x = np.linalg.det(D)
print(x)
y = np.linalg.det(E)
print(y)

第三章

# 1.
import pandas as pd
import numpy as np

# (1) Load the comma-separated text file.
# The table is bound to `data`, not `pd`: rebinding `pd` would overwrite
# the pandas module alias for everything that follows.
data = pd.read_table('test1.txt', sep=',')
print(data)

# (2) Split rows 0-11 into four consecutive groups of three rows.
pd1 = data.iloc[0:3]
print(pd1)
pd2 = data.iloc[3:6]
print(pd2)
pd3 = data.iloc[6:9]
print(pd3)
pd4 = data.iloc[9:12]
print(pd4)

# (3) Column-wise mean of each group, numeric columns only.
M1 = pd1.mean(numeric_only=True)
print(M1)
M2 = pd2.mean(numeric_only=True)
print(M2)
M3 = pd3.mean(numeric_only=True)
print(M3)
M4 = pd4.mean(numeric_only=True)
print(M4)

# 2.
import pandas as pd
import numpy as np

# (1) Load the workbook.
df = pd.read_excel('test2.xlsx')
print(df)

# (2) Keep the first 14 rows of the columns at positions 2 and 3.
df1 = df.iloc[0:14, [2, 3]]
print(df1)

# (3) Convert the selection to an array, then build a boolean mask of
# the rows whose trade date lies in [2017-01-05, 2017-01-16].
Nt = np.array(df1)
print(Nt)

# The workbook is re-read with every cell as text so that the dates can
# be compared as strings.
# NOTE(review): string comparison orders dates correctly only if the
# column holds 'YYYY-MM-DD' text with no time part -- confirm.
# NOTE(review): the mask has one entry per workbook row while Nt has at
# most 14 rows; they line up only for a 14-row sheet -- confirm.
df2 = pd.read_excel('test2.xlsx', dtype=str)
index1 = df2['交易日期'].values >= '2017-01-05'
index2 = df2['交易日期'].values <= '2017-01-16'
TF = index1 & index2
print(TF)

# (4) Sum the second selected column over the rows inside the date range.
S = sum(Nt[TF, 1])
print(S)

第四章

(1)

import numpy as np

import pandas as pd

import pylab as plt

import matplotlib.pyplot as plt

df = pd.read_excel('test1.xlsx')
print(df)

# (2) First ten rows, and columns 2 and 1 of those rows.
A = df.iloc[0:10]
print(A)
B = df.iloc[0:10, 2]
print(B)
C = df.iloc[0:10, 1]
print(C)

# Each ten-day curve gets its own figure; without plt.figure() both
# plot calls would draw onto the same implicit axes.
# Beef price trend, days 1-10.
plt.figure()
plt.plot(B)

# Pork price trend, days 1-10.
plt.figure()
plt.plot(C)

# (3) First fifteen rows of the same two columns.
D = df.iloc[0:15, 2]
print(D)
E = df.iloc[0:15, 1]
print(E)

# Pork and beef price trends for the first half of the month, drawn
# together in a single figure.
plt.figure()
plt.plot(D)
plt.plot(E, color='red', linestyle='--')
plt.show()

第五章

# 1.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LR

# Columns 1-5 are the explanatory variables; column 6 is the target.
data = pd.read_excel('1.xlsx')
x = data.iloc[:, 1:6].values
y = data.iloc[:, 6].values

# Fit the linear model and read off its parameters.
lr = LR()
lr.fit(x, y)
Slr = lr.score(x, y)   # coefficient of determination on the training data
c_x = lr.coef_         # one coefficient per explanatory variable
c_b = lr.intercept_    # constant term

# Predict one new sample; predict() expects a 2-D (samples, features) array.
x1 = np.array([4, 1.5, 10, 17, 9])
x1 = x1.reshape(1, 5)
R1 = lr.predict(x1)

# Manual cross-check of R1: sum of coefficient * feature plus the
# intercept. The intercept c_b is the term to add here, not the
# coefficient vector c_x.
r1 = x1 * c_x
R2 = r1.sum() + c_b

print('x回归系数为:', c_x)
print('回归系数常数项:', c_b)
print('判定系数:', Slr)
print('样本预测值:', R1)

# 2.
import pandas as pd
# Note: LR names LogisticRegression in this exercise.
from sklearn.linear_model import LogisticRegression as LR

data = pd.read_excel('2.xlsx')

# The first 20 rows are the training set: columns 1-3 are the features
# and column 4 is the label. The remaining rows are the samples to classify.
x_train = data.iloc[:20, 1:4]
y_train = data.iloc[:20, 4]
x_test = data.iloc[20:, 1:4]

clf = LR()
clf.fit(x_train, y_train)

# Mean accuracy on the training data (computed once).
rv = clf.score(x_train, y_train)
# Predicted labels for the held-out rows.
R = clf.predict(x_test)
print(rv, R)

# 3.
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

data = pd.read_excel('3.xlsx')
X = data.iloc[:, 1:]  # every column except the first

# A fractional n_components asks PCA to keep enough components to
# explain that share of the variance.
pca = PCA(n_components=0.90)
pca.fit(X)
f = pca.transform(X)
f = pd.DataFrame(f)

# Standardize the component scores before clustering.
# NOTE(review): scaling is applied after PCA rather than to the raw
# features -- confirm this order is intended.
scaler = StandardScaler()
scaler.fit(f)
XZ = scaler.transform(f)

# Cluster into four groups; random_state fixes the initialization.
model = KMeans(n_clusters=4, random_state=0, max_iter=500)
model.fit(XZ)
c = model.labels_

# Label each row by its '地区' value and list the rows sorted by cluster id.
Fs = pd.Series(c, index=data['地区'])
Fs = Fs.sort_values(ascending=True)
print(Fs)

# 4.
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor as MP

data = pd.read_excel('4.xlsx')

# Columns 1-3 are the inputs; columns 4-5 are the two regression targets.
x_train = data.iloc[:, 1:4]
y_train = data.iloc[:, 4:6]

# One hidden layer of 8 units; random_state fixes the weight initialization.
clf = MP(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=8, random_state=1)
clf.fit(x_train, y_train)

# Predict both targets for two new samples.
a = np.array([[73.39, 3.9635, 0.9880], [75.55, 4.0975, 1.0268]])
y1 = clf.predict(a)
print(y1)

# 5.
import pandas as pd
import numpy as np

# Goods to look for in the table.
items = ['西红柿', '排骨', '鸡蛋', '毛巾', '水果刀', '苹果', '茄子', '香蕉', '袜子', '肥皂', '酸奶', '土豆', '鞋子']

# Read the sheet without a header row and drop its first column.
data = pd.read_excel('5.xlsx', header=None)
data = data.iloc[:, 1:]

# Build one 0/1 indicator column per item: 1.0 on every row in which the
# item appears in any column, 0.0 otherwise.
D = dict()
for item in items:
    D[item] = (data == item).any(axis=1).to_numpy().astype(float)
Data = pd.DataFrame(D)

c = list(Data.columns)
c0 = 0.7  # minimum confidence
s0 = 0.4  # minimum support
list1 = []  # rule labels, "antecedent--consequent"
list2 = []  # support of each kept rule
list3 = []  # confidence of each kept rule

row_count = len(Data)
for antecedent in c:
    has_antecedent = Data[antecedent].values == 1
    antecedent_count = has_antecedent.sum()
    if antecedent_count == 0:
        # The item never occurs, so confidence would be 0/0; no rule.
        continue
    for consequent in c:
        if consequent == antecedent:
            continue
        has_consequent = Data[consequent].values == 1
        both_count = (has_antecedent & has_consequent).sum()
        sp = both_count / row_count         # share of rows with both items
        co = both_count / antecedent_count  # share of antecedent rows that also have the consequent
        # The threshold test belongs inside the pair loop so that every
        # ordered pair is judged on its own support and confidence.
        if co >= c0 and sp >= s0:
            list1.append(antecedent + '--' + consequent)
            list2.append(sp)
            list3.append(co)

# Assemble the result table once, after all pairs have been examined.
R = {'rule': list1, 'support': list2, 'confidence': list3}
R = pd.DataFrame(R)
print(R)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值