蔚蓝祥和的天空
这个作者很懒,什么都没留下…
展开
-
GMM多元混合高斯样本分布预测男女分类
import numpy as npfrom sklearn.mixture import GaussianMixturefrom sklearn.model_selection import train_test_splitimport matplotlib as mplimport matplotlib.colorsimport matplotlib.pyplot as pltmpl.rcParams['font.sans-serif'] = [u'SimHei']mpl.rcPara.原创 2020-11-09 20:46:57 · 876 阅读 · 0 评论 -
EM算法实现GMM多元混合高斯分布
import numpy as npfrom scipy.stats import multivariate_normalfrom sklearn.mixture import GaussianMixturefrom mpl_toolkits.mplot3d import Axes3Dimport matplotlib as mplimport matplotlib.pyplot as pltfrom sklearn.metrics.pairwise import pairwise_dista.原创 2020-11-09 15:31:35 · 1098 阅读 · 0 评论 -
二分KMeans
import numpy as npimport matplotlib.pyplot as pltdef loadDataSet(fileName): dataMat = [] with open(fileName) as fr: for line in fr.readlines(): curLine = line.strip().split('\t') fltLine = list(map(float,curLine)).原创 2020-10-28 14:26:53 · 108 阅读 · 0 评论 -
SVM实践_MNIST
import numpy as npfrom sklearn import svmimport matplotlib.colorsimport matplotlib.pyplot as pltfrom PIL import Imagefrom sklearn.metrics import accuracy_scoreimport pandas as pdimport osimport csvfrom sklearn.model_selection import train_test_spl原创 2020-10-26 00:29:07 · 380 阅读 · 0 评论 -
回归树构建
import numpy as np#加载数据def loadDataSet(fileName): dataMat = [] fr = open(fileName) for line in fr.readlines(): curLine = line.strip().split('\t') fltLine = list(map(float,curLine)) dataMat.append(fltLine) return d原创 2020-10-20 11:59:38 · 189 阅读 · 0 评论 -
AdaBoost案例
import numpy as npdef loadSimData(): dataMat = np.mat([[1,2.1],[2,1.1],[1.3,1],[1,1],[2,1]]) classLabels = [1.0,1.0,-1.0,-1.0,1.0] return dataMat,classLabelsdef stumpClassfify(dataMatrix,dimen,thresVal,threshIneq): retArray = np.ones((d原创 2020-10-16 21:50:44 · 226 阅读 · 0 评论 -
AdaBoost机器学习实战
import numpy as npdef loadSimData(): dataMat = np.mat([[1,2.1],[2,1.1],[1.3,1],[1,1],[2,1]]) classLabels = [1.0,1.0,-1.0,-1.0,1.0] return dataMat,classLabelsdef stumpClassfify(dataMatrix,dimen,thresVal,threshIneq): retArray =原创 2020-10-16 20:55:12 · 110 阅读 · 0 评论 -
XGBoost推导
XGBoost推导原创 2020-10-15 17:10:56 · 91 阅读 · 0 评论 -
决策树--信息熵增益
from math import logimport operatordef createDateset(): dataSet = [["青年", "否", "否", "一般", "否"], ["青年", "否", "否", "好", "否"], ["青年", "是", "否", "好", "是"], ["青年", "是", "是", "一般", "是"], ["青年",原创 2020-10-15 14:14:26 · 115 阅读 · 0 评论 -
GBDT例子
抄的:GBDT原理import numpy as npimport pandas as pdA1 = np.array([0,1,2,3])A2 = np.array([5,7,21,30])A3 = np.array([20,30,70,60])y_label = np.array([1.1,1.3,1.7,1.8])data = {"编号":A1,"年龄":A2,"体重":A3,"身高":y_label}data_used = pd.DataFrame(data)data_used原创 2020-10-14 23:06:35 · 191 阅读 · 0 评论 -
AdaBoost实现
李航老师书上案例import numpy as npimport pandas as pdimport matplotlib.pyplot as pltnp.set_printoptions(precision=5) #e代表错误率e = np.arange(0.001,0.5,0.001)alpha = 1/2 * np.log2((1-e )/e)plt.plot(e,alpha)plt.show()随着分类器错误率上升,其权重系数alpha下降,0.5处等于0(分类器无用)原创 2020-10-13 21:03:28 · 104 阅读 · 0 评论 -
提升树案例
import numpy as npimport pandas as pddef Variance_cal(data,split_point,column,value): """ split_point:分裂点列表 data:DataFrame格式数据 column,分裂特征 value:标签值 """ Variance_list = {} for s in split_point:原创 2020-10-13 16:53:39 · 227 阅读 · 0 评论 -
决策树--鸢尾花
import numpy as npimport pandas as pdimport matplotlib.pyplot as pltimport matplotlib as mplfrom sklearn import treefrom sklearn.tree import DecisionTreeClassifierfrom sklearn.model_selection import train_test_splitfrom sklearn.pipeline import Pipel原创 2020-10-12 19:59:40 · 592 阅读 · 0 评论 -
决策树构造
import operatorfrom math import logdef calcShannonEnt(dataSet): #计算信息熵 numEntries = len(dataSet) #数据总条数 labelCounts = {} #建立字典,对每个分类分别计数 for featVec in dataSet: #读取每条数据 currentLabe原创 2020-10-12 10:51:49 · 86 阅读 · 0 评论 -
决策树--基尼系数实现
def Gini_classification(data,column,label): classfication_feature = data[column].unique() classification_label = data[label].unique() sum1 = len(df) Gini_classification = {} for i in classfication_feature:原创 2020-10-10 19:58:09 · 1415 阅读 · 0 评论