




# -*- coding: utf-8 -*-  
'''
Created on 2017年3月18日

@author: soso
'''
from numpy import *
import operator

def createDataSet():
    """Build the small demo training set.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 numpy array holding
        one sample point per row, and ``labels`` is a list with the class
        label of each row, in the same order.
    """
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    sampleLabels = ['A', 'A', 'B', 'B']
    return array(samples), sampleLabels

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Args:
        inX: The point to classify, a sequence with one value per feature.
        dataSet: 2-D numpy array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` nearest samples.
    """
    # Broadcasting subtracts every row of dataSet from inX, giving the same
    # matrix as tile(inX, (rows, 1)) - dataSet without building the copies.
    diffMat = asarray(inX) - dataSet
    sqDiffMat = diffMat ** 2
    # With axis=1 the sum adds up the entries of each row vector.
    sqDistances = sqDiffMat.sum(axis=1)
    # The Euclidean distance is the square root (** 0.5) of the summed
    # squares, not half of it (* 0.5).
    distances = sqDistances ** 0.5
    # argsort returns the indices that order the values from small to large.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteLabel = labels[sortedDistIndicies[i]]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1
    # items() works on Python 2 and 3; iteritems() exists on Python 2 only.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


import scala.collection.mutable.Map

/** Minimal k-nearest-neighbours classifier together with a tiny demo data set. */
object kNN {

  /** Returns the sample points of the demo training set, one point per inner array. */
  def getGroup(): Array[Array[Double]] =
    Array(Array(1.0, 1.1), Array(1.0, 1.0), Array(0.0, 0.0), Array(0.0, 0.1))

  /** Returns the class label of each point in [[getGroup]], in the same order. */
  def getLabels(): Array[Char] = Array('A', 'A', 'B', 'B')

  /** Squared Euclidean distance between two points of equal dimension. */
  private def squaredDistance(a: Array[Double], b: Array[Double]): Double = {
    require(a.length == b.length, s"dimension mismatch: ${a.length} vs ${b.length}")
    a.indices.foldLeft(0.0) { (acc, i) =>
      val diff = a(i) - b(i)
      acc + diff * diff
    }
  }

  /**
   * Classifies `inX` by majority vote among its `k` nearest training samples.
   *
   * Neighbours are ranked by squared Euclidean distance; the square root is
   * skipped because it does not change the ordering. When several labels
   * receive the same number of votes, the label whose closest sample ranks
   * nearest to `inX` wins.
   *
   * @param inX     point to classify
   * @param dataSet training samples, each with the same dimension as `inX`
   * @param labels  `labels(i)` is the class of `dataSet(i)`
   * @param k       number of neighbours that vote, `1 <= k <= dataSet.length`
   * @return the winning label
   * @throws IllegalArgumentException if the sizes are inconsistent or `k` is out of range
   */
  def classify0(inX: Array[Double], dataSet: Array[Array[Double]], labels: Array[Char], k: Int): Char = {
    require(dataSet.length == labels.length,
      s"dataSet has ${dataSet.length} samples but labels has ${labels.length} entries")
    require(k >= 1 && k <= dataSet.length,
      s"k must be between 1 and ${dataSet.length}, got $k")

    // Labels of the k closest samples, nearest first.
    val nearestLabels = dataSet
      .map(sample => squaredDistance(sample, inX))
      .zipWithIndex
      .sortBy(_._1)
      .take(k)
      .map { case (_, sampleIndex) => labels(sampleIndex) }

    // For every label: its vote count and the rank of its closest sample.
    val tallies = nearestLabels.zipWithIndex
      .groupBy(_._1)
      .toSeq
      .map { case (label, hits) => (label, hits.length, hits.map(_._2).min) }

    // Most votes first; the closest-sample rank breaks ties deterministically.
    tallies.minBy { case (_, votes, closestRank) => (-votes, closestRank) }._1
  }

  /** Classifies the origin against the demo data set and prints the resulting label. */
  def main(args: Array[String]): Unit =
    println(classify0(Array(0.0, 0.0), getGroup(), getLabels(), 3))
}


// Example: logistic regression in Scala, fitted with the Newton-Raphson algorithm.
import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.numerics.{exp, log}
import scala.annotation.tailrec

/**
 * Logistic regression fitted with Newton-Raphson updates.
 *
 * The object defines the sigmoid function, the gradient, the Hessian, the
 * negative log-likelihood and the training routine. Training iterates through
 * a tail-recursive loop until either the maximum number of iterations is
 * reached or the change in the objective falls below the tolerance, and then
 * returns the weight vector as the model output.
 */
object LogisticRegression {

  /**
   * Compute the sigmoid function.
   *
   * @param z input value
   * @return sigmoid value
   */
  def sigmoid(z: Double): Double = 1.0 / (1.0 + exp(-z))

  /** Sigmoid of the linear predictor `X * weights`, one entry per row of `X`. */
  private def probabilities(X: DenseMatrix[Double], weights: DenseVector[Double]): DenseVector[Double] =
    (X * weights).map(z => sigmoid(z))

  /**
   * Compute the gradient of the negative log-likelihood.
   *
   * @param X       design matrix
   * @param y       target variable
   * @param weights current weights
   * @return gradient vector
   */
  def gradient(X: DenseMatrix[Double], y: DenseVector[Double], weights: DenseVector[Double]): DenseVector[Double] =
    X.t * (probabilities(X, weights) - y)

  /**
   * Compute the Hessian matrix of the negative log-likelihood, `X.t * D * X`
   * with `D` the diagonal matrix of `p * (1 - p)`.
   *
   * @param X       design matrix
   * @param weights current weights
   * @return Hessian matrix
   */
  def hessian(X: DenseMatrix[Double], weights: DenseVector[Double]): DenseMatrix[Double] = {
    val rowWeights = probabilities(X, weights).map(p => p * (1.0 - p))
    // Scale row i of X by rowWeights(i); this equals D * X without building the n x n matrix D.
    val weightedX = DenseMatrix.tabulate(X.rows, X.cols) { (i, j) => X(i, j) * rowWeights(i) }
    X.t * weightedX
  }

  /**
   * Compute the negative log-likelihood (lower is better). The name is kept
   * for compatibility; the sign matches the original implementation.
   *
   * @param X       design matrix
   * @param y       target variable
   * @param weights current weights
   * @return negative log-likelihood value
   */
  def logLikelihood(X: DenseMatrix[Double], y: DenseVector[Double], weights: DenseVector[Double]): Double = {
    val epsilon = 1e-16
    // Clip probabilities away from 0 and 1 so that neither logarithm is infinite.
    val clipped = probabilities(X, weights).map(p => math.min(math.max(p, epsilon), 1.0 - epsilon))
    val total = (0 until y.length).map { i =>
      y(i) * log(clipped(i)) + (1.0 - y(i)) * log(1.0 - clipped(i))
    }.sum
    -total
  }

  /**
   * Train a logistic regression model using the Newton-Raphson algorithm.
   *
   * Starts from zero weights and performs at most `maxIterations` updates.
   * Stops early once the absolute change of the negative log-likelihood
   * between two consecutive iterates is below `tolerance`.
   *
   * @param X             design matrix
   * @param y             target variable
   * @param maxIterations maximum number of iterations
   * @param tolerance     convergence tolerance
   * @return weights vector
   */
  def train(X: DenseMatrix[Double], y: DenseVector[Double], maxIterations: Int = 100, tolerance: Double = 1e-6): DenseVector[Double] = {
    @tailrec
    def loop(iteration: Int, weights: DenseVector[Double], loss: Double): DenseVector[Double] =
      if (iteration >= maxIterations) weights
      else {
        // Newton step: solve H * delta = g instead of inverting the Hessian.
        val delta = hessian(X, weights) \ gradient(X, y, weights)
        val updated = weights - delta
        val updatedLoss = logLikelihood(X, y, updated)
        if (math.abs(loss - updatedLoss) < tolerance) updated
        else loop(iteration + 1, updated, updatedLoss)
      }

    val initial = DenseVector.zeros[Double](X.cols)
    loop(0, initial, logLikelihood(X, y, initial))
  }
}




