def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` with the k-nearest-neighbours rule.

    Args:
        inX: input vector to classify.
        dataSet: training sample set (NumPy array, one sample per row).
        labels: label vector; ``labels[i]`` is the class of row ``i``.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` training samples
        closest to ``inX`` (Euclidean distance).
    """
    # Function-scope import so the snippet does not depend on a star import
    # of numpy made somewhere outside this block.
    import numpy as np

    dataSetSize = dataSet.shape[0]
    # Repeat inX once per training row so it can be subtracted element-wise.
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # Indices of the training rows ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # dict.items() replaces the Python-2-only iteritems(); the lambda replaces
    # operator.itemgetter(1) so no extra module has to be in scope.
    sortedClassCount = sorted(classCount.items(), key=lambda item: item[1], reverse=True)
    return sortedClassCount[0][0]
-------------------------------------------------------------------------------------------------------------------------------------------------------------
.shape[0] 返回行数 .shape[1] 返回列数
.tile(x,(y,z)) 把x扩充y次(行),z次(列)
y**x 求y的x次方
.sum(axis=0) 列求和 .sum(axis=1)行求和
.argsort() 返回将数组从小到大排序后的索引值
.itemgetter(x) 获取第几个值 编号从0开始
sorted(iterable[, cmp[, key[, reverse]]]) (这是Python 2的签名;Python 3中已移除cmp参数)
作用:Return a new sorted list from the items in iterable.
第一个参数是一个iterable,返回值是一个对iterable中元素进行排序后的列表(list)。
可选的参数有三个,cmp、key和reverse。
1)cmp指定一个定制的比较函数,这个函数接收两个参数(iterable的元素),如果第一个参数小于第二个参数,返回一个负数;如果第一个参数等于第二个参数,返回零;如果第一个参数大于第二个参数,返回一个正数。默认值为None。
2)key指定一个接收一个参数的函数,这个函数用于从每个元素中提取一个用于比较的关键字。默认值为None。
3)reverse是一个布尔值。如果设置为True,列表元素将被倒序排列。
通常来说,key和reverse比一个等价的cmp函数处理速度要快。这是因为对于每个列表元素,cmp都会被调用多次,而key和reverse对每个元素只处理一次。
初次研究机器学习,留点东西以备理解,希望能用到实际开发中去,感觉非常有用。
随便做了一个C#的例子,识别效果还不错,样本才10来个,标签A,B,C 居然k值内100%准确率
using System;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.IO;
using System.Drawing;
using System.Text;
using System.Collections.Generic;
namespace KNN
{
/// <summary>
/// Main window of the KNN handwriting demo. The user draws a character on
/// <c>pictureBox1</c>; the drawing is sampled every 10 pixels into a buffer of
/// '0'/'1' bytes that can be saved as a labelled training sample or compared
/// (Euclidean distance) against the loaded samples.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Number of cells per row in the human-readable sample file.</summary>
    private const int GridSize = 32;

    /// <summary>Number of bytes in one sample (32 * 32).</summary>
    private const int FeatureCount = GridSize * GridSize;

    /// <summary>Distance in pixels between two sampled points of the drawing.</summary>
    private const int SampleStep = 10;

    /// <summary>Byte stored for an untouched pixel (ASCII '0').</summary>
    private const byte BlankCell = 0x30;

    /// <summary>Byte stored for a pixel that carries ink (ASCII '1').</summary>
    private const byte InkCell = 0x31;

    /// <summary>File the embedded pen cursor resource is extracted to.</summary>
    private const string CursorFileName = "cursor.dat";

    /// <summary>Directory holding samples as one flat 1 x 1024 block.</summary>
    private const string FlatSampleDirectory = "dataSet";

    /// <summary>Directory holding the same samples laid out as 32 lines of 32 characters.</summary>
    private const string GridSampleDirectory = "dataSetV";

    /// <summary>Characters accepted as a sample label.</summary>
    private const string ValidLabels = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    [DllImport("user32")]
    private static extern IntPtr LoadCursorFromFile(string fileName);

    private Point startPoint, endPoint;
    private static bool isDrawing = false;
    private byte[] dataSet = new byte[FeatureCount];
    private Bitmap bitmapResult;
    private List<DataSetFile> listDataSetFile = new List<DataSetFile>();
    private int k = 5;

    // Pen cursor, created on first use and then reused; the original code
    // loaded a new cursor handle on every mouse move.
    private Cursor penCursor;

    /// <summary>Builds the form and allocates the drawing surface.</summary>
    public Form1()
    {
        InitializeComponent();
        bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
    }

    /// <summary>Forces the window back to a fixed 800 x 600 size.</summary>
    private void Form1_Resize(object sender, EventArgs e)
    {
        this.Width = 800;
        this.Height = 600;
    }

    /// <summary>Starts a stroke at the mouse position.</summary>
    private void pictureBox1_MouseDown(object sender, MouseEventArgs e)
    {
        startPoint = new Point(e.X, e.Y);
        endPoint = new Point(e.X, e.Y);
        isDrawing = true;
    }

    /// <summary>Ends the current stroke.</summary>
    private void pictureBox1_MouseUp(object sender, MouseEventArgs e)
    {
        isDrawing = false;
    }

    /// <summary>
    /// Shows the pen cursor and, while the left button is held during a
    /// stroke, extends the stroke to the current mouse position.
    /// </summary>
    private void pictureBox1_MouseMove(object sender, MouseEventArgs e)
    {
        Cursor cursor = GetPenCursor();
        if (cursor != null)
        {
            Cursor.Current = cursor;
        }

        if (e.Button != MouseButtons.Left || !isDrawing)
        {
            return;
        }

        Point currentPoint = new Point(e.X, e.Y);
        // Graphics and Pen wrap GDI+ handles; release them after each segment.
        using (Graphics graphics = Graphics.FromImage(bitmapResult))
        using (Pen pen = new Pen(Color.Black, 10))
        {
            graphics.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.AntiAlias;
            graphics.DrawLine(pen, endPoint, currentPoint);
        }
        endPoint = currentPoint;
        this.pictureBox1.Image = bitmapResult;
    }

    /// <summary>
    /// Returns the pen cursor, extracting the embedded resource to
    /// <c>cursor.dat</c> and loading it on first use.
    /// </summary>
    /// <returns>The cached cursor, or null when the cursor file could not be loaded.</returns>
    private Cursor GetPenCursor()
    {
        if (penCursor == null)
        {
            if (!File.Exists(CursorFileName))
            {
                File.WriteAllBytes(CursorFileName, Properties.Resources.pen);
            }

            IntPtr handle = LoadCursorFromFile(CursorFileName);
            // A zero handle means the load failed; keep the default cursor
            // instead of letting the Cursor constructor throw.
            if (handle != IntPtr.Zero)
            {
                penCursor = new Cursor(handle);
            }
        }
        return penCursor;
    }

    /// <summary>Clears the drawing area.</summary>
    private void button2_Click(object sender, EventArgs e)
    {
        Bitmap previous = bitmapResult;
        bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
        this.pictureBox1.Image = bitmapResult;
        // The old surface is no longer shown anywhere, so it can be released.
        if (previous != null)
        {
            previous.Dispose();
        }
    }

    /// <summary>
    /// Samples the current drawing into <c>dataSet</c>: one byte per sampled
    /// pixel, '0' for a pixel whose ARGB value is zero and '1' otherwise.
    /// </summary>
    /// <returns>False when the picture box has no image yet; true otherwise.</returns>
    private bool SampleDrawing()
    {
        Image source = this.pictureBox1.Image;
        if (source == null)
        {
            return false;
        }

        // Start from an empty sample so cells that are not visited below do
        // not keep values from a previous drawing.
        for (int n = 0; n < dataSet.Length; n++)
        {
            dataSet[n] = BlankCell;
        }

        using (Bitmap map = new Bitmap(source))
        {
            // Stay inside both the bitmap and the buffer.
            int height = Math.Min(pictureBox1.Height, map.Height);
            int width = Math.Min(pictureBox1.Width, map.Width);
            int m = 0;
            for (int j = 0; j < height && m < dataSet.Length; j += SampleStep)
            {
                for (int i = 0; i < width && m < dataSet.Length; i += SampleStep)
                {
                    // Same test as the former c.Name == "0": an unnamed
                    // colour's Name is its ARGB value in hexadecimal.
                    dataSet[m] = map.GetPixel(i, j).ToArgb() == 0 ? BlankCell : InkCell;
                    m++;
                }
            }
        }
        return true;
    }

    /// <summary>
    /// Converts the drawing and stores it as a training sample labelled with
    /// the single character typed into <c>textBox1</c>.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        string label = textBox1.Text.Trim();
        if (label.Length != 1 || !ValidLabels.Contains(label))
        {
            return;
        }
        if (!SampleDrawing())
        {
            return;
        }

        // CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory(FlatSampleDirectory); // 1 x 1024 layout
        Directory.CreateDirectory(GridSampleDirectory); // 32 x 32 layout

        string randFileName = label + "_" + DateTime.Now.ToString("yyyyMMddHHmmss") + ".txt";
        File.WriteAllBytes(Path.Combine(FlatSampleDirectory, randFileName), dataSet);

        // The buffer only contains the ASCII bytes '0' and '1'.
        string strDataSet = Encoding.ASCII.GetString(dataSet, 0, FeatureCount);
        using (StreamWriter sw = new StreamWriter(Path.Combine(GridSampleDirectory, randFileName)))
        {
            for (int row = 0; row < GridSize; row++)
            {
                sw.WriteLine(strDataSet.Substring(row * GridSize, GridSize));
            }
        }
    }

    /// <summary>
    /// Starts recognition: computes the Euclidean distance from the drawing to
    /// every loaded sample and appends the labels of the k nearest samples to
    /// <c>label3</c>.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        string kText = textBox2.Text.Trim();
        if (kText != "")
        {
            // Parse into a temporary: TryParse writes 0 to its out argument
            // on failure, which would otherwise wipe the current k.
            int parsed;
            if (int.TryParse(kText, out parsed) && parsed > 0)
            {
                k = parsed;
            }
        }

        label3.Text = "识别结果:";
        if (listDataSetFile.Count == 0 || !SampleDrawing())
        {
            return;
        }

        foreach (DataSetFile sample in listDataSetFile)
        {
            double sum = 0;
            for (int j = 0; j < FeatureCount; j++)
            {
                double diff = Convert.ToDouble(dataSet[j]) - Convert.ToDouble(sample.Content[j]);
                sum += diff * diff;
            }
            sample.Value = Math.Sqrt(sum);
        }
        listDataSetFile.Sort(CompareByValue);

        // Never ask for more neighbours than there are samples.
        int neighbours = Math.Min(k, listDataSetFile.Count);
        StringBuilder result = new StringBuilder(label3.Text);
        for (int i = 0; i < neighbours; i++)
        {
            result.Append(listDataSetFile[i].Label);
        }
        label3.Text = result.ToString();
    }

    /// <summary>
    /// Loads every <c>*.txt</c> sample from the <c>dataSet</c> directory. The
    /// label of a sample is the first character of its file name.
    /// </summary>
    private void button4_Click(object sender, EventArgs e)
    {
        // Reloading replaces the list; appending would duplicate every sample
        // and skew the nearest-neighbour result.
        listDataSetFile.Clear();

        DirectoryInfo di = new DirectoryInfo(FlatSampleDirectory);
        if (di.Exists)
        {
            foreach (FileInfo file in di.GetFiles("*.txt"))
            {
                // ReadAllBytes closes the file again; the buffer keeps a fixed
                // length so shorter files stay zero-padded.
                byte[] raw = File.ReadAllBytes(file.FullName);
                byte[] byteContent = new byte[FeatureCount];
                Array.Copy(raw, byteContent, Math.Min(raw.Length, FeatureCount));

                DataSetFile dsf = new DataSetFile();
                dsf.Path = file.FullName;
                dsf.Label = file.Name.Substring(0, 1);
                dsf.Content = byteContent;
                listDataSetFile.Add(dsf);
            }
        }
        label4.Text = "加载样本完毕";
    }

    /// <summary>Orders samples by ascending distance.</summary>
    private int CompareByValue(DataSetFile x, DataSetFile y)
    {
        return x.Value.CompareTo(y.Value);
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace KNN
{
/// <summary>
/// One training sample: where it was read from, its label, its raw bytes and
/// a numeric value attached to it.
/// </summary>
class DataSetFile
{
    /// <summary>Numeric value associated with the sample; defaults to 0.</summary>
    public double Value { get; set; }

    /// <summary>Path of the file the sample came from.</summary>
    public string Path { get; set; }

    /// <summary>Label of the sample.</summary>
    public string Label { get; set; }

    /// <summary>Raw bytes of the sample.</summary>
    public byte[] Content { get; set; }
}
}