<Machine Learning in Action >之一 k-近邻算法 C#实现手写识别

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Distance is the Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: input vector to classify.
        dataSet: training sample set, one sample per row (must expose
            ``.shape`` and support array arithmetic, e.g. a NumPy array).
        labels: label vector; ``labels[i]`` is the label of row ``i``.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the k nearest samples.

    Note:
        ``tile`` (from NumPy) and the ``operator`` module must already be
        imported by the enclosing module.
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX once per training row so the subtraction is row-by-row.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # Row indices ordered from the nearest sample to the farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

-------------------------------------------------------------------------------------------------------------------------------------------------------------

.shape[0] 返回行数   .shape[1]  返回列数
.tile(x,(y,z)) 把x扩充y次(行),z次(列)
y**x  求y的x次方
.sum(axis=0)  列求和   .sum(axis=1)行求和
.argsort() 返回将数组元素从小到大排序后对应的索引值
.itemgetter(x) 获取第几个值 编号从0开始
sorted(iterable[, cmp[, key[, reverse]]])
作用:Return a new sorted list from the items in iterable.
          第一个参数是一个iterable,返回值是一个对iterable中元素进行排序后的列表(list)。
可选的参数有三个,cmp、key和reverse。
1)cmp指定一个定制的比较函数,这个函数接收两个参数(iterable的元素),如果第一个参数小于第二个参数,返回一个负数;如果第一个参数等于第二个参数,返回零;如果第一个参数大于第二个参数,返回一个正数。默认值为None。
2)key指定一个接收一个参数的函数,这个函数用于从每个元素中提取一个用于比较的关键字。默认值为None。
3)reverse是一个布尔值。如果设置为True,列表元素将被倒序排列。
通常来说,key和reverse比一个等价的cmp函数处理速度要快。这是因为对于每个列表元素,cmp都会被调用多次,而key和reverse只被调用一次。


初次研究机器学习,留点东西以备理解,希望能用到实际开发中去,感觉非常有用。
随便做了一个C#的例子,识别效果还不错:样本才10来个,标签为A、B、C,居然在k个近邻内达到了100%的准确率。


using System;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.IO;
using System.Drawing;
using System.Text;
using System.Collections.Generic;

namespace KNN
{
    /// <summary>
    /// Main window of the k-nearest-neighbour handwriting demo. The user draws a
    /// character on <c>pictureBox1</c>; the drawing is reduced to a 32x32 grid of
    /// ASCII '0'/'1' cells that can be stored as a labelled training sample or
    /// compared against previously loaded samples.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Side length of the square grid a drawing is reduced to.</summary>
        private const int GridSize = 32;

        /// <summary>Number of cells in one sample (32 * 32 = 1024).</summary>
        private const int SampleLength = GridSize * GridSize;

        /// <summary>Cell value for an untouched pixel (ASCII '0').</summary>
        private const byte EmptyCell = 0x30;

        /// <summary>Cell value for a drawn pixel (ASCII '1').</summary>
        private const byte InkCell = 0x31;

        [DllImport("user32")]
        private static extern IntPtr LoadCursorFromFile(string fileName);

        private Point startPoint, endPoint;
        private static bool isDrawing = false;
        private byte[] dataSet = new byte[SampleLength];
        private Bitmap bitmapResult;
        private List<DataSetFile> listDataSetFile = new List<DataSetFile>();
        private int k = 5;

        // Loaded once on first use; the original reloaded the cursor file on
        // every mouse move, leaking one native cursor handle per event.
        private Cursor penCursor;

        /// <summary>Creates the form and an empty drawing surface the size of the canvas.</summary>
        public Form1()
        {
            InitializeComponent();
            bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
        }

        /// <summary>Forces the window back to a fixed 800x600 size whenever it is resized.</summary>
        private void Form1_Resize(object sender, EventArgs e)
        {
            this.Width = 800;
            this.Height = 600;
        }

        /// <summary>Starts a stroke at the mouse position.</summary>
        private void pictureBox1_MouseDown(object sender, MouseEventArgs e)
        {
            startPoint = new Point(e.X, e.Y);
            endPoint = new Point(e.X, e.Y);
            isDrawing = true;
        }

        /// <summary>Ends the current stroke.</summary>
        private void pictureBox1_MouseUp(object sender, MouseEventArgs e)
        {
            isDrawing = false;
        }

        /// <summary>
        /// Shows the pen cursor over the canvas and, while the left button is held
        /// during a stroke, extends the stroke to the current mouse position.
        /// </summary>
        private void pictureBox1_MouseMove(object sender, MouseEventArgs e)
        {
            Cursor.Current = GetPenCursor();

            if (e.Button != MouseButtons.Left || !isDrawing)
            {
                return;
            }

            Point currentPoint = new Point(e.X, e.Y);

            // Graphics and Pen wrap native GDI+ objects; release them after each segment.
            using (Graphics graphics = Graphics.FromImage(bitmapResult))
            using (Pen pen = new Pen(Color.Black, 10))
            {
                graphics.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.AntiAlias;
                graphics.DrawLine(pen, endPoint, currentPoint);
            }

            endPoint = currentPoint;
            this.pictureBox1.Image = bitmapResult;
        }

        /// <summary>Clears the drawing area.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            Bitmap previous = bitmapResult;
            bitmapResult = new Bitmap(this.pictureBox1.Width, this.pictureBox1.Height);
            this.pictureBox1.Image = bitmapResult;

            // The old surface is no longer referenced by the picture box; free it.
            if (previous != null)
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Converts the drawing and stores it as a training sample. The label is the
        /// single letter or digit typed into <c>textBox1</c>; nothing is saved when
        /// the label is missing or invalid. Two files are written: a flat 1x1024
        /// file under <c>dataSet</c> and a 32-line, 32-column view under <c>dataSetV</c>.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string label = textBox1.Text.Trim();
            if (this.pictureBox1.Image == null
                || label.Length != 1
                || !"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".Contains(label))
            {
                return;
            }

            CaptureDrawing();

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory("dataSet");   // 1x1024 layout
            Directory.CreateDirectory("dataSetV");  // 32x32 layout

            string randFileName = label + "_" + DateTime.Now.ToString("yyyyMMddHHmmss") + ".txt";

            File.WriteAllBytes(Path.Combine("dataSet", randFileName), dataSet);

            string strDataSet = Encoding.ASCII.GetString(dataSet, 0, SampleLength);
            using (StreamWriter sw = new StreamWriter(Path.Combine("dataSetV", randFileName)))
            {
                for (int i = 0; i < GridSize; i++)
                {
                    sw.WriteLine(strDataSet.Substring(i * GridSize, GridSize));
                }
            }
        }

        /// <summary>
        /// Recognises the drawing: computes the Euclidean distance from the drawing to
        /// every loaded sample, sorts the samples by distance and appends the labels
        /// of the k nearest ones to <c>label3</c>. k is taken from <c>textBox2</c>
        /// when it holds a positive integer; otherwise the previous k is kept.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            // Parse into a temporary: TryParse zeroes its out argument on failure,
            // which previously wiped the current k on invalid input.
            string kText = textBox2.Text.Trim();
            int requestedK;
            if (kText != "" && int.TryParse(kText, out requestedK) && requestedK > 0)
            {
                k = requestedK;
            }

            label3.Text = "识别结果:";
            if (this.pictureBox1.Image == null || listDataSetFile.Count == 0)
            {
                return;
            }

            CaptureDrawing();

            foreach (DataSetFile sample in listDataSetFile)
            {
                double sumOfSquares = 0;
                for (int j = 0; j < SampleLength; j++)
                {
                    double diff = (double)dataSet[j] - (double)sample.Content[j];
                    sumOfSquares += diff * diff;
                }
                sample.Value = Math.Sqrt(sumOfSquares);
            }

            listDataSetFile.Sort(CompareByValue);

            // Never read past the end of the list when k exceeds the sample count.
            int neighbours = Math.Min(k, listDataSetFile.Count);
            StringBuilder result = new StringBuilder(label3.Text);
            for (int i = 0; i < neighbours; i++)
            {
                result.Append(listDataSetFile[i].Label);
            }
            label3.Text = result.ToString();
        }

        /// <summary>
        /// Loads every <c>*.txt</c> sample from the <c>dataSet</c> directory. The
        /// label of a sample is the first character of its file name. Previously
        /// loaded samples are discarded first so that reloading does not duplicate
        /// them; a missing directory yields an empty sample list.
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            listDataSetFile.Clear();

            DirectoryInfo di = new DirectoryInfo("dataSet");
            if (di.Exists)
            {
                foreach (FileInfo file in di.GetFiles("*.txt"))
                {
                    // ReadAllBytes opens and closes the file itself, so no handle is leaked.
                    byte[] raw = File.ReadAllBytes(file.FullName);

                    // Always keep exactly SampleLength bytes: shorter files are
                    // zero-padded and longer ones truncated, as before.
                    byte[] byteContent = new byte[SampleLength];
                    Array.Copy(raw, byteContent, Math.Min(raw.Length, SampleLength));

                    DataSetFile dsf = new DataSetFile();
                    dsf.Path = file.FullName;
                    dsf.Label = file.Name.Substring(0, 1);
                    dsf.Content = byteContent;
                    listDataSetFile.Add(dsf);
                }
            }

            label4.Text = "加载样本完毕";
        }

        /// <summary>Orders samples by ascending <see cref="DataSetFile.Value"/>.</summary>
        private int CompareByValue(DataSetFile x, DataSetFile y)
        {
            return x.Value.CompareTo(y.Value);
        }

        /// <summary>
        /// Returns the pen cursor, creating it on first use. The cursor resource is
        /// written to <c>cursor.dat</c> if that file does not exist yet, then loaded
        /// through <c>LoadCursorFromFile</c>. Falls back to the default cursor when
        /// the native call fails.
        /// </summary>
        private Cursor GetPenCursor()
        {
            if (penCursor != null)
            {
                return penCursor;
            }

            const string cursorFile = "cursor.dat";
            if (!File.Exists(cursorFile))
            {
                File.WriteAllBytes(cursorFile, Properties.Resources.pen);
            }

            IntPtr handle = LoadCursorFromFile(cursorFile);
            // new Cursor(IntPtr.Zero) throws, so guard against a failed load.
            penCursor = handle != IntPtr.Zero ? new Cursor(handle) : Cursors.Default;
            return penCursor;
        }

        /// <summary>
        /// Reduces the current canvas image to a 32x32 grid stored row by row in
        /// <c>dataSet</c>. One pixel is sampled per cell; for a 320x320 image that is
        /// every 10th pixel in each direction. A pixel whose ARGB value is 0 (never
        /// drawn on) becomes '0', anything else becomes '1'.
        /// </summary>
        private void CaptureDrawing()
        {
            using (Bitmap map = new Bitmap(this.pictureBox1.Image))
            {
                int m = 0;
                for (int row = 0; row < GridSize; row++)
                {
                    int y = row * map.Height / GridSize;
                    for (int col = 0; col < GridSize; col++)
                    {
                        int x = col * map.Width / GridSize;
                        // Same test as Color.Name == "0": Name is the hex ARGB value
                        // for unnamed colours, and only ARGB 0 renders as "0".
                        dataSet[m++] = map.GetPixel(x, y).ToArgb() == 0 ? EmptyCell : InkCell;
                    }
                }
            }
        }
    }
}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace KNN
{
    /// <summary>
    /// One training sample: where it was read from, its label, its raw bytes
    /// and a numeric value used to rank it against other samples.
    /// </summary>
    class DataSetFile
    {
        /// <summary>Gets or sets the numeric value used to order samples.</summary>
        public double Value { get; set; }

        /// <summary>Gets or sets the path of the file the sample came from.</summary>
        public string Path { get; set; }

        /// <summary>Gets or sets the label attached to the sample.</summary>
        public string Label { get; set; }

        /// <summary>Gets or sets the raw bytes of the sample.</summary>
        public byte[] Content { get; set; }
    }
}


  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值