mnist数字集识别 xgboost+cnn

最新推荐文章于 2024-06-21 13:22:25 发布

cColdTea

最新推荐文章于 2024-06-21 13:22:25 发布

阅读量2.9k

点赞数 1

分类专栏：机器学习

本文链接：https://blog.csdn.net/emmaczw/article/details/84849195

版权

机器学习专栏收录该内容

26 篇文章 0 订阅

订阅专栏

之前想做字母识别，但没找到合适的训练集（找到的样本太少），所以先用 XGBoost 和 CNN 在 MNIST 数据集上训练了数字识别模型。

在 MNIST 测试集上效果挺好。但我自己还有一些数字图片也需要识别（比如下面这种），直接识别的效果很差，后来发现原因是数字在图片中不够居中。于是先对图片做了一些预处理（裁剪、缩放）；不知道有没有不需要预处理的方法。两种方法的代码都放在下面，训练数据集用的就是 MNIST。

代码比较乱，没有整理，将就看看吧，反正逻辑挺简单的。接下来准备做字母识别，感觉也不难，不过图片预处理比较重要，因为验证码可能需要先切割成单个字符。另外我还整理了一些 12306 的验证码，之后也准备试着识别。

def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Parse an IDX3 (image) file into a NumPy array.

    The file starts with four big-endian 32-bit integers (magic number,
    image count, rows per image, columns per image) followed by one
    unsigned byte per pixel.

    :param idx3_ubyte_file: path of the IDX3 file
    :return: float64 array of shape (num_images, num_rows, num_cols)
    :raises ValueError: if the file holds fewer pixel bytes than the header declares
    """
    # Read the whole file; the context manager guarantees the handle is closed.
    with open(idx3_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Header: magic number, image count, image height, image width.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    offset = struct.calcsize(fmt_header)
    pixel_count = num_images * num_rows * num_cols
    if len(bin_data) - offset < pixel_count:
        raise ValueError('IDX3 file is truncated: header declares %d pixel bytes, found %d'
                         % (pixel_count, len(bin_data) - offset))

    # Decode every pixel in one pass instead of one struct.unpack_from per image.
    pixels = np.frombuffer(bin_data, dtype=np.uint8, count=pixel_count, offset=offset)

    # Keep the same progress lines the per-image loop used to print.
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')

    # astype() copies, so the result is writable and float64 as before.
    return pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)

def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Parse an IDX1 (label) file into a NumPy array.

    The file starts with two big-endian 32-bit integers (magic number,
    label count) followed by one unsigned byte per label.

    :param idx1_ubyte_file: path of the IDX1 file
    :return: float64 array of shape (num_labels,)
    :raises ValueError: if the file holds fewer label bytes than the header declares
    """
    # Read the whole file; the context manager guarantees the handle is closed.
    with open(idx1_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Header: magic number and number of labels.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张' % (magic_number, num_images))

    offset = struct.calcsize(fmt_header)
    if len(bin_data) - offset < num_images:
        raise ValueError('IDX1 file is truncated: header declares %d labels, found %d'
                         % (num_images, len(bin_data) - offset))

    # Decode all labels in one pass instead of one struct.unpack_from per label.
    raw_labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset)

    # Keep the same progress lines the per-label loop used to print.
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')

    # astype() copies, so the result is writable and float64 as before.
    return raw_labels.astype(np.float64)

def xgboost_train():
    """
    Train an XGBoost multi-class classifier on binarized pixels and save it.

    Reads 'train_data.csv', thresholds every pixel at 180, decodes the labels
    from the IDX1 file named by the module-level ``train_label``, holds out
    10% of the rows for validation, trains with early stopping and dumps the
    resulting booster to 'number_rec_2.pkl'.

    :return: None
    """
    from sklearn.model_selection import train_test_split

    train_data = pd.read_csv('train_data.csv', index_col=0).astype(int)
    print('csv_file read over')
    # Binarize in one vectorized comparison (pixel >= 180 -> 1, else 0)
    # instead of calling a Python lambda on every cell.
    train_data = (train_data >= 180).astype(int)
    y_train = decode_idx1_ubyte(train_label)
    x_train, x_val, y_train, y_val = train_test_split(train_data, pd.DataFrame(y_train), test_size=0.1)

    param_list = [("eta", 0.1), ("max_depth", 5), ("subsample", 0.8),
                  ("colsample_bytree", 0.8), ("objective", "multi:softmax"),
                  ("eval_metric", "merror"), ("alpha", 8), ("lambda", 2),
                  ("num_class", 10), ('silent', False), ('n_jobs', -1)]
    n_rounds = 300
    early_stopping = 10

    d_train = xgb.DMatrix(x_train, label=y_train)
    d_val = xgb.DMatrix(x_val, label=y_val)

    # The last entry of the watch list ('val') is the one early stopping monitors.
    watch_list = [(d_train, 'train'), (d_val, 'val')]
    model = xgb.train(param_list, d_train, n_rounds, evals=watch_list,
                      early_stopping_rounds=early_stopping)

    # NOTE(review): the prediction helpers in this file load 'number_rec_1.pkl',
    # not the file written here — confirm which model file is intended.
    joblib.dump(model, 'number_rec_2.pkl')

def predict_score():
    """
    Print the accuracy of the saved XGBoost model on the test set.

    Loads 'number_rec_1.pkl', reads 'test_data.csv', thresholds every pixel
    at 180 and compares the predictions with the labels decoded from the
    IDX1 file named by the module-level ``test_label``.

    :return: None
    """
    model = joblib.load('number_rec_1.pkl')
    test_data = pd.read_csv('test_data.csv', index_col=0).astype(int)
    print('csv_file read over')
    # Binarize in one vectorized comparison (pixel >= 180 -> 1, else 0)
    # instead of calling a Python lambda on every cell.
    test_data = (test_data >= 180).astype(int)
    y_test = decode_idx1_ubyte(test_label)

    prediction = model.predict(xgb.DMatrix(test_data))
    print(metrics.accuracy_score(y_test, prediction))

def xgboost_predict():
    """
    Classify one hand-written sample image with the saved XGBoost model.

    Opens 'Img/Sample005/img005-002.png', crops it around the inked area
    with a margin, shrinks it to 28x28, binarizes it (pixel <= 180 -> 1,
    else 0) and prints the model's prediction.

    :return: None
    """
    i = 1
    model = joblib.load('number_rec_1.pkl')
    img = Image.open('Img/Sample005/img005-{}.png'.format(str(i + 1).zfill(3))).convert('L')

    # Invert the grey levels so pure-white pixels become 0 and darker ones positive.
    img_temp = 255 - np.array(img)

    # Column profile. ndarray.sum() widens the accumulator; the builtin sum()
    # would add uint8 rows in uint8 and wrap modulo 256, so an inked column
    # could be reported as empty.
    inked_cols = np.flatnonzero(img_temp.sum(axis=0) > 0)
    left_temp = inked_cols.min()
    right_temp = inked_cols.max()
    width = max(950, right_temp - left_temp)
    # NOTE(review): 1200 and 900 look like the sample image size — confirm.
    left = int(max(0, left_temp - width / 3))
    right = int(min(1200, right_temp + width / 3))

    # Row profile, padded by a quarter of the inked height on each side.
    inked_rows = np.flatnonzero(img_temp.sum(axis=1) > 0)
    up_temp = inked_rows.min()
    down_temp = inked_rows.max()
    up = int(max(0, up_temp - (down_temp - up_temp) / 4))
    down = int(min(900, down_temp + (down_temp - up_temp) / 4))

    img = img.crop((left, up, right, down))
    img = img.resize((28, 28))

    # Binarize with a vectorized comparison: pixels at or below 180 become 1.
    pixels = (np.array(img) <= 180).astype(int).reshape(1, -1)
    img = pd.DataFrame(pixels)
    img.columns = img.columns.astype(str)
    print(model.predict(xgb.DMatrix(img)))

def cnn_train(train=True):
    """
    Build a small CNN (TensorFlow 1.x graph API) and either train or run it.

    With ``train=True`` the network is trained for 150 full-batch Adam steps
    on the binarized images decoded from the module-level ``train_file`` /
    ``train_label`` and saved as 'cnn_model_1'. Otherwise 'cnn_model_1' is
    restored and the predicted classes of a fixed set of sample images under
    'Img/' are printed.

    :param train: True to train and save the model, False to restore and predict
    :return: None
    """
    num_classes = 10

    datas_holder = tf.placeholder(tf.float32, [None, 28, 28, 1])
    labels_holder = tf.placeholder(tf.int32, [None])
    dropout_holder = tf.placeholder(tf.float32)

    # NOTE(review): layer creation order is kept as in the original so that
    # auto-generated variable names presumably still match existing checkpoints.
    conv0 = tf.layers.conv2d(datas_holder, 20, 5, activation=tf.nn.relu)
    pool0 = tf.layers.max_pooling2d(conv0, [2, 2], [2, 2])

    conv1 = tf.layers.conv2d(pool0, 40, 4, activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv1, [2, 2], [2, 2])

    flatten = tf.contrib.layers.flatten(pool1)

    fc = tf.layers.dense(flatten, 400, activation=tf.nn.relu)

    # tf.layers.dropout defaults to training=False, which returns its input
    # unchanged; enable it so the rate fed through dropout_holder is applied
    # (0.25 while training, 0 at prediction time, which disables dropping).
    dropout_fc = tf.layers.dropout(fc, dropout_holder, training=True)

    logits = tf.layers.dense(dropout_fc, num_classes)
    predict_labels = tf.argmax(logits, 1)

    losses = tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(labels_holder, num_classes),
                                                     logits=logits)
    mean_loss = tf.reduce_mean(losses)
    # The optimizer is given the per-example loss vector; mean_loss is only reported.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(losses)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        if train:
            train_data = decode_idx3_ubyte(train_file)
            train_labels = decode_idx1_ubyte(train_label)

            # Binarize with one vectorized comparison (pixel >= 180 -> 1, else 0)
            # instead of a Python lambda applied to every pixel.
            train_data = (np.array(train_data) >= 180).astype(int)

            print('trian now...')
            sess.run(tf.global_variables_initializer())
            train_feed_dict = {
                datas_holder: train_data.reshape(-1, 28, 28, 1),
                labels_holder: train_labels,
                dropout_holder: 0.25
            }
            for step in range(150):
                _, mean_loss_val = sess.run([optimizer, mean_loss],
                                            feed_dict=train_feed_dict)
                if not step % 10:
                    print('step = {}\tmean loss={}'.format(step, mean_loss_val))

            saver.save(sess, 'cnn_model_1')
            print('train is over...')
        else:
            print('test now...')
            saver.restore(sess, 'cnn_model_1')

            def load_img():
                """Load the sample images, crop each around its inked area and resize to 28x28."""
                imgs = []
                for i in range(1, 11):
                    for j in range(1, 6):
                        img = Image.open('Img/Sample{}/img{}-{}.png'.format(str(i).zfill(3), str(i).zfill(3), str(j * 5).zfill(3)))\
                            .convert('L')

                        # Invert the grey levels so pure-white pixels become 0.
                        img_temp = 255 - np.array(img)

                        # ndarray.sum() widens the accumulator; the builtin sum()
                        # would wrap modulo 256 on uint8 rows and could miss a column.
                        inked_cols = np.flatnonzero(img_temp.sum(axis=0) > 0)
                        left_temp = inked_cols.min()
                        right_temp = inked_cols.max()
                        width = max(600, right_temp - left_temp)

                        # NOTE(review): 1200 and 900 look like the sample image size — confirm.
                        left = int(max(0, left_temp - width / 3))
                        right = int(min(1200, right_temp + width / 3))

                        inked_rows = np.flatnonzero(img_temp.sum(axis=1) > 0)
                        up_temp = inked_rows.min()
                        down_temp = inked_rows.max()
                        up = int(max(0, up_temp - (down_temp - up_temp) / 4))
                        down = int(min(900, down_temp + (down_temp - up_temp) / 4))

                        img = img.crop((left, up, right, down))

                        img = img.resize((28, 28))
                        imgs.append(img)
                return np.array([np.array(x) for x in imgs])

            test_data = load_img()

            # Binarize: pixels at or below 180 become 1, brighter ones 0
            # (the opposite polarity of the training-set threshold).
            test_data = (np.array(test_data) <= 180).astype(int)

            print(test_data[0])
            test_feed_dict = {
                datas_holder: test_data.reshape(-1, 28, 28, 1),
                labels_holder: [1],
                dropout_holder: 0
            }
            predicted_labels_val = sess.run(predict_labels,
                                            feed_dict=test_feed_dict)
            print(predicted_labels_val)

def test_real_img():
    """
    Compare the saved XGBoost model on one CSV test row and one image file.

    First the row labelled 300 of 'test_data.csv' is binarized
    (pixel > 180 -> 1), displayed and classified. Then
    'Img/Sample005/img005-003.png' is resized to 28x28, binarized with the
    opposite polarity (pixel < 180 -> 1), displayed and classified.

    :return: None
    """
    model = joblib.load('number_rec_1.pkl')

    test = pd.read_csv('test_data.csv', index_col=0)
    # Vectorized threshold on the selected row.
    img1 = (test.loc[300, :] > 180).astype(int)
    img_temp = img1.astype('uint8') * 255
    # Series has no reshape() in current pandas; reshape the underlying array.
    img1 = pd.DataFrame(img1.values.reshape(1, -1))
    Image.fromarray(np.array(img_temp).reshape((28, 28))).show()
    img1.columns = img1.columns.astype(str)
    print(img1)
    print(model.predict(xgb.DMatrix(img1)))

    img = Image.open('Img/Sample005/img005-003.png').convert('L')
    img = img.resize((28, 28))
    img.show()
    img = pd.DataFrame(np.array(img).reshape((1, -1)))
    print(img.any())
    img.fillna(255, inplace=True)
    # DataFrame.apply hands whole columns to the callable, so a scalar-style
    # `1 if x < 180 else 0` lambda fails on the Series truth test; compare
    # the whole frame at once instead.
    img = (img < 180).astype(int)
    # String column labels, as in the other prediction paths of this file.
    img.columns = img.columns.astype(str)
    print(img)
    print(model.predict(xgb.DMatrix(img)))



    # for i in range(2):
    #     for j in range(2):
    #         img = Image.open('Img/Sample{}/img{}-{}.jpg'.format(str(i+1).zfill(3),str(i+1).zfill(3),str(j+1).zfill(3)))
    #         # size = img.size[0]*img.size[1]
    #         img = img.resize((28,28))
    #         img_temp = img
    #         # img.show()
    #         # # img.show()
    #         # img1 = img.crop((0,0,36,36))
    #         # img1.show()
    #         f = np.vectorize(lambda x:0 if x>100 else 1)
    #         img = f(np.array(img.convert('L')).flatten()).reshape(1,-1)
    #         #
    #         img = pd.DataFrame(img)
    #         img.columns = img.columns.astype(str)
    #         # print(img)
    #         prediction = model.predict(xgb.DMatrix(img))
    #         print('predict is: ',int(prediction[0]))
    #         print('real is: ',i)
    #         if i!=prediction:
    #             img_temp.show()