C++ sqlite3解决中文排序问题

beibeix2015

于 2021-11-04 00:09:53 发布

阅读量1.2k

点赞数

分类专栏： C++ 文章标签： c++ sqlite 开发语言

本文链接：https://blog.csdn.net/beibeix2015/article/details/121133540

版权

C++ 专栏收录该内容

85 篇文章 1 订阅

订阅专栏

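导言：sqlite3默认使用UTF-8编码，而在UTF-8（Unicode）编码下，汉字并不是按照拼音顺序排列的。要解决中文排序问题，必须自定义排序规则（collation）：先将UTF-8字符串转换成GB2312/GBK编码，然后再按字节比较大小。GB2312的一级汉字（3755个常用字）是按照拼音顺序编码的，因此常用字可以得到正确的拼音排序结果；二级汉字按部首/笔画编码，其排序结果不保证符合拼音顺序。注意：下面的代码使用CP_ACP（系统默认ANSI代码页）进行转换，只有在系统区域设置为简体中文（代码页936）时才等价于GB2312/GBK。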

#include "sqlite3.h"
#include <Windows.h>

#include <string>
#include <vector>
using std::vector;
using std::string;
#pragma comment(lib, "sqlite3.lib")
// Converts a NUL-terminated UTF-8 string to the system ANSI code page.
//
// The conversion goes UTF-8 -> UTF-16 -> CP_ACP. CP_ACP is GB2312/GBK only
// when the Windows system locale is Simplified Chinese (code page 936).
//
// utf8:   NUL-terminated UTF-8 input; NULL is treated as an empty string.
// gb2312: receives the converted text; left empty if a conversion step fails.
void U2G(const char* utf8, string& gb2312)
{
    gb2312.clear();
    if (utf8 == NULL) return;

    // With a source length of -1 the reported size already counts the terminator.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0) return;

    // vector zero-initializes every element and releases the buffer on every
    // exit path, so no manual memset/delete[] is needed.
    vector<wchar_t> wide(static_cast<size_t>(wideLen));
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0) return;

    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0) return;

    vector<char> ansi(static_cast<size_t>(ansiLen));
    if (WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, &ansi[0], ansiLen, NULL, NULL) <= 0) return;

    // The buffer is NUL-terminated, so assign stops at the end of the text.
    gb2312.assign(&ansi[0]);
}
 
// Converts a NUL-terminated string in the system ANSI code page to UTF-8.
//
// The conversion goes CP_ACP -> UTF-16 -> UTF-8. CP_ACP is GB2312/GBK only
// when the Windows system locale is Simplified Chinese (code page 936).
//
// gb2312: NUL-terminated ANSI input; NULL is treated as an empty string.
// utf8:   receives the converted text; left empty if a conversion step fails.
void G2U(const char* gb2312, string& utf8)
{
    utf8.clear();
    if (gb2312 == NULL) return;

    // With a source length of -1 the reported size already counts the terminator.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0) return;

    // vector zero-initializes every element and releases the buffer on every
    // exit path, so no manual memset/delete[] is needed.
    vector<wchar_t> wide(static_cast<size_t>(wideLen));
    if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, &wide[0], wideLen) <= 0) return;

    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0) return;

    vector<char> encoded(static_cast<size_t>(utf8Len));
    if (WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, &encoded[0], utf8Len, NULL, NULL) <= 0) return;

    // The buffer is NUL-terminated, so assign stops at the end of the text.
    utf8.assign(&encoded[0]);
}

// Collation callback for sqlite3_create_collation: orders two UTF-8 keys by
// the byte order of their ANSI-code-page encoding (as produced by U2G).
//
// SQLite hands each key over as a (length, pointer) pair and the bytes are
// not guaranteed to be NUL-terminated, so every key is first copied into a
// std::string of exactly nKey bytes before it is passed to U2G.
//
// Returns a negative, zero or positive value when key 1 sorts before, equal
// to, or after key 2.
int chinese_cmp(void *NotUsed, int nKey1, const void *pKey1, int nKey2, const void *pKey2)
{
    (void)NotUsed;

    string utf8Key1;
    string utf8Key2;
    if (pKey1 != NULL && nKey1 > 0)
        utf8Key1.assign(static_cast<const char*>(pKey1), static_cast<size_t>(nKey1));
    if (pKey2 != NULL && nKey2 > 0)
        utf8Key2.assign(static_cast<const char*>(pKey2), static_cast<size_t>(nKey2));

    string key1;
    string key2;
    U2G(utf8Key1.c_str(), key1);
    U2G(utf8Key2.c_str(), key2);

    // Byte-wise comparison of the converted text (same ordering as strcmp,
    // since U2G never stores embedded NUL bytes).
    int order = key1.compare(key2);

    // Different UTF-8 keys can convert to the same ANSI text (for example
    // characters that are not representable in the code page); fall back to
    // the raw bytes so such keys still get a stable, deterministic order.
    if (order == 0)
        order = utf8Key1.compare(utf8Key2);
    return order;
}

void main()
{
    sqlite3* conn;
    int ret = sqlite3_open("C:\\Users\\Administrator\\Desktop\\testsort.db", &conn);
    sqlite3_create_collation(conn, "Chinese", SQLITE_UTF8, 0, chinese_cmp);

    string sql = "";
    G2U("select * from mzhrd order by xm collate Chinese", sql);
    sqlite3_stmt *stmt;
    ret = sqlite3_prepare_v2(conn, sql.c_str(), sql.length(), &stmt,0);

    while(sqlite3_step(stmt) != SQLITE_DONE)
    {
        string data = "";
        U2G((char*)sqlite3_column_text(stmt,0),data);
        int afsd =1;
    }
    int dsaf = 1;
}

SQL实现模糊查询

在进行数据库查询时，有完整查询和模糊查询之分。

一般模糊查询语句如下：

SELECT 字段 FROM 表 WHERE 某字段 Like 条件

其中关于条件，SQL提供了四种匹配模式：

1，% ：表示任意0个或多个字符。可匹配任意类型和长度的字符，有些情况下若是中文，请使用两个百分号（%%）表示。

比如 SELECT * FROM [user] WHERE u_name LIKE '%三%'

将会把u_name为“张三”，“张猫三”、“三脚猫”，“唐三藏”等等有“三”的记录全找出来。

另外，如果需要找出u_name中既有“三”又有“猫”的记录，请使用and条件
SELECT * FROM [user] WHERE u_name LIKE '%三%' AND u_name LIKE '%猫%'

若使用 SELECT * FROM [user] WHERE u_name LIKE '%三%猫%'
虽然能搜索出“三脚猫”，但不能搜索出符合条件的“张猫三”。

2，_ ：表示任意单个字符。匹配单个任意字符，它常用来限制表达式的字符长度语句：

比如 SELECT * FROM [user] WHERE u_name LIKE '_三_'
只找出“唐三藏”这样u_name为三个字且中间一个字是“三”的；

再比如 SELECT * FROM [user] WHERE u_name LIKE '三__';
只找出“三脚猫”这样name为三个字且第一个字是“三”的；

3，[ ] ：表示括号内所列字符中的一个（类似正则表达式）。指定一个字符、字符串或范围，要求所匹配对象为它们中的任一个。

比如 SELECT * FROM [user] WHERE u_name LIKE '[张李王]三'
将找出“张三”、“李三”、“王三”（而不是“张李王三”）；

如 [ ] 内有一系列字符（01234、abcde之类的）则可略写为“0-4”、“a-e”
SELECT * FROM [user] WHERE u_name LIKE '老[1-9]'
将找出“老1”、“老2”、……、“老9”；

4，[^ ] ：表示不在括号所列之内的单个字符。其取值和 [] 相同，但它要求所匹配对象为指定字符以外的任一个字符。

比如 SELECT * FROM [user] WHERE u_name LIKE '[^张李王]三'
将找出不姓“张”、“李”、“王”的“赵三”、“孙三”等；

SELECT * FROM [user] WHERE u_name LIKE '老[^1-4]';
将排除“老1”到“老4”，寻找“老5”、“老6”、……

5，查询内容包含通配符时

由于通配符的缘故，导致我们查询特殊字符“%”、“_”、“[”的语句无法正常实现，而把特殊字符用“[ ]”括起便可正常查询。据此我们写出以下函数：

' Escapes the LIKE wildcard characters "[", "_" and "%" in str by wrapping
' each of them in square brackets, and returns the escaped string.
' The result is also written back to str, as in the original version.
function sqlencode(str)
' The innermost replace handles "[" and has to run first, because the two
' outer replacements insert "[" characters of their own.
str = replace(replace(replace(str, "[", "[[]"), "_", "[_]"), "%", "[%]")
sqlencode = str
end function

在查询前将待查字符串先经该函数处理即可，并且在网页上连接数据库用到这类查询语句的时候要注意：

如 Select * FROM user Where name LIKE '老[^1-4]'; 上面的 '老[^1-4]' 两边是要有单引号的，别忘了，我经常忘！

access

近日在写Web程序时用到了Access的模糊查询,在Access里写代码怎么也找不到记录,后来才想起来原来Access和SQL Server的模糊查询是有区别的
条件:查找表A 的Name字段中包括 "B" 的记录
在Access里的代码:

Select * from a where name like '*b*'
Sql Server查询分析器的代码:
Select * from a where name like '%b%'
这时你会发现Access里可以找到相关的记录,但把'*'换成'%'就找不到了,原因是Access的模糊查询通配符是'?'和'*',
和Sql server不一样
以上只是在数据库中的代码,如果要写在程序里可就不能用'*'了,还是要用'%'
程序:
strSql="select * from a where name like '%b%'"
所以如果有朋友和我一样喜欢先在数据库中测试代码,那可就要注意了!!

SQL模糊查询，使用like比较关键字，加上SQL里的通配符，请参考以下：
1、LIKE'Mc%' 将搜索以字母 Mc 开头的所有字符串（如 McBadden）。
2、LIKE'%inger' 将搜索以字母 inger 结尾的所有字符串（如 Ringer、Stringer）。
3、LIKE'%en%' 将搜索在任何位置包含字母 en 的所有字符串（如 Bennet、Green、McBadden）。
4、LIKE'_heryl' 将搜索以字母 heryl 结尾的所有六个字母的名称（如 Cheryl、Sheryl）。
5、LIKE'[CK]ars[eo]n' 将搜索下列字符串：Carsen、Karsen、Carson 和 Karson（如 Carson）。
6、LIKE'[M-Z]inger' 将搜索以字符串 inger 结尾、以从 M 到 Z 的任何单个字母开头的所有名称（如 Ringer）。
7、LIKE'M[^c]%' 将搜索以字母 M 开头，并且第二个字母不是 c 的所有名称（如MacFeather）。
-------------------------------------------------
下面这句查询字符串是我以前写的，根据变量 zipcode_key 在邮政编码表 zipcode 中查询对应的数据，这句是判断变量 zipcode_key 为非数字时的查询语句，用 % 来匹配任意长度的字符串，从表中地址、市、省三列中查询包含关键字的所有数据项，并按省、市、地址排序。这个例子比较简单，只要你理解了方法就可以写出更复杂的查询语句。

sql = "select * from zipcode where (address like'%" & zipcode_key & "%') or (city like'%" & zipcode_key & "%') or (province like'%" & zipcode_key & "%') order by province,city,address"
存储过程中使用模糊查询的例子：
SELECT * FROM Questions where QTitle like '%[' + @KeyWord + ']%' and IsFinish = @IsFinsih
语句中成对的方括号是书写格式的关键。

#include "sqlite3.h"
#include <Windows.h>
#include <tchar.h>

#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
using std::vector;
using std::string;
#pragma comment(lib, "sqlite3.lib")




// Converts a NUL-terminated UTF-8 string to the system ANSI code page.
//
// The conversion goes UTF-8 -> UTF-16 -> CP_ACP. CP_ACP is GB2312/GBK only
// when the Windows system locale is Simplified Chinese (code page 936).
//
// utf8:   NUL-terminated UTF-8 input; NULL is treated as an empty string.
// gb2312: receives the converted text; left empty if a conversion step fails.
void U2G(const char* utf8, string& gb2312)
{
    gb2312.clear();
    if (utf8 == NULL) return;

    // With a source length of -1 the reported size already counts the terminator.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0) return;

    // vector zero-initializes every element and releases the buffer on every
    // exit path, so no manual memset/delete[] is needed.
    vector<wchar_t> wide(static_cast<size_t>(wideLen));
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0) return;

    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0) return;

    vector<char> ansi(static_cast<size_t>(ansiLen));
    if (WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, &ansi[0], ansiLen, NULL, NULL) <= 0) return;

    // The buffer is NUL-terminated, so assign stops at the end of the text.
    gb2312.assign(&ansi[0]);
}

// Converts a NUL-terminated string in the system ANSI code page to UTF-8.
//
// The conversion goes CP_ACP -> UTF-16 -> UTF-8. CP_ACP is GB2312/GBK only
// when the Windows system locale is Simplified Chinese (code page 936).
//
// gb2312: NUL-terminated ANSI input; NULL is treated as an empty string.
// utf8:   receives the converted text; left empty if a conversion step fails.
void G2U(const char* gb2312, string& utf8)
{
    utf8.clear();
    if (gb2312 == NULL) return;

    // With a source length of -1 the reported size already counts the terminator.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0) return;

    // vector zero-initializes every element and releases the buffer on every
    // exit path, so no manual memset/delete[] is needed.
    vector<wchar_t> wide(static_cast<size_t>(wideLen));
    if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, &wide[0], wideLen) <= 0) return;

    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0) return;

    vector<char> encoded(static_cast<size_t>(utf8Len));
    if (WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, &encoded[0], utf8Len, NULL, NULL) <= 0) return;

    // The buffer is NUL-terminated, so assign stops at the end of the text.
    utf8.assign(&encoded[0]);
}

// Collation callback for sqlite3_create_collation: orders two UTF-8 keys by
// the byte order of their ANSI-code-page encoding (as produced by U2G).
//
// SQLite hands each key over as a (length, pointer) pair and the bytes are
// not guaranteed to be NUL-terminated, so every key is first copied into a
// std::string of exactly nKey bytes before it is passed to U2G.
//
// Returns a negative, zero or positive value when key 1 sorts before, equal
// to, or after key 2.
int chinese_cmp(void *NotUsed, int nKey1, const void *pKey1, int nKey2, const void *pKey2)
{
    (void)NotUsed;

    string utf8Key1;
    string utf8Key2;
    if (pKey1 != NULL && nKey1 > 0)
        utf8Key1.assign(static_cast<const char*>(pKey1), static_cast<size_t>(nKey1));
    if (pKey2 != NULL && nKey2 > 0)
        utf8Key2.assign(static_cast<const char*>(pKey2), static_cast<size_t>(nKey2));

    string key1;
    string key2;
    U2G(utf8Key1.c_str(), key1);
    U2G(utf8Key2.c_str(), key2);

    // Byte-wise comparison of the converted text (same ordering as strcmp,
    // since U2G never stores embedded NUL bytes).
    int order = key1.compare(key2);

    // Different UTF-8 keys can convert to the same ANSI text (for example
    // characters that are not representable in the code page); fall back to
    // the raw bytes so such keys still get a stable, deterministic order.
    if (order == 0)
        order = utf8Key1.compare(utf8Key2);
    return order;
}




int _tmain(int argc, _TCHAR* argv[])
{
    sqlite3* conn;
    int ret = sqlite3_open("C:\\Users\\Administrator\\Desktop\\testsort.db", &conn);
    sqlite3_create_collation(conn, "Chinese", SQLITE_UTF8, 0, chinese_cmp);
    string key = "三";
   
    //string sql = "select name from person order by name collate Chinese";
    string sql = "select name from person where name like '%";
    sql += key;
    sql+="%'";

    string sql_utf8 = sql;
    //G2U(sql.c_str(), sql_utf8);
    sqlite3_stmt *stmt;
    ret = sqlite3_prepare_v2(conn, sql_utf8.c_str(), sql_utf8.length(), &stmt, 0);
   /* int rt = sqlite3_step(stmt);
*/
    while (sqlite3_step(stmt) != SQLITE_DONE)
    {
        string data = "";
        U2G((char*)sqlite3_column_text(stmt, 0), data);

        std::cout << data << std::endl;
        int afsd = 1;
    }
    int dsaf = 1;

	system("pause");
	return 0;
}

beibeix2015

关注

0
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
C++ sqlite3解决中文排序问题

导言：sqlite3默认的编码方式为UTF8编码，而在UTF8编码下，中文不是按照拼音顺序编码的，所以想解决中文排序问题，必须自定义排序规则，将UTF8编码转换成GB2312编码（GB2312编码中文是按照拼音顺序编码的），然后再进行比较大小，就可以得到正确的排序结果了。#include "sqlite3.h"#include <Windows.h>#include <string>#include <vector>using std::vector;u
复制链接

扫一扫