中文分词使用 SCWS 提供的 HTTP API。
1. 在 PHP 中用 cURL 获取分词结果
/**
 * Perform an HTTP request with cURL and return the response body.
 *
 * @param string $url  Target URL.
 * @param string $type "post" sends $arr as the POST body; any other value
 *                     issues a plain GET and ignores $arr.
 * @param string $res  "json" decodes the body into an associative array;
 *                     any other value returns the raw body string.
 * @param array  $arr  POST fields, handed to CURLOPT_POSTFIELDS unchanged.
 *
 * @return mixed Decoded JSON (null when the body is not valid JSON or the
 *               transfer failed) when $res is "json"; otherwise the raw
 *               body string, or false when the transfer failed.
 */
protected function http_curl($url,$type="get",$res="json",$arr=array()){
    $wantJson = ($res=="json");

    $ch = curl_init();
    if ($ch === false) {
        // Same failure values the caller would have seen before: null/false.
        return $wantJson ? null : false;
    }

    curl_setopt($ch,CURLOPT_URL,$url);
    curl_setopt($ch,CURLOPT_RETURNTRANSFER,1);
    if($type=="post"){
        curl_setopt($ch,CURLOPT_POST,1);
        curl_setopt($ch,CURLOPT_POSTFIELDS,$arr);
    }
    curl_setopt($ch,CURLOPT_HTTP_VERSION,CURL_HTTP_VERSION_1_0);
    // Without timeouts a stalled remote host would block this request forever.
    curl_setopt($ch,CURLOPT_CONNECTTIMEOUT,5);
    curl_setopt($ch,CURLOPT_TIMEOUT,15);

    $output = curl_exec($ch);
    curl_close($ch);

    if ($output === false) {
        // Transfer failed: do not feed a boolean into json_decode().
        return $wantJson ? null : false;
    }

    if ($wantJson) {
        return json_decode($output,true);
    }
    return $output;
}
/**
 * Send a string to the SCWS HTTP endpoint and return the decoded JSON reply.
 *
 * @param string $str Text to segment; posted as the "data" field.
 *
 * @return mixed Whatever http_curl() returns for a JSON-decoded POST response.
 */
protected function get_fenci($str){
    // Ask the service for a JSON response so http_curl() can decode it.
    $payload = array(
        'data'=>$str,
        'respond'=>"json",
    );

    return $this->http_curl(
        "http://www.xunsearch.com/scws/api.php",
        "post",
        "json",
        $payload
    );
}
2. 遍历分词结果,拼接 SQL 的 WHERE 条件
// Build "name LIKE '%word%'" conditions joined with OR, one per segmented word.
// Start from an existing $where if there is one; otherwise from an empty string,
// so the first append does not hit an undefined variable.
$where = isset($where) ? (string)$where : '';
foreach ($fenci as $v) {
    // The words derive from user input, so escape quotes/backslashes and the
    // LIKE wildcards (% and _) before embedding them in the SQL literal.
    // NOTE(review): backslash escaping assumes MySQL-style string literals;
    // prefer bound parameters (PDO/mysqli prepared statements) where possible.
    $pattern = addcslashes(addslashes((string)$v), '%_');
    $clause = "name LIKE '%{$pattern}%'";
    $where .= ($where === '') ? $clause : " OR " . $clause;
}
3. 遍历分词结果,拼接 SQL 的 ORDER BY 表达式
用 CASE WHEN ... THEN ... ELSE ... END 语句实现。
排序同样基于模糊匹配:对每个分词分别做 LIKE 匹配,匹配到就计入一个权重值(分词越靠前,权重越大),再把这些权重相加。匹配到的词越多、越靠前,总和就越大,这个总和相当于匹配度,最后按它降序排序。
/**
 * Build an ORDER BY expression that ranks rows by how many segmented words
 * their `name` column matches.
 *
 * Each word contributes a "CASE WHEN name LIKE '%word%' THEN weight ELSE 0 END"
 * term; the terms are summed and sorted descending. The weight is
 * 1000 - 10 * key, so earlier words count more.
 *
 * @param array $fenci Segmented words, keyed by their position (0, 1, 2, ...).
 *
 * @return string The expression followed by " DESC", or an empty string when
 *                there are no usable words (the caller should then omit the
 *                ORDER BY clause).
 */
protected function fenci_order($fenci){
    $terms = array();
    foreach ($fenci as $k => $v) {
        $word = (string)$v;
        if ($word === '') {
            // An empty word would produce LIKE '%%', which matches every row.
            continue;
        }
        // The words derive from user input, so escape quotes/backslashes and
        // the LIKE wildcards (% and _) before embedding them in the SQL literal.
        // NOTE(review): backslash escaping assumes MySQL-style string literals;
        // prefer bound parameters where the caller's DB layer allows it.
        $pattern = addcslashes(addslashes($word), '%_');
        // Keep the weight positive: from the 100th word on, 1000 - 10 * key
        // would hit zero or go negative and a match would lower the score.
        $weight = max(1, 1000 - 10 * (int)$k);
        $terms[] = "(CASE WHEN name LIKE '%" . $pattern . "%' THEN " . $weight . " ELSE 0 END)";
    }

    if (empty($terms)) {
        return '';
    }
    return implode(" + ", $terms) . " DESC";
}