中文分词是搜索引擎的关键技术之一,也是一个难点。这里利用别人做好的一个分词工具 API 来实现中文分词,分享给大家:
1. 准备环境
2. Word_split.php
<?php
defined('BASEPATH') OR exit('No direct script access allowed');
/**
 * Batch job controller that sends each product's Chinese description to the
 * pullword.com word-segmentation API and stores the returned words back on
 * the product row as its keyword string.
 */
class Word_split extends CI_Controller {

    /**
     * Default action; intentionally does nothing.
     */
    public function index()
    {
    }

    /**
     * Segment the descriptions of a contiguous range of SKUs.
     *
     * SKU codes are built by left-padding the number with zeros and keeping
     * the last 8 characters (e.g. 12 becomes "00000012").
     *
     * @param int|string $start First SKU number (inclusive); arrives as a
     *                          string when supplied through a URL segment.
     * @param int|string $end   Last SKU number (inclusive).
     * @return void
     */
    public function split($start = 1, $end = 1)
    {
        // URL segments are strings; force integer semantics for the loop.
        $start = (int) $start;
        $end = (int) $end;

        for ($i = $start; $i <= $end; $i++) {
            $sku = substr('00000000' . $i, -8);
            $this->split_word_save($sku);
        }
    }

    /**
     * Fetch one product's Chinese description, segment it through the remote
     * API and save the space-separated result as the product's keywords.
     *
     * Nothing is written when the product is missing, has no Chinese
     * description, or the API request fails, so existing keywords are never
     * overwritten with an empty string because of a network error.
     *
     * @param string $sku 8-character SKU code.
     * @return void
     */
    private function split_word_save($sku = '00000001')
    {
        $this->load->model('word_split_model');

        $desc = $this->word_split_model->get_desc($sku);
        if ($desc == NULL) {
            return;
        }

        $keyword = $desc['f_desc_chn'];
        if ($keyword == NULL) {
            return;
        }

        // param1 / param2 are separate query parameters of the API; they must
        // be joined with "&" (the original text had them corrupted to "¶").
        $keyword = urlencode($keyword);
        $url = "http://api.pullword.com/get.php?source={$keyword}&param1=0&param2=0";

        $ch = curl_init();
        if ($ch === FALSE) {
            log_message('error', "Word_split: curl_init failed for SKU {$sku}");
            return;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Treat HTTP >= 400 as a failure instead of saving an error page.
        curl_setopt($ch, CURLOPT_FAILONERROR, 1);
        // Bound each request so one stalled call cannot hang the whole batch.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);

        $words = curl_exec($ch);
        $error = ($words === FALSE) ? curl_error($ch) : '';
        curl_close($ch);

        if ($words === FALSE) {
            log_message('error', "Word_split: request failed for SKU {$sku}: {$error}");
            return;
        }

        // The API returns one word per CRLF-terminated line; flatten the
        // response into a single space-separated string.
        $words = trim($words);
        $words = str_replace("\r\n", ' ', $words);

        $this->word_split_model->save_keyword($sku, $words);
    }
}
/* End of file Word_split.php */
/* Location: ./application/controllers/Word_split.php */
3. 保存到数据库
<?php
defined('BASEPATH') OR exit('No direct script access allowed');
/**
 * Data access for the word-segmentation job: reads product descriptions from
 * and writes keyword strings to the t_pd_product table.
 */
class Word_split_model extends CI_Model {

    /**
     * Store the keyword string on the product row(s) matching the SKU code.
     *
     * @param string $sku     Value matched against f_sku_code.
     * @param string $keyword Value written to f_keyword.
     * @return void
     */
    public function save_keyword($sku, $keyword)
    {
        $this->db
            ->set('f_keyword', $keyword)
            ->where('f_sku_code', $sku)
            ->update('t_pd_product');
    }

    /**
     * Look up the SKU code and the Chinese/English descriptions of a product.
     *
     * @param string $sku Value matched against f_sku_code.
     * @return mixed Whatever row_array() yields for the query; presumably an
     *               associative array for a match and NULL when there is none.
     */
    public function get_desc($sku)
    {
        $result = $this->db
            ->select('f_sku_code, f_desc_chn, f_desc_eng')
            ->where('f_sku_code', $sku)
            ->get('t_pd_product');

        return $result->row_array();
    }
}
/* End of file Word_split_model.php */
/* Location: ./application/models/Word_split_model.php */