make xdb file.php,make_xdb_file.php

// Build an XDB dictionary file from a plain-text word list.
//
// Usage: php make_xdb_file.php <output file> [input file]
//   argv[1]  path of the XDB file to create (must not already exist)
//   argv[2]  optional path of the text dictionary; defaults to php://stdin
//
// IS_UTF8_TXT selects the mbstring internal encoding used while loading the
// text: UTF-8 when true, GBK otherwise.
//
// $Id: $

define('IS_UTF8_TXT', true);

// Raise the memory ceiling and remove the execution time limit for this run.
ini_set('memory_limit', '1024M');
set_time_limit(0);

// argv[1] (the output path) is mandatory; argv[2] (the input path) is optional.
if (!isset($_SERVER['argv'][1])) {
    echo "Usage: {$_SERVER['argv'][0]} <output file> [input file]\n";
    exit(0);
}

// The loader relies on mb_strlen()/mb_substr(), so mbstring is a hard requirement.
if (!extension_loaded('mbstring')) {
    echo "ERROR: mbstring extension is required.\n";
    exit(1);
}

// Open the dictionary text; '@' keeps PHP's own warning out of the output
// because the failure is reported explicitly below.
$input = isset($_SERVER['argv'][2]) ? $_SERVER['argv'][2] : 'php://stdin';
if (!($fd = @fopen($input, 'r'))) {
    echo "ERROR: can not open the input file: {$input}\n";
    exit(1);
}

// Refuse to overwrite an existing output file.
$output = $_SERVER['argv'][1];
if (file_exists($output)) {
    echo "ERROR: output xdb file exists: $output\n";
    exit(1);
}

// Load the XDB implementation; presumably this file defines XTreeDB (confirm).
require 'xdb.class.php';

$xdb = new XTreeDB;

// Open the output database in 'w' mode. Nothing can be saved without it, so
// report the failure and stop with a non-zero status that callers can detect.
if (!$xdb->Open($output, 'w')) {
    echo "ERROR: can not open the XDB to write: $output\n";
    exit(1);
}

// Load the text dictionary into memory.
//
// Each data line holds up to four tab-separated fields:
//   word <TAB> tf <TAB> idf <TAB> attr
// Lines whose first character is '#' are comments; empty lines are skipped.
//
// Result:
//   $rec[$k][$word] = array('tf' => ..., 'idf' => ..., 'attr' => ..., 'part' => 1)
//     where $k is a bucket number in 0..63, and 'part' is present only when
//     the entry is a leading prefix of some longer word.
//   $total = number of distinct entries (words plus prefixes) created.
mb_internal_encoding(IS_UTF8_TXT ? 'UTF-8' : 'gbk');

$total = 0;
$rec = array();

echo "INFO: Loading text file data ... ";

// Compare against false explicitly: a last line consisting of "0" with no
// trailing newline is a falsy string and would otherwise end the loop early.
while (($line = fgets($fd, 512)) !== false) {
    // Strip the line terminator first so that a line with fewer than four
    // fields does not leave "\n" attached to the word or a numeric field.
    $line = rtrim($line, "\r\n");
    if ($line === '' || $line[0] === '#') {
        continue;
    }

    // Pad to four fields so short lines cannot raise undefined-offset errors;
    // missing trailing fields become empty strings.
    list($word, $tf, $idf, $attr) = array_pad(explode("\t", $line, 4), 4, '');
    if ($word === '') {
        continue;
    }

    // Bucket = low six bits of the sum of the first two bytes of the word.
    // A one-byte word contributes 0 for the missing second byte.
    $secondByte = isset($word[1]) ? ord($word[1]) : 0;
    $k = (ord($word[0]) + $secondByte) & 0x3f;

    if (!isset($rec[$k])) {
        $rec[$k] = array();
    }
    if (!isset($rec[$k][$word])) {
        $total++;
        $rec[$k][$word] = array();
    }
    $rec[$k][$word]['tf'] = $tf;
    $rec[$k][$word]['idf'] = $idf;
    $rec[$k][$word]['attr'] = trim($attr);

    // Register every leading prefix of at least two characters as a 'part'
    // entry in the same bucket. Lengths are counted in characters using the
    // mbstring internal encoding set above, not in bytes.
    for ($len = mb_strlen($word) - 1; $len >= 2; $len--) {
        $temp = mb_substr($word, 0, $len);
        if (!isset($rec[$k][$temp])) {
            $total++;
            $rec[$k][$temp] = array();
        }
        $rec[$k][$temp]['part'] = 1;
    }
}

fclose($fd);

// Loading finished; the records are written to the XDB next.
echo "OK, Total words=$total\n";

// Write the loaded records into the XDB, one bucket (0..63) at a time.
for ($k = 0; $k < 0x40; $k++) {
    if (!isset($rec[$k])) {
        continue;
    }

    $cnt = 0;
    printf("Inserting [%02d/64] ... ", $k);

    foreach ($rec[$k] as $w => $v) {
        // Flag bit 0x01: the entry has a tf value, i.e. it came from a
        //                dictionary line rather than only from a prefix.
        // Flag bit 0x02: the entry is a leading prefix of a longer word.
        $flag = isset($v['tf']) ? 0x01 : 0;
        if (!empty($v['part'])) {
            $flag |= 0x02;
        }

        // Prefix-only entries have no tf/idf/attr; give them explicit
        // defaults instead of reading undefined array keys.
        $tfValue = isset($v['tf']) ? $v['tf'] : 0;
        $idfValue = isset($v['idf']) ? $v['idf'] : 0;
        $attrValue = isset($v['attr']) ? $v['attr'] : '';

        // Record layout: float tf, float idf, one unsigned byte of flags,
        // then attr as a 3-byte NUL-padded string.
        $data = pack('ffCa3', $tfValue, $idfValue, $flag, $attrValue);

        // PHP converts numeric-string array keys to integers; cast back so
        // Put() always receives the word as a string.
        $xdb->Put((string) $w, $data);
        $cnt++;
    }

    printf("%d Records saved.\n", $cnt);
}

// Finish up: run the database's Optimize() step, then Close() it.
print "INFO: optimizing ... ";

// Push the progress text out before Optimize() starts.
flush();

$xdb->Optimize();
$xdb->Close();

print "DONE!\n";

?>

一键复制

编辑

Web IDE

原始数据

按行查看

历史

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值