PHP关键词词频检测

<html>
<head>
<!-- The encoding declaration must live in <head>, ahead of any content,
     so the browser picks the right decoder before it starts rendering. -->
<meta charset="UTF-8" />
<!-- Alternative encodings kept for reference: -->
<!-- <meta http-equiv="content-type" content="text/html; charset=Shift_JIS" /> -->
<!-- <meta charset="Shift_JIS" /> -->
<style>
td{
	background-color:#CF6;
	width:100px;
	margin:5px;
}
</style>
</head>
<body>
<?
/**
 * The class LinkedList allows an application to store strings in
 * alphabetical order by calling orderInsert().
 * 此处定义的 LinkedList 类,可以调用它的 方法 orderInsert(),来以字母
 * 大小的顺序储存 英文字符串。
 * 同时记录 英文单词出现的次数
 * 作者: 许同春 author Tongchun Xu 
 * @开源中国 Open Source, China community
 * 完成日期:2016年6月10日 completion date: 10 June, 2016
 */
/**
 * One cell of a singly linked list: a word, how many times it has been
 * seen, and a link to the cell that follows it.
 */
class Node{
    /** The stored English string. */
    public $data;

    /** Number of occurrences recorded for $data. */
    public $frequency;

    /** The following Node, or null when this is the last one. */
    public $next;

    /**
     * @param mixed     $data      the string held by this node
     * @param Node|null $next      successor node (null for "no successor")
     * @param int       $frequency initial occurrence count, 1 by default
     */
    function __construct($data, $next = null, $frequency = 1){
        $this->frequency = $frequency;
        $this->next      = $next;
        $this->data      = $data;
    }
}

/**
 * Singly linked list of words with occurrence counts.
 *
 * The list starts with a sentinel head node that carries no real data;
 * the first real entry is $head->next. orderInsert() keeps entries in
 * strcmp() (byte-wise) order and counts repeated words instead of storing
 * them twice.
 */
class LinkedList{
    /** Sentinel head node; its data is a placeholder and is never reported. */
    public $head;

    /**
     * Always null. Declared explicitly because the constructor has always
     * assigned it, and undeclared (dynamic) properties are deprecated.
     */
    public $first = null;

    /** Creates an empty list consisting of the sentinel only. */
    function __construct(){
        $this->head = new Node("dummy 傀儡");
        $this->first = null;
    }

    /**
     * @return bool true when no word has been stored yet
     */
    function isEmpty(){
        return $this->head->next === null;
    }

    /**
     * Records one occurrence of $data.
     *
     * If the word is already stored its frequency is incremented; otherwise
     * a new node is linked in front of the first entry that does not sort
     * before it, so a list built only with this method stays ordered.
     *
     * @param string $data word to record
     */
    function orderInsert($data){
        $existing = $this->find($data);
        if ($existing !== null) {
            $existing->frequency++;
            return;
        }

        // Walk to the last node whose successor still sorts before $data.
        $prev = $this->head;
        while ($prev->next !== null && strcmp($data, $prev->next->data) > 0) {
            $prev = $prev->next;
        }
        $prev->next = new Node($data, $prev->next);
    }

    /**
     * Appends $data at the end of the list, ignoring ordering and duplicates.
     *
     * @param string $data word to append
     */
    function insertLast($data){
        $tail = $this->head;
        while ($tail->next !== null) {
            $tail = $tail->next;
        }
        $tail->next = new Node($data);
    }

    /**
     * Looks up the node holding exactly $value.
     *
     * The comparison is byte-wise for every node, so numeric-looking words
     * such as "10" and "1e1" are kept apart. An empty list yields null.
     *
     * @param string $value word to search for
     * @return Node|null the matching node, or null when absent
     */
    function find($value){
        for ($q = $this->head->next; $q !== null; $q = $q->next) {
            if (strcmp($q->data, $value) === 0) {
                return $q;
            }
        }
        return null;
    }

    /**
     * Prints every stored word and its frequency as an HTML table,
     * eleven cells per row, or a notice when the list is empty.
     */
    function traversal(){
        if ($this->isEmpty()) {
            echo "链表为空!";
            return;
        }

        echo "输出结果:<table><tr>";
        $printed = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            // Open a new row before the 12th, 23rd, ... cell; doing it here
            // rather than after a cell avoids an empty trailing row.
            if ($printed > 0 && $printed % 11 === 0) {
                echo "</tr><tr>";
            }
            // Words come from an uploaded file, so escape them for HTML.
            $word = htmlspecialchars((string) $p->data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<td>".$word."<br>出现次数:".$p->frequency."</td>";
            $printed++;
        }
        echo "</tr></table>";
    }

    /**
     * Prints how many distinct words are stored (one per node), or a
     * notice when nothing has been stored.
     */
    function words_count(){
        if ($this->isEmpty()) {
            echo "<br>没有储存字符串 <br>";
            return;
        }

        $counter = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            $counter++;
        }
        echo "***共有单词 ".$counter." 个***";
    }
}

    /**
     * Splits a string into tokens separated by any of a set of delimiter
     * bytes, handing them out one at a time.
     *
     * Each instance tracks its own read position. (strtok() keeps a single
     * process-wide cursor, so tokenizers built on it interfere with one
     * another as soon as two of them are alive at once.)
     */
    class StringTokenizer {

        /** @var string the text being tokenized */
        private $string;

        /** @var string|false the token nextToken() will return next, false when exhausted */
        private $token;

        /** @var string the set of delimiter bytes */
        private $delim;

        /** @var int byte offset just past the token currently held in $token */
        private $offset = 0;

        /**
         * Constructs a string tokenizer for the specified string.
         * @param string $str String to tokenize
         * @param string $delim The set of delimiters (the characters that separate tokens)
         * specified at creation time, default to " \n\r\t\0"
         */
        public function __construct($str, $delim=" \n\r\t\0") {
            $this->string = (string) $str;
            $this->delim = (string) $delim;
            $this->token = $this->scan();
        }

        /**
         * Kept for interface compatibility; there is nothing to release.
         */
        public function __destruct() {
        }

        /**
         * Counts the tokens that nextToken() can still return, including the
         * one currently pending. The read position is left untouched.
         * @return int - number of remaining tokens
         */
        public function countTokens() {
            if ($this->token === false) {
                return 0;
            }

            $counter = 1; // the pending token
            $length = strlen($this->string);
            $pos = $this->offset;
            while ($pos < $length) {
                $pos += strspn($this->string, $this->delim, $pos);   // skip delimiters
                if ($pos >= $length) {
                    break;
                }
                $pos += strcspn($this->string, $this->delim, $pos);  // skip one token
                $counter++;
            }
            return $counter;
        }

        /**
         * Tests if there are more tokens available from this tokenizer's string.
         * It does not move the read position; call nextToken() for that.
         * @return boolean - true if has more tokens, false otherwise
         */
        public function hasMoreTokens() {
            return ($this->token !== false);
        }

        /**
         * Returns the pending token and advances to the one after it.
         * @return string|false - the next token, or false when none are left
         */
        public function nextToken() {
            $hold = $this->token;
            $this->token = $this->scan();
            return $hold;
        }

        /**
         * Reads the next token starting at $offset: skips leading delimiters,
         * takes the following run of non-delimiter bytes, and moves $offset
         * past it.
         * @return string|false - the token, or false at end of input
         */
        private function scan() {
            $length = strlen($this->string);
            if ($this->offset >= $length) {
                return false;
            }

            $start = $this->offset + strspn($this->string, $this->delim, $this->offset);
            if ($start >= $length) {
                $this->offset = $length;
                return false;
            }

            $size = strcspn($this->string, $this->delim, $start);
            $this->offset = $start + $size;
            return substr($this->string, $start, $size);
        }
    }


// =====================================================================上面词频方法=====================================================================
// =====================================================================上面词频方法=====================================================================
// =====================================================================上面词频方法=====================================================================

/**
 * 程序开始运行, 按"浏览"钮选择一个存储关键字的文档, 再按"统计"钮,
 * 即可得到按字符顺序(UNICODE)列出的所有关键词,及其出现的次数
 * 
 * 作者: 许同春 author Tongchun Xu 
 * @开源中国 Open Source, China community
 * 完成日期:2017年9月18日 completion date: 18 Sep., 2017
 */


// 以上代码

// Request handler: when the form has been submitted with a file, tokenize
// the upload, count every keyword, then print the keywords ordered by
// frequency followed by the list's own summary and table.
$list = new LinkedList();

if (!empty($_POST['submit'])) {
    // A submit without any file part leaves $_FILES["file"] unset; report
    // that through the same path as every other upload error.
    $uploadError = isset($_FILES["file"]["error"]) ? $_FILES["file"]["error"] : UPLOAD_ERR_NO_FILE;

    if ($uploadError > 0) {
        echo "Error: " . $uploadError . "<br />";
    } else {
        $path = $_FILES["file"]["tmp_name"];

        // Only read files that PHP itself received through this upload.
        if (!is_uploaded_file($path)) {
            die("Unable to open file!");
        }

        // Reads the whole file in one call; unlike fread() with filesize(),
        // this also works for an empty upload.
        $str = file_get_contents($path);  // one keyword per line is the expected layout
        if ($str === false) {
            die("Unable to open file!");
        }

        $delim = "?\\,. /:!\"()\t\n\r\f%";
        $st = new StringTokenizer($str, $delim);
        echo '挖掘关键词数量: '.$st->countTokens()."<br>";

        while ($st->hasMoreTokens()) {
            $list->orderInsert($st->nextToken());
        }

        // Copy word => frequency into an array so it can be sorted by count.
        $freArray = array();
        for ($node = $list->head->next; $node; $node = $node->next) {
            $freArray[$node->data] = $node->frequency;
        }
        arsort($freArray);

        foreach ($freArray as $key => $val) {
            // $val is the frequency. The keyword comes from the uploaded
            // file, so it is escaped before being written into the page.
            $safeKey = htmlspecialchars((string) $key, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            printf("<b>%s</b>  出现次数:%s <br> ", $safeKey, $val);
        }

        $list->words_count();
        $list->traversal();
    }
}

?>

<!-- Upload form: posts the chosen file as multipart/form-data to word.php.
     The PHP handler in this file reads the "submit" field from $_POST and
     the "file" field from $_FILES, so both names must stay in sync with it.
     NOTE(review): presumably this file is itself deployed as word.php; confirm. -->
<form action="word.php" method="post"
enctype="multipart/form-data">
<label for="file">存储关键字的文档名 File Name:</label>
<input type="file" name="file" id="file" /> 
<input type="submit" name="submit" value="统计 Statistics" />
</form> 
</body>
</html>

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

虎纠板栗咖

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值