<html>
<!-- <meta http-equiv="content-type" content="text/html; charset=Shift_JIS" /> -->
<style>
td{
background-color:#CF6;
width:100px;
margin:5px;
}
</style>
<body>
<meta charset="UTF-8" />
<!-- <meta charset="Shift_JIS" /> -->
<?php
/**
* The class LinkedList allows an application to store English strings in
* alphabetical order by calling orderInsert().
* It also records how many times each English word occurs.
* Author: 许同春 (Tongchun Xu)
* @开源中国 Open Source China community
* Completion date: 10 June 2016
*/
/**
 * One cell of the singly linked word list: a string, its occurrence
 * count, and a reference to the following cell.
 */
class Node
{
    /** @var mixed The stored string. */
    public $data;

    /** @var int How many times the stored string has been seen. */
    public $frequency;

    /** @var Node|null The following cell, or null at the end of the list. */
    public $next;

    /**
     * @param mixed     $data      String to store.
     * @param Node|null $next      Following cell (defaults to none).
     * @param int       $frequency Initial occurrence count (defaults to 1).
     */
    public function __construct($data, $next = null, $frequency = 1)
    {
        $this->frequency = $frequency;
        $this->next = $next;
        $this->data = $data;
    }
}
/**
 * Singly linked list of Node objects behind a sentinel head node.
 *
 * orderInsert() keeps the strings in ascending strcmp() order and counts
 * repeated insertions of the same string in Node::$frequency.
 */
class LinkedList{
    /** @var Node Sentinel head; it carries a placeholder string, never real data. */
    public $head;

    /** @var null Never used by this class; declared so it is no longer a dynamic property. */
    public $first;

    /** Creates an empty list consisting of the sentinel head only. */
    function __construct(){
        $this->head = new Node("dummy 傀儡");
        $this->first = null;
    }

    /**
     * @return bool True when no node follows the sentinel head.
     */
    function isEmpty(){
        return ($this->head->next === null);
    }

    /**
     * Records one occurrence of $data.
     *
     * If the string is already stored its frequency is incremented;
     * otherwise a new node is linked in front of the first node whose
     * string does not compare lower (strcmp) than $data.
     *
     * @param string $data String to record.
     */
    function orderInsert($data){
        $existing = $this->find($data);
        if ($existing !== null) {
            $existing->frequency++;
            return;
        }

        $q = $this->head;
        while ($q->next !== null && strcmp($data, $q->next->data) > 0) {
            $q = $q->next;
        }
        $q->next = new Node($data, $q->next);
    }

    /**
     * Appends $data as a new node at the end of the list, without any
     * ordering or duplicate check.
     *
     * @param string $data String to append.
     */
    function insertLast($data){
        $tail = $this->head;
        while ($tail->next !== null) {
            $tail = $tail->next;
        }
        $tail->next = new Node($data);
    }

    /**
     * Looks up the node whose string equals $value.
     *
     * Every node, including the last one, is compared with strcmp() so
     * that numeric-looking strings such as "1e3" and "1000" are never
     * treated as equal. An empty list simply yields null.
     *
     * @param string $value String to look for.
     * @return Node|null The matching node, or null when there is none.
     */
    function find($value){
        for ($q = $this->head->next; $q !== null; $q = $q->next) {
            if (strcmp($q->data, $value) === 0) {
                return $q;
            }
        }
        return null;
    }

    /**
     * Echoes the list as an HTML table, one cell per node and 11 cells per
     * row, or a "list is empty" message when there are no nodes.
     */
    function traversal(){
        if ($this->isEmpty()) {
            echo "链表为空!";
            return;
        }

        echo "输出结果:<table><tr>";
        $n = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            // The stored strings come from callers (an uploaded file in this
            // page), so they are escaped before being written into markup.
            $word = htmlspecialchars((string) $p->data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<td>".$word."<br>出现次数:".$p->frequency."</td>";
            $n++;
            // Start a new row after every 11th cell, but only if another
            // cell follows, so the table never ends with an empty row.
            if ($n % 11 == 0 && $p->next !== null) echo "</tr><tr>";
        }
        echo "</tr></table>";
    }

    /**
     * Echoes the number of nodes (distinct stored strings), or a
     * "nothing stored" message when the list is empty.
     */
    function words_count(){
        if ($this->isEmpty()) {
            echo "<br>没有储存字符串 <br>";
            return;
        }

        // Count every node, starting with the first one after the sentinel.
        $counter = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            $counter++;
        }
        echo "***共有单词 ".$counter." 个***";
    }
}
/**
 * Splits a string into tokens separated by any of a set of delimiter
 * characters and hands them out one at a time.
 *
 * The string is tokenized once, in the constructor, and the result is kept
 * in the instance. Unlike strtok(), which keeps a single cursor shared by
 * the whole process, this lets several tokenizers be used alternately
 * without disturbing each other.
 */
class StringTokenizer {
    /** @var string The text being tokenized. */
    private $string;

    /** @var string The set of delimiter characters (each byte is one delimiter). */
    private $delim;

    /** @var array Non-empty tokens of $string, in order of appearance. */
    private $tokens = array();

    /** @var int Index in $tokens of the token nextToken() will return next. */
    private $position = 0;

    /**
     * Constructs a string tokenizer for the specified string.
     *
     * Runs of consecutive delimiters are treated as a single separator, so
     * empty tokens are never produced.
     *
     * @param string $str   String to tokenize
     * @param string $delim The set of delimiters (the characters that separate tokens)
     *                      specified at creation time, default to " \n\r\t\0"
     */
    public function __construct($str, $delim=" \n\r\t\0") {
        $this->string = (string) $str;
        $this->delim = (string) $delim;

        $length = strlen($this->string);
        $offset = 0;
        while ($offset < $length) {
            // Skip the delimiters in front of the next token.
            $offset += strspn($this->string, $this->delim, $offset);
            if ($offset >= $length) {
                break;
            }
            // The token runs up to the next delimiter (or the end of the string).
            $run = strcspn($this->string, $this->delim, $offset);
            $this->tokens[] = substr($this->string, $offset, $run);
            $offset += $run;
        }
    }

    /**
     * Kept for interface compatibility; there is nothing to release.
     */
    public function __destruct() {
    }

    /**
     * Calculates the number of times that nextToken() can still be called
     * before it starts returning false. The current position is not moved.
     *
     * @return int - number of remaining tokens
     */
    public function countTokens() {
        return count($this->tokens) - $this->position;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string. It
     * does not move the internal pointer in any way. To move the internal pointer
     * to the next element call nextToken()
     *
     * @return boolean - true if has more tokens, false otherwise
     */
    public function hasMoreTokens() {
        return $this->position < count($this->tokens);
    }

    /**
     * Returns the next token from this string tokenizer and advances the internal
     * pointer by one.
     *
     * @return string|false - next token, or false when no tokens are left
     */
    public function nextToken() {
        if (!$this->hasMoreTokens()) {
            return false;
        }
        return $this->tokens[$this->position++];
    }
}
// =====================================================================上面词频方法=====================================================================
// =====================================================================上面词频方法=====================================================================
// =====================================================================上面词频方法=====================================================================
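/**
* Program entry point: press the "Browse" button to choose a document that
* contains the keywords, then press the "Statistics" button to list every
* keyword in character (Unicode) order together with the number of times
* it occurs.
*
* Author: 许同春 (Tongchun Xu)
* @开源中国 Open Source China community
* Completion date: 18 Sep. 2017
*/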
// 以上代码
// Word list that receives one node per distinct token of the upload.
$list = new LinkedList();

// Only analyse when the upload form on this page was submitted.
if (!empty($_POST['submit'])) {
    $upload = isset($_FILES["file"]) ? $_FILES["file"] : null;

    if ($upload === null) {
        // The request carried no "file" field at all; report it with the
        // same code PHP uses for "no file was uploaded".
        echo "Error: " . UPLOAD_ERR_NO_FILE . "<br />";
    } elseif ($upload["error"] > 0) {
        echo "Error: " . $upload["error"] . "<br />";
    } else {
        // Read the whole upload in one call. Unlike fread() with
        // filesize(), this also works for a zero-byte file, and there is
        // no handle left to close. Refuse any path that is not a genuine
        // HTTP upload.
        $path = $upload["tmp_name"];
        $str = is_uploaded_file($path) ? file_get_contents($path) : false;
        if ($str === false) {
            die("Unable to open file!");
        }

        // Characters that separate keywords in the uploaded text.
        $delim = "?\\,. /:!\"()\t\n\r\f%";
        $st = new StringTokenizer($str, $delim);
        echo '挖掘关键词数量: '.$st->countTokens()."<br>";

        while ($st->hasMoreTokens()) {
            $list->orderInsert($st->nextToken());
        }

        // Copy word => frequency out of the list so it can be re-sorted
        // by frequency, highest first.
        $freArray = array();
        for ($node = $list->head->next; $node; $node = $node->next) {
            $freArray[$node->data] = $node->frequency;
        }
        arsort($freArray);

        foreach ($freArray as $key => $val) {
            // $key is text taken from the uploaded file: escape it before
            // writing it into the page. $val is the occurrence count.
            $word = htmlspecialchars((string) $key, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            printf("<b>%s</b> 出现次数:%s <br> ", $word, $val);
        }

        $list->words_count();
        $list->traversal();
    }
}
?>
<form action="word.php" method="post"
enctype="multipart/form-data">
<label for="file">存储关键字的文档名 File Name:</label>
<input type="file" name="file" id="file" />
<input type="submit" name="submit" value="统计 Statistics" />
</form>
</body>
</html>
<!-- Trailing text scraped from the hosting page's feedback widget was removed here; it was not part of this script. -->