期末大作业07

最新推荐文章于 2022-08-24 19:12:56 发布

qq_45704200

最新推荐文章于 2022-08-24 19:12:56 发布

阅读量133

点赞数

本文链接：https://blog.csdn.net/qq_45704200/article/details/118068713

版权

由于nodejieba一直安装不成功，换了一个分词器——node-analyzer

命令行输入npm install node-analyzer --save即可

在server.js头部添加：

// Load the node-analyzer tokenizer class and create the single instance
// that the server code refers to as `segmenter`.
const Segmenter = require('node-analyzer');
const segmenter = new Segmenter();

在search路由下先生成新闻表格，再顺便对每一条新闻做分词，分词结果存储在segs中：

// HTML-escape a crawled value before it is interpolated into markup.
// null/undefined are rendered as an empty string.
const escapeHtml = (value) => (value == null ? "" : String(value))
	.replace(/&/g, "&amp;")
	.replace(/</g, "&lt;")
	.replace(/>/g, "&gt;")
	.replace(/"/g, "&quot;")
	.replace(/'/g, "&#39;");

// Append one table row per fetched news item, then tokenize its title + content.
for (let x = 0; x < cnt; ++x) {
	const newsfetch = vals[x];
	// Clean the fields in place (the cleaned values stay on the record).
	// `|| ""` keeps a missing author/content from throwing on `.replace`.
	newsfetch.author = (newsfetch.author || "").replace(regex, "").replace(/编辑：/g, "");
	newsfetch.content = (newsfetch.content || "").replace(regex, "");
	// Every field comes from crawled pages, so it is escaped before insertion.
	$("table").append(`
		<tr id="line${x}">
        <td>${x}</td>
        <td><a href="${escapeHtml(newsfetch.url)}" id="t${x}">${escapeHtml(newsfetch.title)}</a></td>
        <td id="c${x}">${escapeHtml(newsfetch.content)}</td>
        <td id="a${x}">${escapeHtml(newsfetch.author)}</td>
        <td id="s${x}">${escapeHtml(newsfetch.source_name)}</td>
        <td id="p${x}">${escapeHtml(newsfetch.publish_date)}</td>
        <td>${escapeHtml(newsfetch.crawltime)}</td>
        <td id="k${x}">${escapeHtml(newsfetch.keywords)}</td>
        <td id="d${x}">${escapeHtml(newsfetch.description)}</td>
        </tr>
		`);
	// Tokenizer output for this item; it is not used further inside this snippet.
	let segs = segmenter.analyze(newsfetch.title + "。\n" + newsfetch.content);
}

为了给后续做图表准备数据，对分词后的结果继续处理

对新闻标题和内容中的词做词频统计

起初卡在一个问题上：每个seg的length都是1，输出时也是每行只有一个字。原因是analyze返回的是用空格分隔的字符串，直接遍历得到的是单个字符，需要先用split(' ')切分成词

经过不断调试，分词的最终版本是：

// Tokenize "title。\ncontent" and split the tokenizer output on single spaces.
let segs = segmenter
	.analyze(newsfetch.title + "。\n" + newsfetch.content)
	.split(' ');

// Count every token longer than one character in the `wordcount` map.
segs.forEach((rawSeg) => {
	const word = rawSeg.toString();
	if (word.length <= 1 || word === " ") {
		return;
	}
	const seenSoFar = wordcount.has(word) ? wordcount.get(word) : 0;
	wordcount.set(word, seenSoFar + 1);
});

为了去除停用词，在网上下载了一个停用词表

读入文件：

var fs = require('fs');
// Load the stop-word list once and keep it as an array of words, so that
// `stop_words.includes(seg)` is an exact-word test. On the raw file string,
// `.includes` would be a substring test and would also reject any token that
// merely appears inside a longer stop word.
// Assumes the file lists one stop word per line — confirm against the downloaded list.
const stop_words = fs.readFileSync("./public/StopWords.txt", "utf8")
	.replace(/^\uFEFF/, "") // drop a UTF-8 byte-order mark if the file has one
	.split(/\r?\n/)
	.map((word) => word.trim())
	.filter((word) => word.length > 0);

在统计词频的条件里加上

// Extra clause for the word-count condition: true only when
// stop_words.includes(seg) is false, so matching segments are not counted.
!stop_words.includes(seg)

除了统计内容的词频，我们再写两个map，统计作者的出现次数和报刊的出现次数：

// Tally how many fetched articles each author and each source accounts for.
// The placeholder author "未知" is left out of the author tally only. A bare
// `continue` here would also skip the source tally (and the rest of the loop
// body) for that article.
if (newsfetch.author && newsfetch.author !== "未知") {
	authorcount.set(newsfetch.author, (authorcount.get(newsfetch.author) || 0) + 1);
}

if (newsfetch.source_name) {
	sourcecount.set(newsfetch.source_name, (sourcecount.get(newsfetch.source_name) || 0) + 1);
}

准备三个js文件，生成不同类型的可视化图表

addChart.js生成高频词直方图：

/**
 * Render a single-series category chart (a bar chart by default) with ECharts.
 *
 * @param {string[]} xElems - Category labels for the x axis.
 * @param {number[]} yElems - Values, one per label.
 * @param {string} chartId - id of the DOM element that hosts the chart.
 * @param {string} xName - Series name (shown in the legend and the tooltip).
 * @param {string} yName - Label printed in front of the value in the tooltip.
 * @param {string} Title - Chart title.
 * @param {string} [type_name] - ECharts series type; falls back to 'bar' when omitted.
 */
function addChart(xElems, yElems, chartId, xName, yName, Title, type_name) {
	// A series without a type cannot be drawn, so default to a bar chart.
	const seriesType = type_name || 'bar';

	const option = {
		tooltip: {
			trigger: 'axis',
			axisPointer: {
				type: 'shadow',
				crossStyle: {
					color: '#FFD700'
				}
			},
			formatter: function(params) {
				// With trigger 'axis' ECharts passes an array (one entry per
				// series); with trigger 'item' it passes a single object.
				const point = Array.isArray(params) ? params[0] : params;
				if (!point) {
					return '';
				}
				return point.seriesName + "<br />" + yName + ":" + point.value;
			}
		},
		title:{
			text: Title,
			x: 'center'
		},
		legend:{
			x: 'center',
			y: 'bottom',
			// Legend entries must match a series name to be displayed.
			data: [xName]
		},
		grid: {
			x: 50,
			x2: 50,
			y: 50,
			y2: 100
		},
		xAxis:{
			data: xElems,
			axisLabel: {
				interval: 0,
				rotate: 0,
				formatter: function(value){
					// Stack the label vertically, one character per line.
					// Array.from keeps surrogate pairs together.
					return Array.from(String(value)).join("\n");
				}
			}
		},
		yAxis:{},
		series:[{
			data: yElems,
			type: seriesType,
			name: xName
		}]
	};

	const myChart = echarts.init(document.getElementById(chartId));

	myChart.setOption(option);
}

addFloatChart.js生成文章数-日期流图：

/**
 * Render a smoothed, filled line chart (values over categories) with ECharts.
 *
 * @param {string[]} xElems - Category labels for the x axis (dates in the caller shown in this article).
 * @param {number[]} yElems - Values, one per label.
 * @param {string} chartId - id of the DOM element that hosts the chart.
 * @param {string} xName - Unused; kept so the three chart helpers share one signature.
 * @param {string} [yName] - Series name; falls back to '成交' when omitted.
 * @param {string} Title - Chart title.
 * @param {string} type_name - Unused; the series is always drawn as a line.
 */
function addFloatChart(xElems, yElems, chartId, xName, yName, Title, type_name) {
        const myChart = echarts.init(document.getElementById(chartId));

        // Declared locally: assigning to an undeclared `option` would create a
        // global variable (and throws in strict mode).
        const option = {
            xAxis: {
                type: 'category',
                boundaryGap: false,
                data: xElems
            },
            yAxis: {
                boundaryGap: [0, '50%'],
                type: 'value'
            },
            title:{
                text: Title,
                x: 'center'
            },
            series: [
                {
                    name: yName || '成交',
                    type:'line',
                    smooth:true,
                    symbol: 'none',
                    stack: 'a',
                    areaStyle: {
                        normal: {}
                    },
                    data: yElems
                }
            ]
        };

        // The data is static, so the chart is drawn once. No refresh timer is
        // installed: the previous one called `addData`, `date` and `data`,
        // none of which exist here, and so threw on every tick.
        myChart.setOption(option, true);
}

addPieChart.js生成文章来源报刊占比饼图：

/**
 * Render a pie chart with ECharts and cycle a highlight + tooltip over its
 * slices, one slice per second.
 *
 * @param {string[]} xElems - Slice labels (also used as legend entries).
 * @param {number[]} yElems - Slice values, one per label.
 * @param {string} chartId - id of the DOM element that hosts the chart.
 * @param {string} xName - Unused; kept so the three chart helpers share one signature.
 * @param {string} yName - Unused.
 * @param {string} Title - Chart title.
 * @param {string} type_name - Unused; the series is always drawn as a pie.
 */
function addPieChart(xElems, yElems, chartId, xName, yName, Title, type_name) {
    const myChart = echarts.init(document.getElementById(chartId));

    // One slice per label, however many labels there are, so that the slices
    // always agree with the legend (which lists every entry of xElems).
    const slices = xElems.map(function (label, index) {
        return { value: yElems[index], name: label };
    });

    const option = {
        title : {
            text: Title,
            x: 'center'
        },
        tooltip: {
            trigger: 'item',
            formatter: "{a} <br/>{b} : {c} ({d}%)"
        },
        legend: {
            orient: 'vertical',
            left: 'left',
            data: xElems
        },
        series : [
            {
                name: "来源报刊",
                type: 'pie',
                radius : '55%',
                center: ['50%', '60%'],
                data: slices,
                itemStyle: {
                    emphasis: {
                        shadowBlur: 10,
                        shadowOffsetX: 0,
                        shadowColor: 'rgba(0, 0, 0, 0.5)'
                    }
                }
            }
        ]
    };

    myChart.setOption(option, true);

    // Nothing to highlight without slices; it also avoids `% 0`, which would
    // turn the index into NaN.
    if (slices.length === 0) {
        return;
    }

    let currentIndex = -1;

    setInterval(function () {
        // Remove the highlight from the previously highlighted slice.
        myChart.dispatchAction({
            type: 'downplay',
            seriesIndex: 0,
            dataIndex: currentIndex
        });
        currentIndex = (currentIndex + 1) % slices.length;
        // Highlight the current slice.
        myChart.dispatchAction({
            type: 'highlight',
            seriesIndex: 0,
            dataIndex: currentIndex
        });
        // Show its tooltip.
        myChart.dispatchAction({
            type: 'showTip',
            seriesIndex: 0,
            dataIndex: currentIndex
        });
    }, 1000);
}

在主界面html中引入这三个js文件并准备好对应的div：

<!-- Chart helper scripts: addChart, addPieChart and addFloatChart.
     The helpers call the global `echarts`, so the ECharts library itself
     must also be loaded on the page. -->
<script type="text/javascript" src="/js/addChart.js"></script>
<script type="text/javascript" src="/js/addPieChart.js"></script>
<script type="text/javascript" src="/js/addFloatChart.js"></script>

<!-- Container for the high-frequency-word chart (id "word-chart"). -->
<section id="image1">
	<div id="word-chart" style="height: 100%;min-height:400px;">
	</div>
</section>


<!-- Container for the articles-per-date chart (id "date-chart"). -->
<section id="image2">
	<div id="date-chart" style="height: 100%;min-height:400px;">
	</div>
</section>

<!-- Container for the news-source pie chart (id "source-chart"). -->
<section id="image3">
	<div id="source-chart" style="height: 100%;min-height:400px;">
	</div>
</section>

在server.js的search路由下：

// Serialize a value as a JavaScript literal that is safe inside an inline
// <script>: JSON.stringify copes with quotes and commas inside the data, and
// "<" is escaped so a "</script>" in the data cannot close the element early.
// Assumes topWords/topHeats/xdates/ynums/topComes/topGoods are plain arrays — confirm.
const toScriptLiteral = (value) => JSON.stringify(value).replace(/</g, "\\u003c");

// Append the script that draws the three charts. The arguments follow the
// helpers' shared signature
// (xElems, yElems, chartId, xName, yName, Title, type_name), so each title is
// passed in the Title position and addChart receives an explicit series type.
$("body").append(`
<script type="text/javascript">
	addChart(${toScriptLiteral(topWords)}, ${toScriptLiteral(topHeats)}, 'word-chart', '高频词', '热度', 'top20高频词', 'bar');
	addFloatChart(${toScriptLiteral(xdates)}, ${toScriptLiteral(ynums)}, 'date-chart', '日期', '新闻数', '爬取新闻数随日期变化趋势', 'line');
	addPieChart(${toScriptLiteral(topComes)}, ${toScriptLiteral(topGoods)}, 'source-chart', '来源报刊', '新闻数', '爬取新闻来源报刊占比', 'pie');
</script>
`);

目前效果如下图：