最近追《斗破苍穹》动漫,正好看到了"三上云岚宗",一时无聊,就用 node 写了个小程序,用来统计《斗破苍穹》中各人物名称出现的次数。
首先,我们先自己创建个文本测试一下,文本内容如下:
然后我们开始搭建 node 服务,采用 node 的 Koa 框架,执行以下两行命令:
npm init -y
npm install koa --save
要读取一个文本,就需要用到 node 当中的 fs 模块来创建文件流;因为我们要读取的文本是小说,有几百万字,所以用 readline 来逐行读取文本。下面上代码:
const Koa = require('koa');
const fs = require('fs');
const readline = require('readline');
const path = require('path');
// Koa application instance that the request handler below is registered on.
const app = new Koa();
// Path of the text file to scan: '1.txt' located next to this script.
const filePath = path.join(__dirname, '1.txt');
/**
 * Counts the non-overlapping occurrences of `name` inside `text`.
 *
 * The name is matched as a literal substring (no regular expression is
 * built), so characters that are special in a regex cannot distort the count.
 *
 * @param {string} text - Text to search (one line of the file).
 * @param {string} name - Non-empty name to look for.
 * @returns {number} How many times `name` occurs in `text`.
 */
function countOccurrences(text, name) {
  return text.split(name).length - 1;
}

/**
 * Reads `file` line by line and tallies how often each name appears.
 *
 * @param {string} file - Path of the UTF-8 text file to scan.
 * @param {string[]} names - Names to count.
 * @returns {Promise<Object<string, number>>} Resolves with a name -> count
 *   map (keys in the order of `names`); rejects if the read stream errors.
 */
function countNamesInFile(file, names) {
  return new Promise((resolve, reject) => {
    const counts = {};
    for (const name of names) {
      counts[name] = 0; // every name starts at zero so it always shows up in the result
    }

    const readStream = fs.createReadStream(file, 'utf-8');
    const rl = readline.createInterface({
      input: readStream, // readline consumes the file stream created above
      crlfDelay: Infinity, // always treat "\r\n" as a single line break
    });

    // An explicit 'error' listener is attached because a failure on the input
    // stream (e.g. the file does not exist) is not guaranteed to surface
    // through readline itself. Reject first: closing the interface emits
    // 'close', which would otherwise resolve the promise.
    readStream.once('error', (error) => {
      reject(error);
      rl.close();
    });

    rl.on('line', (line) => {
      for (const name of names) {
        counts[name] += countOccurrences(line, name);
      }
    });

    rl.once('close', () => resolve(counts));
  });
}

// Request handler: responds to every request with the per-character counts.
app.use(async (ctx) => {
  // Character names to count; adjust as needed. Names are counted
  // independently, so each "萧炎哥哥" hit is also counted under "萧炎".
  const characterNames = ['萧炎',"薰儿","美杜莎","彩鳞","小医仙","云韵","云芝","药尘","药尊","药老","萧炎哥哥"];
  try {
    // Send the tally back to the client as the response body.
    ctx.body = await countNamesInFile(filePath, characterNames);
  } catch (error) {
    console.error('Error reading file:', error);
    ctx.status = 500;
    ctx.body = 'Error reading file';
  }
});
// TCP port the HTTP server listens on.
const port = 3000;

// Logs the server address once the server has started listening.
const announceReady = () => {
  console.log(`Server is running on http://localhost:${port}`);
};

app.listen(port, announceReady);
运行代码,得到的结果为:
{"萧炎":3,"薰儿":2,"美杜莎":0,"彩鳞":0,"小医仙":0,"云韵":0,"云芝":0,"药尘":0,"药尊":0,"药老":0,"萧炎哥哥":0}
可以看到,测试文本的统计结果是没错的,那我们直接上整篇《斗破苍穹》。
const Koa = require('koa');
const fs = require('fs');
const readline = require('readline');
const path = require('path');
// Koa application instance that the request handler below is registered on.
const app = new Koa();
// Path of the text file to scan: '斗破苍穹.txt' located next to this script.
const filePath = path.join(__dirname, '斗破苍穹.txt');
/**
 * Counts the non-overlapping occurrences of `name` inside `text`.
 *
 * The name is matched as a literal substring (no regular expression is
 * built), so characters that are special in a regex cannot distort the count.
 *
 * @param {string} text - Text to search (one line of the file).
 * @param {string} name - Non-empty name to look for.
 * @returns {number} How many times `name` occurs in `text`.
 */
function countOccurrences(text, name) {
  return text.split(name).length - 1;
}

/**
 * Reads `file` line by line and tallies how often each name appears.
 *
 * @param {string} file - Path of the UTF-8 text file to scan.
 * @param {string[]} names - Names to count.
 * @returns {Promise<Object<string, number>>} Resolves with a name -> count
 *   map (keys in the order of `names`); rejects if the read stream errors.
 */
function countNamesInFile(file, names) {
  return new Promise((resolve, reject) => {
    const counts = {};
    for (const name of names) {
      counts[name] = 0; // every name starts at zero so it always shows up in the result
    }

    const readStream = fs.createReadStream(file, 'utf-8');
    const rl = readline.createInterface({
      input: readStream, // readline consumes the file stream created above
      crlfDelay: Infinity, // always treat "\r\n" as a single line break
    });

    // An explicit 'error' listener is attached because a failure on the input
    // stream (e.g. the file does not exist) is not guaranteed to surface
    // through readline itself. Reject first: closing the interface emits
    // 'close', which would otherwise resolve the promise.
    readStream.once('error', (error) => {
      reject(error);
      rl.close();
    });

    rl.on('line', (line) => {
      for (const name of names) {
        counts[name] += countOccurrences(line, name);
      }
    });

    rl.once('close', () => resolve(counts));
  });
}

// Request handler: responds to every request with the per-character counts.
app.use(async (ctx) => {
  // Character names to count; adjust as needed. Names are counted
  // independently, so each "萧炎哥哥" hit is also counted under "萧炎".
  const characterNames = ['萧炎',"薰儿","美杜莎","彩鳞","小医仙","云韵","云芝","药尘","药尊","药老","萧炎哥哥"];
  try {
    // Send the tally back to the client as the response body.
    ctx.body = await countNamesInFile(filePath, characterNames);
  } catch (error) {
    console.error('Error reading file:', error);
    ctx.status = 500;
    ctx.body = 'Error reading file';
  }
});
// TCP port the HTTP server listens on.
const port = 3000;

// Logs the server address once the server has started listening.
const announceReady = () => {
  console.log(`Server is running on http://localhost:${port}`);
};

app.listen(port, announceReady);
最后,运行代码结果为:
{"萧炎":45902,"薰儿":3024,"美杜莎":1545,"彩鳞":511,"小医仙":2386,"云韵":680,"云芝":296,"药尘":285,"药尊":53,"药老":3668,"萧炎哥哥":422}
想核实结果的小伙伴自行根据小说去数一下哈QAQ