matlab 读取txt文件以及进行数据处理

最新推荐文章于 2023-12-27 10:05:48 发布

akala啦

最新推荐文章于 2023-12-27 10:05:48 发布

阅读量4.4k

点赞数

本文链接：https://blog.csdn.net/qq_29468403/article/details/80681604

版权

周日做了信息论的小project，差不多熬了一个晚上加周一的早上，终于利用matlab成功地读取了txt文件中的英文单词，并进行了简单的数据处理，现在做一个简单的分享。

百度经验：matlab如何读取txt文件： https://jingyan.baidu.com/article/b87fe19e6b478852183568e1.html

代码：

function [] = work3(filename)
%WORK3 Plot errH curves for the 1-, 2- and 3-gram counts of a text file.
%   WORK3() reads 'harry1.txt' from the current folder / MATLAB path.
%   WORK3(FILENAME) reads FILENAME instead.
%
%   The text is lower-cased, commas are replaced by spaces, and the text is
%   split on whitespace into words. Words are grouped into non-overlapping
%   n-grams (n = 1, 2, 3); leftover words that do not fill a complete
%   n-gram are dropped. For every n the sorted occurrence counts of the
%   distinct n-grams are passed to errH, and the three resulting curves
%   (H normalized by its first element, versus err) are drawn in one
%   figure.
%
%   Side effects: clears the command window, closes all open figures and
%   opens a new figure.
if nargin < 1 || isempty(filename)
    filename = 'harry1.txt';
end
clc
close all
%% read data
txt = fileread(filename);
txt = strrep(txt,',',' ');            % commas become word separators
txt = lower(txt);                     % case-insensitive counting
tokens = reshape(strsplit(txt),[],1); % cell column, one word per cell
% strsplit yields '' for leading/trailing whitespace; those are not words.
tokens(cellfun(@isempty,tokens)) = [];
if isempty(tokens)
    error('work3:noWords','No words found in "%s".',filename);
end
% Blank-padded char matrix, one word per row, so that n-grams can be
% compared row-wise with unique(...,'rows').
wordChars = char(tokens);
%% 1-gram, 2-gram, 3-gram
[err1,H1] = errH(sortedNgramCounts(wordChars,1));
[err2,H2] = errH(sortedNgramCounts(wordChars,2));
[err3,H3] = errH(sortedNgramCounts(wordChars,3));
%% plot
figure
stairs(err1,H1/H1(1),'r')
titleName = ['N=',num2str(H1(1))];
hold on
stairs(err2,H2/H2(1),'b')
stairs(err3,H3/H3(1),'k')
title(titleName,'fontsize',16,'fontweight','bold');
xlabel('误差','fontsize',16,'fontweight','bold');
ylabel('H/N','fontsize',16,'fontweight','bold');
legend('1-gram','2-gram','3-gram');
end

function counts = sortedNgramCounts(wordChars,n)
%SORTEDNGRAMCOUNTS Ascending occurrence counts of non-overlapping n-grams.
%   wordChars is a char matrix with one blank-padded word per row. Rows
%   (1..n), (n+1..2n), ... are concatenated horizontally into n-grams;
%   trailing rows that do not fill a complete n-gram are ignored.
%   Returns a 1-by-K row vector (K = number of distinct n-grams) sorted in
%   ascending order, or an empty 1-by-0 vector when there are fewer than n
%   words.
nGroups = floor(size(wordChars,1)/n);
if nGroups == 0
    counts = zeros(1,0);
    return
end
parts = cell(1,n);
for k = 1:n
    % k-th word of every n-gram
    parts{k} = wordChars(k:n:n*nGroups,:);
end
grams = [parts{:}];
[~,~,idx] = unique(grams,'rows');
% idx holds integer labels 1..K; accumarray counts each label exactly.
counts = sort(accumarray(idx,1)).';
end
%%
%% error / log2 curve helper
function [err,H]=errH(numOccurrences)
%ERRH Build the (err, H) curve from a vector of occurrence counts.
%   [err,H] = errH(numOccurrences), with T = sum(numOccurrences), returns
%   two 1-by-(T+1) row vectors:
%     err(1) = 0, and every following element adds
%              p(r)/numOccurrences(r), where p = numOccurrences/T, once
%              for each of the numOccurrences(r) occurrences of entry r
%              (entries are processed in the given order);
%     H(k)   = log2(T-k+1), i.e. log2 of T, T-1, ..., 1, 0.
%   Note that the final element of H is log2(0) = -Inf.
%   numOccurrences is expected to hold non-negative integer counts.
total = sum(numOccurrences);
probs = numOccurrences/total;
% The remaining-count drops by one per step, so H needs no loop.
H = log2(total:-1:0);
% Preallocate instead of growing the array one element at a time.
err = zeros(1,total+1);
pos = 1;
for r = 1:length(numOccurrences)
    cnt = numOccurrences(r);
    for k = 1:cnt %#ok<NASGU>
        pos = pos+1;
        err(pos) = err(pos-1)+probs(r)/cnt;
    end
end
end