用 Java 统计一个字符串中重复出现的单词的个数?(本文示例代码为 C# 实现)

这段代码展示了如何使用C#实现一个简单的文本分析程序,统计输入文本中重复出现的单词个数。通过读取用户输入,将文本分割成单词,然后计算每个单词的出现次数,最后输出出现频率大于1的单词及其出现次数。
摘要由CSDN通过智能技术生成

using System;

using System.Collections.Generic;

using System.Linq;

namespace ConsoleApplication1 {

/// <summary>
/// Console entry point: reports every word that appears more than once in a
/// line of text read from standard input.
/// </summary>
class Program {

    /// <summary>
    /// Reads one line, tokenizes it with <c>Parser.GetToken</c>, and prints each
    /// repeated word together with its number of occurrences.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args) {
        // Console.ReadLine returns null once the input stream is exhausted;
        // treat that the same as an empty line instead of passing null on.
        var text = Console.ReadLine() ?? string.Empty;

        var tokens = Parser.GetToken(text);

        // One pass groups identical words (ordinal, case-sensitive comparison,
        // the same equality string.Equals uses). The earlier implementation
        // re-scanned the whole token array for every token and crashed with a
        // NullReferenceException when no word was repeated.
        //
        // GroupBy yields groups in order of first occurrence; the previous
        // linked-list container printed newest-first, so Reverse keeps the
        // printed order unchanged.
        var repeatedWords = tokens
            .GroupBy(word => word, StringComparer.Ordinal)
            .Select(group => new { Word = group.Key, Count = group.Count() })
            .Where(entry => entry.Count > 1)
            .Reverse();

        foreach (var entry in repeatedWords) {
            Console.WriteLine($"{entry.Word} 在文中出现了 {entry.Count} 次");
        }

        // Wait for a key press before the program exits.
        Console.ReadKey();
    }
}

/// <summary>
/// String parsing helper that splits text into word tokens.
/// </summary>
static class Parser {

    /// <summary>
    /// Splits <paramref name="text"/> into tokens. A token is a maximal run of
    /// ASCII letters and digits; a hyphen is kept inside a token only when the
    /// characters immediately before and after it are both ASCII letters or
    /// digits (for example <c>well-known</c>). Every other character acts as a
    /// separator and never appears in the result.
    /// </summary>
    /// <param name="text">The text to tokenize.</param>
    /// <returns>
    /// The tokens in order of appearance; an empty array when the text contains
    /// no token characters.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> is null.
    /// </exception>
    public static string[] GetToken(string text) {
        if (text == null) {
            throw new ArgumentNullException(nameof(text));
        }

        var tokens = new List<string>();

        // Index of the first character of the token being scanned, or -1 when
        // the scanner is currently between tokens.
        var tokenStart = -1;

        for (var index = 0; index < text.Length; index++) {
            if (IsTokenChar(text, index)) {
                if (tokenStart < 0) {
                    tokenStart = index;
                }
            }
            else if (tokenStart >= 0) {
                tokens.Add(text.Substring(tokenStart, index - tokenStart));
                tokenStart = -1;
            }
        }

        // Flush a token that runs up to the end of the text.
        if (tokenStart >= 0) {
            tokens.Add(text.Substring(tokenStart));
        }

        return tokens.ToArray();
    }

    /// <summary>
    /// Decides whether the character at <paramref name="index"/> belongs to a
    /// token. The decision is made per position, so a stand-alone hyphen
    /// elsewhere in the text does not cause hyphenated words to be split.
    /// </summary>
    /// <param name="text">The text being scanned.</param>
    /// <param name="index">A valid index into <paramref name="text"/>.</param>
    /// <returns>True when the character is part of a token.</returns>
    private static bool IsTokenChar(string text, int index) {
        var current = text[index];

        if (IsAsciiLetterOrDigit(current)) {
            return true;
        }

        if (current != '-') {
            return false;
        }

        // A hyphen at either end of the text has no neighbour on one side, so
        // it can never join two word characters; checking the bounds first
        // also keeps the neighbour lookups inside the string.
        if (index == 0 || index == text.Length - 1) {
            return false;
        }

        return IsAsciiLetterOrDigit(text[index - 1])
            && IsAsciiLetterOrDigit(text[index + 1]);
    }

    /// <summary>
    /// Returns true for the characters A-Z, a-z and 0-9 only.
    /// </summary>
    /// <param name="value">The character to classify.</param>
    private static bool IsAsciiLetterOrDigit(char value) {
        return (value >= 'A' && value <= 'Z')
            || (value >= 'a' && value <= 'z')
            || (value >= '0' && value <= '9');
    }
}

/// <summary>
/// Reference-type wrapper around a single <see cref="int"/>. It serves as the
/// constraint type for the key type parameter of <c>Repostory</c>, so that
/// generic code there can compare keys with operators.
/// </summary>
/// <remarks>
/// Equality is by wrapped value. The class is intentionally not sealed: a
/// sealed class cannot be used as a generic type constraint.
/// </remarks>
class Integer {

    private readonly int _data;

    /// <summary>Wraps <paramref name="item"/>.</summary>
    /// <param name="item">The value to wrap.</param>
    public Integer(int item) {
        _data = item;
    }

    /// <summary>Unwraps the value. Throws if <paramref name="integer"/> is null.</summary>
    public static implicit operator int(Integer integer) => integer._data;

    /// <summary>Wraps an <see cref="int"/> in a new <see cref="Integer"/>.</summary>
    public static implicit operator Integer(int @int) => new Integer(@int);

    /// <summary>
    /// Value equality. Two null references are equal; a null reference is never
    /// equal to a non-null instance.
    /// </summary>
    public static bool operator ==(Integer left, Integer right) {
        if (ReferenceEquals(left, right)) {
            return true;
        }

        if (ReferenceEquals(left, null) || ReferenceEquals(right, null)) {
            return false;
        }

        return left._data == right._data;
    }

    /// <summary>Negation of <c>==</c>.</summary>
    public static bool operator !=(Integer left, Integer right) {
        return !(left == right);
    }

    /// <summary>
    /// Returns true when <paramref name="obj"/> is an <see cref="Integer"/>
    /// wrapping the same value; false for null or any other type.
    /// </summary>
    public override bool Equals(object obj) {
        var other = obj as Integer;

        // ReferenceEquals avoids going through the overloaded == operator.
        return !ReferenceEquals(other, null) && _data == other._data;
    }

    /// <summary>
    /// Hash of the wrapped value, so that instances that compare equal also
    /// hash equally.
    /// </summary>
    public override int GetHashCode() {
        return _data.GetHashCode();
    }

    /// <summary>Returns the wrapped value formatted as a string.</summary>
    public override string ToString() {
        return _data.ToString();
    }
}

/// <summary>
/// A minimal singly linked list of key/value nodes. The instance that callers
/// create acts as the container: its own key and value are not part of the
/// sequence, and enumeration starts at its first child node.
/// </summary>
/// <typeparam name="K">Type of the key; must be <c>Integer</c> or derive from it.</typeparam>
/// <typeparam name="V">Type of the value.</typeparam>
class Repostory<K, V> where K : Integer {

    private readonly K _key;
    private readonly V _value;
    private int _count;
    private Repostory<K, V> _node;

    /// <summary>The key stored in this node.</summary>
    public K Key => _key;

    /// <summary>The value stored in this node.</summary>
    public V Value => _value;

    /// <summary>
    /// Number of nodes added through <see cref="Add"/> on this instance.
    /// </summary>
    public int Count => _count;

    /// <summary>The child node that follows this one, or null when there is none.</summary>
    public Repostory<K, V> Node => _node;

    /// <summary>Creates an empty container.</summary>
    public Repostory() { }

    /// <summary>Creates a node holding the given key and value.</summary>
    /// <param name="k">The key.</param>
    /// <param name="v">The value.</param>
    public Repostory(K k, V v) {
        _key = k;
        _value = v;
    }

    /// <summary>
    /// Adds a node. The new node is inserted directly after this instance, so
    /// enumeration returns the most recently added node first.
    /// </summary>
    /// <param name="key">Key of the new node.</param>
    /// <param name="value">Value of the new node.</param>
    public void Add(K key, V value) {
        var added = new Repostory<K, V>(key, value);

        // Linking to the current first child is correct for an empty list too:
        // the new node's successor is simply null in that case.
        added._node = _node;
        _node = added;
        _count++;
    }

    /// <summary>
    /// Determines whether any node stores <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The value to look for; may be null.</param>
    /// <returns>True when a node with an equal value exists.</returns>
    public bool Contains(V value) {
        // The default comparer copes with null on either side, so no
        // exception handling is needed to survive null values.
        var comparer = EqualityComparer<V>.Default;

        foreach (var node in this) {
            if (comparer.Equals(node._value, value)) {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Enumerates the nodes, most recently added first. An empty container
    /// yields an empty sequence.
    /// </summary>
    public IEnumerator<Repostory<K, V>> GetEnumerator() {
        for (var current = _node; current != null; current = current._node) {
            yield return current;
        }
    }

    /// <summary>
    /// Copies the stored values into a new array in the order they were added.
    /// </summary>
    /// <returns>An array of <see cref="Count"/> values; empty for an empty container.</returns>
    public V[] ToArray() {
        var values = new V[_count];

        // Enumeration runs newest-first, so fill the array from the back to
        // end up with oldest-first order.
        var slot = values.Length - 1;

        foreach (var node in this) {
            values[slot] = node._value;
            slot--;
        }

        return values;
    }

    /// <summary>
    /// Returns the value of the most recently added node whose key equals
    /// <paramref name="key"/>.
    /// </summary>
    /// <param name="key">The key to look up.</param>
    /// <exception cref="InvalidOperationException">No node has an equal key.</exception>
    public V this[K key]
    {
        get
        {
            for (var current = _node; current != null; current = current._node) {
                // object.Equals tolerates null on either side before
                // delegating to the key's own Equals override.
                if (Equals(current._key, key)) {
                    return current._value;
                }
            }

            throw new InvalidOperationException("元素不存在。");
        }
    }
}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值