晚上看到有算法分享关于怎么在10万个手机号码中选择重复号码的问题。
刚好晚上有空,也写了一个算法。
View Code
// Counts how many distinct hash codes occur more than once in mobileArray.
// Dictionary value: 1 = hash code seen once so far, 2 = already counted as repeated.
// NOTE(review): the key is only the string's hash code, so two different numbers
// that happen to share a hash code would be counted as one repeated number.
Dictionary<int, int> dic = new Dictionary<int, int>();
int count3 = 0;
foreach (var item in mobileArray)
{
    int key = item.GetHashCode();
    int state;
    if (!dic.TryGetValue(key, out state))
    {
        // First occurrence of this hash code.
        dic[key] = 1;
        continue;
    }

    if (state == 1)
    {
        // Second occurrence: count it once, then mark it so later repeats are ignored.
        count3++;
        dic[key] = 2;
    }
}
// Counts how many distinct hash codes occur more than once in mobileArray.
// dic maps a hash code to 1 (seen once so far) or 2 (already counted as repeated).
// NOTE(review): the key is only the string's hash code, so two different numbers
// that happen to share a hash code would be counted as one repeated number.
int count3 = 0;
foreach (var item in mobileArray)
{
    int key = item.GetHashCode();
    int state;
    if (!dic.TryGetValue(key, out state))
    {
        // First occurrence of this hash code.
        dic[key] = 1;
        continue;
    }

    if (state == 1)
    {
        // Second occurrence: count it once, then mark it so later repeats are ignored.
        count3++;
        dic[key] = 2;
    }
}
有下面几点需要注意:
- Dictionary内部按Key的哈希值定位元素,查找和插入的平均复杂度为O(1),效率很高
- 内容相同的字符串,GetHashCode的返回值一定相同(string.GetHashCode按字符内容计算,并不要求两个字符串是同一个地址的对象);但内容不同的字符串也可能得到相同的哈希值,所以只用哈希值做Key时,存在把不同号码误判为重复号码的可能。
效果:
欢迎各位高手弄出个更快的算法
所有代码
View Code
using
System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace 手机号码重复算法
{
unsafe class Program
{
/// <summary>
/// Builds 100,000 eleven-digit sample phone numbers and counts, with three
/// techniques (LINQ grouping, a ten-way digit tree, and a dictionary), how many
/// distinct numbers occur more than once. Prints each count together with the
/// elapsed milliseconds, then waits for a line of console input.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main( string [] args)
{
    const int MobileCount = 100000; // number of sample phone numbers to generate

    // Sample data: a fixed 7-digit prefix plus a 4-digit suffix made of the leading
    // digits of the index, right-padded with zeros. Many indexes share the same
    // leading digits, so the array contains many repeated numbers.
    string[] mobileArray = new string[MobileCount];
    for (int i = 0; i < mobileArray.Length; i++)
    {
        // The literals must not contain spaces: every character has to be a digit,
        // otherwise TenNodeTree.Add indexes outside its ten child slots.
        mobileArray[i] = "1390000" + (i.ToString() + "0000").Substring(0, 4);
    }

    // LINQ equivalent of:
    //   select mobile from tmpTable group by mobile having count(*) > 1
    // The query is deferred, so it is actually executed inside the timed loop below.
    var selMobile = from n in mobileArray group n by n into g where g.Count() > 1 select g.Distinct();

    System.Diagnostics.Stopwatch sw = System.Diagnostics.Stopwatch.StartNew();
    int count1 = 0;
    foreach (var mobile in selMobile)
    {
        // Each group yields its distinct members (one number per group).
        count1 += mobile.Count();
    }
    sw.Stop();
    Console.WriteLine( " Linq共有重复号 " + count1 + " 耗时 " + sw.ElapsedMilliseconds);

    // Ten-way tree: "tree" records every number seen. When Add reports that nothing
    // new was created, the number is a repeat; "tree2" records repeats so that each
    // repeated number is counted only once.
    TenNodeTree tree = new TenNodeTree();
    TenNodeTree tree2 = new TenNodeTree();
    sw.Restart();
    int count2 = 0;
    foreach (var item in mobileArray)
    {
        fixed (char* no = item)
        {
            // Pass the real string length so Add never reads past the pinned buffer.
            if (!tree.Add(no, item.Length) && tree2.Add(no, item.Length))
            {
                count2++;
            }
        }
    }
    sw.Stop();
    // Report count2 (the tree result); the previous code printed count1 here.
    Console.WriteLine( " 十叉树共有重复号 " + count2 + " 耗时 " + sw.ElapsedMilliseconds);

    sw.Restart();
    // Keyed by the number itself rather than by item.GetHashCode(): different
    // strings can share a hash code and would then be miscounted as a repeat.
    // Value 1 = seen once so far, 2 = already counted as repeated.
    Dictionary<string, int> dic = new Dictionary<string, int>(mobileArray.Length);
    int count3 = 0;
    foreach (var item in mobileArray)
    {
        int seen;
        if (!dic.TryGetValue(item, out seen))
        {
            dic[item] = 1;
        }
        else if (seen == 1)
        {
            count3++;
            dic[item] = 2;
        }
    }
    sw.Stop();
    Console.WriteLine( " hash计算共有重复号 " + count3 + " 耗时 " + sw.ElapsedMilliseconds);
    Console.ReadLine();
}
/// <summary>
/// Tree in which every node has ten child slots, one per decimal digit.
/// A sequence of digit characters is stored as a path starting at <see cref="Root"/>.
/// </summary>
class TenNodeTree
{
    public TenNode Root = new TenNode();

    /// <summary>
    /// Walks <paramref name="len"/> characters starting at <paramref name="no"/>,
    /// creating any node missing along the path.
    /// </summary>
    /// <param name="no">Pointer to the first character; each character minus '0' is used as the child index.</param>
    /// <param name="len">Number of characters to read from <paramref name="no"/>.</param>
    /// <returns>
    /// <c>true</c> when at least one node had to be created (the path was not fully
    /// present before the call); <c>false</c> when the whole path already existed.
    /// </returns>
    public bool Add( char * no, int len)
    {
        bool created = false;
        TenNode current = Root;
        char* end = no + len;

        for (char* p = no; p < end; p++)
        {
            // '0' is character code 48, so this maps '0'..'9' to slots 0..9.
            int slot = *p - '0';
            TenNode next = current.Child[slot];
            if (next == null)
            {
                next = new TenNode();
                current.Child[slot] = next;
                created = true;
            }
            current = next;
        }

        return created;
    }
}
/// <summary>
/// Tree node holding ten child references, all initially null.
/// </summary>
class TenNode
{
    public TenNode[] Child;

    /// <summary>Allocates the ten empty child slots.</summary>
    public TenNode()
    {
        Child = new TenNode[10];
    }
}
}
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace 手机号码重复算法
{
unsafe class Program
{
/// <summary>
/// Builds 100,000 eleven-digit sample phone numbers and counts, with three
/// techniques (LINQ grouping, a ten-way digit tree, and a dictionary), how many
/// distinct numbers occur more than once. Prints each count together with the
/// elapsed milliseconds, then waits for a line of console input.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main( string [] args)
{
    const int MobileCount = 100000; // number of sample phone numbers to generate

    // Sample data: a fixed 7-digit prefix plus a 4-digit suffix made of the leading
    // digits of the index, right-padded with zeros. Many indexes share the same
    // leading digits, so the array contains many repeated numbers.
    string[] mobileArray = new string[MobileCount];
    for (int i = 0; i < mobileArray.Length; i++)
    {
        // The literals must not contain spaces: every character has to be a digit,
        // otherwise TenNodeTree.Add indexes outside its ten child slots.
        mobileArray[i] = "1390000" + (i.ToString() + "0000").Substring(0, 4);
    }

    // LINQ equivalent of:
    //   select mobile from tmpTable group by mobile having count(*) > 1
    // The query is deferred, so it is actually executed inside the timed loop below.
    var selMobile = from n in mobileArray group n by n into g where g.Count() > 1 select g.Distinct();

    System.Diagnostics.Stopwatch sw = System.Diagnostics.Stopwatch.StartNew();
    int count1 = 0;
    foreach (var mobile in selMobile)
    {
        // Each group yields its distinct members (one number per group).
        count1 += mobile.Count();
    }
    sw.Stop();
    Console.WriteLine( " Linq共有重复号 " + count1 + " 耗时 " + sw.ElapsedMilliseconds);

    // Ten-way tree: "tree" records every number seen. When Add reports that nothing
    // new was created, the number is a repeat; "tree2" records repeats so that each
    // repeated number is counted only once.
    TenNodeTree tree = new TenNodeTree();
    TenNodeTree tree2 = new TenNodeTree();
    sw.Restart();
    int count2 = 0;
    foreach (var item in mobileArray)
    {
        fixed (char* no = item)
        {
            // Pass the real string length so Add never reads past the pinned buffer.
            if (!tree.Add(no, item.Length) && tree2.Add(no, item.Length))
            {
                count2++;
            }
        }
    }
    sw.Stop();
    // Report count2 (the tree result); the previous code printed count1 here.
    Console.WriteLine( " 十叉树共有重复号 " + count2 + " 耗时 " + sw.ElapsedMilliseconds);

    sw.Restart();
    // Keyed by the number itself rather than by item.GetHashCode(): different
    // strings can share a hash code and would then be miscounted as a repeat.
    // Value 1 = seen once so far, 2 = already counted as repeated.
    Dictionary<string, int> dic = new Dictionary<string, int>(mobileArray.Length);
    int count3 = 0;
    foreach (var item in mobileArray)
    {
        int seen;
        if (!dic.TryGetValue(item, out seen))
        {
            dic[item] = 1;
        }
        else if (seen == 1)
        {
            count3++;
            dic[item] = 2;
        }
    }
    sw.Stop();
    Console.WriteLine( " hash计算共有重复号 " + count3 + " 耗时 " + sw.ElapsedMilliseconds);
    Console.ReadLine();
}
/// <summary>
/// Tree in which every node has ten child slots, one per decimal digit.
/// A sequence of digit characters is stored as a path starting at <see cref="Root"/>.
/// </summary>
class TenNodeTree
{
    public TenNode Root = new TenNode();

    /// <summary>
    /// Walks <paramref name="len"/> characters starting at <paramref name="no"/>,
    /// creating any node missing along the path.
    /// </summary>
    /// <param name="no">Pointer to the first character; each character minus '0' is used as the child index.</param>
    /// <param name="len">Number of characters to read from <paramref name="no"/>.</param>
    /// <returns>
    /// <c>true</c> when at least one node had to be created (the path was not fully
    /// present before the call); <c>false</c> when the whole path already existed.
    /// </returns>
    public bool Add( char * no, int len)
    {
        bool created = false;
        TenNode current = Root;
        char* end = no + len;

        for (char* p = no; p < end; p++)
        {
            // '0' is character code 48, so this maps '0'..'9' to slots 0..9.
            int slot = *p - '0';
            TenNode next = current.Child[slot];
            if (next == null)
            {
                next = new TenNode();
                current.Child[slot] = next;
                created = true;
            }
            current = next;
        }

        return created;
    }
}
/// <summary>
/// Tree node holding ten child references, all initially null.
/// </summary>
class TenNode
{
    public TenNode[] Child;

    /// <summary>Allocates the ten empty child slots.</summary>
    public TenNode()
    {
        Child = new TenNode[10];
    }
}
}
}