pfx 是前缀prefix
sfx 是后缀suffix
final.txt:每行依次是电话号码前 7 位、姓名、编号(以空白字符分隔),共 10 万行
phoneMd5ed.txt:MD5 后的电话号码,每行一个,共 32.3 GB
因为计算 MD5 是纯计算任务,所以用 4 个线程计算 MD5 后放入队列,再由一个线程写入文件
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Generates candidate phone numbers and writes the MD5 digest of each one to
 * {@code phoneMd5ed.txt}, one digest per line.
 *
 * <p>A candidate is a prefix (first whitespace-separated field of each line of
 * {@code final.txt}) followed by a four-digit suffix. Suffixes that follow an
 * "easy" pattern (four consecutive digits ascending or descending, four equal
 * digits, or three equal leading/trailing digits) are skipped.
 *
 * <p>Hashing is CPU-bound, so it is spread over several threads that feed a
 * single writer thread through bounded queues.
 */
public class Test {

    /**
     * Becomes {@code true} once the producer has queued every phone number.
     * Kept public for backward compatibility; worker shutdown is signalled with
     * {@link #END_OF_STREAM} markers instead of this flag, so that no queued
     * item is ever dropped.
     */
    public static volatile boolean done = false;

    private static final String INPUT_FILE = "final.txt";
    private static final String OUTPUT_FILE = "phoneMd5ed.txt";
    private static final int HASHER_THREADS = 4;
    private static final int QUEUE_CAPACITY = 100000;
    private static final int PROGRESS_INTERVAL = 100000;

    /**
     * Queue marker meaning "no more items". It cannot collide with real data:
     * every phone number ends with four digits and every digest is hex (or empty).
     */
    private static final String END_OF_STREAM = "<EOF>";

    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Runs the whole pipeline: builds the suffix set, reads the prefixes, then
     * hashes every prefix/suffix combination into the output file.
     *
     * @param args unused
     * @throws Exception if the input cannot be read, the output cannot be
     *                   written, or the thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        Set<String> excluded = buildExcludedSuffixes();

        Set<String> phonesfx = buildAllowedSuffixes(excluded);
        System.out.println(phonesfx.size());
        System.out.println(phonesfx);

        Set<String> pfxphone = readPrefixes();
        System.out.println(pfxphone.size());

        writeDigests(pfxphone, phonesfx);
        System.out.println("done");
    }

    /** Builds (and prints) the set of four-digit suffixes that must be skipped. */
    private static Set<String> buildExcludedSuffixes() {
        // Four consecutive ascending digits: 0123 .. 6789.
        List<String> seq = new ArrayList<>();
        for (int i = 0; i < 7; i++) {
            seq.add(String.valueOf(i) + (i + 1) + (i + 2) + (i + 3));
        }
        System.out.println(seq);

        // The same sequences reversed: 3210 .. 9876.
        List<String> seqReverse = new ArrayList<>();
        for (String one : seq) {
            seqReverse.add(new StringBuilder(one).reverse().toString());
        }
        System.out.println(seqReverse);

        // Four identical digits: 0000 .. 9999.
        List<String> fourDigit = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            String digit = String.valueOf(i);
            fourDigit.add(digit + digit + digit + digit);
        }
        System.out.println(fourDigit);

        // Every two-digit string 00 .. 99, used as the seed for the next rule.
        List<String> twoDigit = new ArrayList<>();
        for (int i = 0; i < 100; i++) {
            twoDigit.add(String.format(Locale.ROOT, "%02d", i));
        }
        System.out.println(twoDigit);

        // For seed "ab": "aaab" (three equal leading digits) and "abbb"
        // (three equal trailing digits).
        List<String> threeDigit = new ArrayList<>();
        for (String one : twoDigit) {
            char pfx = one.charAt(0);
            char sfx = one.charAt(1);
            threeDigit.add(String.valueOf(pfx) + pfx + one);
            threeDigit.add(one + sfx + sfx);
        }
        System.out.println(threeDigit);
        System.out.println();

        // Merge all rules; the set removes the overlaps (e.g. 0000).
        Set<String> set = new HashSet<>(seq);
        set.addAll(seqReverse);
        set.addAll(fourDigit);
        set.addAll(threeDigit);
        System.out.println(set);
        return set;
    }

    /** Returns every suffix 0000..9999 that is not in {@code excluded}. */
    private static Set<String> buildAllowedSuffixes(Set<String> excluded) {
        Set<String> allowed = new HashSet<>();
        for (int i = 0; i < 10000; i++) {
            // Locale.ROOT keeps the digits ASCII regardless of the default locale.
            String one = String.format(Locale.ROOT, "%04d", i);
            if (!excluded.contains(one)) {
                allowed.add(one);
            }
        }
        return allowed;
    }

    /**
     * Reads the distinct prefixes (first whitespace-separated field of each
     * line) from the input file. Blank and whitespace-only lines are skipped,
     * and leading whitespace is ignored, so a malformed line can neither crash
     * the run nor produce an empty prefix.
     */
    private static Set<String> readPrefixes() throws IOException {
        List<String> lines = Files.readAllLines(Paths.get(INPUT_FILE));
        System.out.println(lines.size());

        Set<String> prefixes = new HashSet<>();
        for (String line : lines) {
            String trimmed = line.trim();
            if (trimmed.isEmpty()) {
                continue;
            }
            prefixes.add(trimmed.split("\\s+")[0]);
        }
        return prefixes;
    }

    /**
     * Hashes every prefix/suffix combination and writes the digests to the
     * output file. Returns only after every digest has been written and the
     * file has been closed.
     */
    private static void writeDigests(Set<String> prefixes, Set<String> suffixes)
            throws IOException, InterruptedException {
        BlockingQueue<String> phoneQueue = new LinkedBlockingQueue<>(QUEUE_CAPACITY);
        BlockingQueue<String> md5Queue = new LinkedBlockingQueue<>(QUEUE_CAPACITY);
        AtomicReference<IOException> writeFailure = new AtomicReference<>();

        try (BufferedWriter bw =
                Files.newBufferedWriter(Paths.get(OUTPUT_FILE), StandardCharsets.UTF_8)) {
            List<Thread> hashers = new ArrayList<>(HASHER_THREADS);
            for (int i = 0; i < HASHER_THREADS; i++) {
                Thread hasher =
                        new Thread(() -> hashLoop(phoneQueue, md5Queue), "md5-hasher-" + i);
                hasher.start();
                hashers.add(hasher);
            }
            Thread writer =
                    new Thread(() -> writeLoop(md5Queue, bw, writeFailure), "md5-writer");
            writer.start();

            try {
                long count = 0;
                for (String one : prefixes) {
                    if (writeFailure.get() != null) {
                        break; // output is broken; stop producing work
                    }
                    for (String two : suffixes) {
                        phoneQueue.put(one + two);
                        count++;
                        if (count % PROGRESS_INTERVAL == 0) {
                            System.out.println(count);
                        }
                    }
                }
                done = true;

                // One marker per hasher: each hasher stops after consuming exactly one.
                for (int i = 0; i < HASHER_THREADS; i++) {
                    phoneQueue.put(END_OF_STREAM);
                }
                for (Thread hasher : hashers) {
                    hasher.join();
                }
                // All digests are queued now, so the writer's marker is the last item.
                md5Queue.put(END_OF_STREAM);
                writer.join();
            } catch (InterruptedException e) {
                // Do not leave non-daemon threads blocked on the queues.
                for (Thread hasher : hashers) {
                    hasher.interrupt();
                }
                writer.interrupt();
                throw e;
            }
        }

        IOException failure = writeFailure.get();
        if (failure != null) {
            throw failure;
        }
    }

    /** Moves phones from {@code phoneQueue} to {@code md5Queue} as digests until end-of-stream. */
    private static void hashLoop(BlockingQueue<String> phoneQueue, BlockingQueue<String> md5Queue) {
        try {
            while (true) {
                String phone = phoneQueue.take();
                if (END_OF_STREAM.equals(phone)) {
                    return;
                }
                md5Queue.put(md5(phone));
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Writes digests from {@code md5Queue} to {@code out} until end-of-stream.
     * After the first write error the error is recorded in {@code failure} and
     * the queue is still drained, so the hashing threads never block on a full
     * queue.
     */
    private static void writeLoop(BlockingQueue<String> md5Queue, BufferedWriter out,
            AtomicReference<IOException> failure) {
        try {
            while (true) {
                String md5Str = md5Queue.take();
                if (END_OF_STREAM.equals(md5Str)) {
                    return;
                }
                if (failure.get() != null) {
                    continue;
                }
                try {
                    out.write(md5Str);
                    out.newLine();
                } catch (IOException e) {
                    failure.set(e);
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Returns the MD5 digest of the UTF-8 bytes of {@code dataStr} as 32
     * lowercase hexadecimal characters.
     *
     * @param dataStr text to hash; {@code null} yields an empty string
     * @return the hex digest, or {@code ""} when {@code dataStr} is {@code null}
     * @throws IllegalStateException if the runtime provides no MD5 implementation
     */
    public static String md5(String dataStr) {
        if (dataStr == null) {
            return "";
        }
        MessageDigest m;
        try {
            m = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
        byte[] s = m.digest(dataStr.getBytes(StandardCharsets.UTF_8));
        char[] hex = new char[s.length * 2];
        for (int i = 0; i < s.length; i++) {
            hex[2 * i] = HEX_DIGITS[(s[i] >> 4) & 0x0F];
            hex[2 * i + 1] = HEX_DIGITS[s[i] & 0x0F];
        }
        return new String(hex);
    }
}