来电归属地数据查询的Java实现

项目需要实现来电归属地查询,所以就找到了下面这篇文章

原理主要在原文的(一)和(二)中。作者的数据压缩思路很给力,将6MB的原始文本数据压缩到400KB左右。原作者已经将分析讲得很清楚了,这里提炼一下要点,并将C++实现转化成Java实现。

1、压缩主要通过减小数据冗余完成;

2、原始数据格式:手机号码前7位,城市名

3、数据冗余点:手机号码有递增规律,城市名重复出现

4、将手机号码的排列转换为号码区间,城市名建索引

5、号码区间用2个short,城市索引用1个short

6、为了查询的效率,原始数据按号码递增排列,这样查询的时候能够用二分快速查找

下面是原文中的一张图,可以更好地理解数据的存储结构

0818b9ca8b590ca3270a3433284dd417.png

转换实现

/**
 * Converts the text file {@code CONVERT_TXT_NAME} into the compact binary file
 * {@code OUTPUT_FILE_NAME}.
 *
 * <p>Each input line is expected to have the form {@code <number>,<city>}. A run of
 * consecutive numbers that map to the same city is collapsed into one interval record.
 *
 * <p>Output layout, in write order:
 * <ol>
 *   <li>an {@code int} holding the number of interval records;</li>
 *   <li>one record per interval made of three {@code short}s:
 *       {@code getStartNum()}, {@code getAfterNum()} and the city index;</li>
 *   <li>the city table returned by {@code CityCollector.getFormatCityByte()}.</li>
 * </ol>
 *
 * <p>Lines that do not split into exactly two fields are skipped silently; lines whose
 * first field is not an integer are skipped with a message. I/O failures are reported
 * through {@code printStackTrace()}; the method itself never throws {@code IOException}.
 */
private static void convertTXTtoDAT() {
    File readFile = new File(CONVERT_TXT_NAME);
    if (!readFile.exists()) {
        System.out.println(CONVERT_TXT_NAME + " not exist");
        return;
    }

    File writeFile = new File(OUTPUT_FILE_NAME);
    try {
        writeFile.createNewFile();
    } catch (IOException e) {
        e.printStackTrace();
    }
    if (!writeFile.exists()) {
        System.out.println(OUTPUT_FILE_NAME + " not create!");
        return;
    }

    CityCollector cityCollector = new CityCollector();
    BufferedReader reader = null;
    RandomAccessFile writer = null;
    try {
        reader = new BufferedReader(new FileReader(readFile));
        writer = new RandomAccessFile(writeFile, "rw");
        // "rw" mode keeps the old content of an existing file. Drop it, otherwise a stale,
        // longer file would push the city table (appended at writer.length()) to a wrong offset.
        writer.setLength(0);

        int count = 0;
        // Placeholder for the record count; patched once all records are written.
        writer.writeInt(count);

        // Interval currently being built; null until the first valid line is seen.
        NumberCompressor compressor = null;
        String data;
        while ((data = reader.readLine()) != null) {
            String[] content = data.split(",");
            if (content.length != 2) {
                continue;
            }
            int number;
            try {
                number = Integer.parseInt(content[0]);
            } catch (NumberFormatException e) {
                System.out.println("skip malformed line: " + data);
                continue;
            }
            int cityIndex = cityCollector.putCity(content[1]);

            // Extend the current interval only when the number is the direct successor, the
            // city is unchanged, and the packed second short still fits a signed short after
            // growing (every extra number adds 100 to getAfterNum()).
            if (compressor != null
                    && cityIndex == compressor.getCityIndex()
                    && number - compressor.getCurrentNumber() == 1
                    && compressor.getAfterNum() + 100 <= Short.MAX_VALUE) {
                compressor.increaseSkipNum();
                continue;
            }

            // Flush the finished interval before starting a new one.
            if (compressor != null) {
                writer.writeShort(compressor.getStartNum());
                writer.writeShort(compressor.getAfterNum());
                writer.writeShort(compressor.getCityIndex());
                ++count;
            }
            compressor = new NumberCompressor(number, cityIndex);
        }

        // Flush the last open interval (absent when the input had no valid line).
        if (compressor != null) {
            writer.writeShort(compressor.getStartNum());
            writer.writeShort(compressor.getAfterNum());
            writer.writeShort(compressor.getCityIndex());
            ++count;
        }

        // Patch the real record count into the header, then append the city table.
        writer.seek(0);
        writer.writeInt(count);
        writer.seek(writer.length());
        writer.write(cityCollector.getFormatCityByte());
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

其中CityCollector是存储城市信息的工具类,按顺序存储城市名,并返回其索引,最后和号码数据一起存入文件

/**
 * Collects distinct city names in first-seen order and serializes them as a table of
 * fixed-size slots, so a city can be located by {@code index * MAXCITYLENGTH}.
 */
public class CityCollector {

    /**
     * Fixed number of bytes reserved for every city name, which allows direct positioning
     * during lookup. May be adjusted as needed.
     */
    public static final int MAXCITYLENGTH = 34;

    // Element type restored: with a raw list, get(i).getBytes() does not compile.
    private ArrayList<String> mCityList = new ArrayList<String>();

    public CityCollector() {
    }

    /**
     * Serializes all collected cities into one array of {@code size * MAXCITYLENGTH} bytes.
     * Slot {@code i} starts at offset {@code MAXCITYLENGTH * i}; unused bytes of a slot stay 0.
     * Names are encoded with the platform default charset; an encoding longer than
     * {@code MAXCITYLENGTH} bytes is cut off at the slot boundary.
     *
     * @return the city table, empty when no city has been added
     */
    public byte[] getFormatCityByte() {
        int size = mCityList.size();
        ByteBuffer buffer = ByteBuffer.allocate(size * MAXCITYLENGTH);
        for (int i = 0; i < size; ++i) {
            byte[] cityBytes = mCityList.get(i).getBytes();
            buffer.position(MAXCITYLENGTH * i);
            // Clamp to the slot size: an over-long name must neither spill into the next
            // slot nor overflow the buffer when it is the last entry.
            buffer.put(cityBytes, 0, Math.min(cityBytes.length, MAXCITYLENGTH));
        }
        return buffer.array();
    }

    /**
     * Registers a city name if it is not known yet.
     *
     * @param city the city name
     * @return the index of the city: its existing index if already present, otherwise the
     *         index of the newly appended entry
     */
    public int putCity(String city) {
        int cityIndex = mCityList.indexOf(city);
        if (cityIndex != -1) {
            return cityIndex;
        } else {
            mCityList.add(city);
            return mCityList.size() - 1;
        }
    }
}

NumberCompressor是为实现号码压缩存储的一个结构,包括一个号码区间的起始号,该区间的号码数量,和区间的城市索引(一个区间内的号码在同一个城市)

/**
 * One interval of consecutive numbers that share a city: the first number of the
 * interval, how many further numbers follow it, and the index of the city.
 */
public class NumberCompressor {

    /** Factor used to split the first number and to pack the follow-up count. */
    private static final int PACK_BASE = 100;

    private final int firstNumber;
    private final int cityIdx;
    private int extraCount;

    /**
     * Starts an interval that contains only {@code number}.
     *
     * @param number the first number of the interval
     * @param cityIndex the index of the city the interval belongs to
     */
    public NumberCompressor(int number, int cityIndex) {
        this.firstNumber = number;
        this.cityIdx = cityIndex;
        this.extraCount = 0;
    }

    /** Extends the interval by one more number. */
    public void increaseSkipNum() {
        extraCount += 1;
    }

    /** @return the last number currently covered by the interval */
    public int getCurrentNumber() {
        int last = firstNumber + extraCount;
        return last;
    }

    /** @return the city index given at construction */
    public int getCityIndex() {
        return this.cityIdx;
    }

    /** @return the first number with its two lowest decimal digits removed */
    public int getStartNum() {
        return this.firstNumber / PACK_BASE;
    }

    /**
     * @return the two lowest decimal digits of the first number plus the follow-up count
     *         multiplied by 100
     */
    public int getAfterNum() {
        int lowDigits = this.firstNumber % PACK_BASE;
        int packedCount = this.extraCount * PACK_BASE;
        return lowDigits + packedCount;
    }
}

查找就是存储的逆向过程,由于数据是有序存储的,所以这里查找用二分实现,如下

/**
 * Looks up the city for a number in the binary file {@code OUTPUT_FILE_NAME} using a
 * binary search over the interval records.
 *
 * <p>The file is read as: an {@code int} record count, then that many 6-byte records
 * (three 16-bit values), then a table of city names in slots of
 * {@code CityCollector.MAXCITYLENGTH} bytes. The 16-bit values are read as unsigned so
 * that packed values above {@code Short.MAX_VALUE} decode correctly.
 *
 * @param number the number to look up, in the same form as the numbers stored in the file
 * @return the city name with surrounding whitespace/NUL padding trimmed, or {@code null}
 *         when no interval contains the number or the file cannot be read
 */
private static String searchNumberLocation(int number) {
    File file = new File(OUTPUT_FILE_NAME);
    RandomAccessFile readFile = null;
    String result = null;
    try {
        readFile = new RandomAccessFile(file, "r");
        int count = readFile.readInt();
        final int headerSize = 4; // the leading int that stores the record count
        final int blockSize = 3 * 2; // three 16-bit values per record

        int left = 0;
        int right = count - 1;
        while (left <= right) {
            // Overflow-safe midpoint.
            int middle = left + (right - left) / 2;
            readFile.seek(headerSize + (long) middle * blockSize);

            int firstNum = readFile.readUnsignedShort();
            int secondNum = readFile.readUnsignedShort();
            // secondNum packs the low two digits of the first number (% 100) and the
            // count of following numbers (/ 100).
            int beginNum = firstNum * 100 + secondNum % 100;
            int endNum = beginNum + secondNum / 100;

            if (number < beginNum) {
                right = middle - 1;
            } else if (number > endNum) {
                left = middle + 1;
            } else {
                int cityIndex = readFile.readUnsignedShort();
                // The city table starts right after the last interval record.
                readFile.seek(headerSize + (long) count * blockSize
                        + (long) cityIndex * CityCollector.MAXCITYLENGTH);
                byte[] b = new byte[CityCollector.MAXCITYLENGTH];
                // readFully: a plain read() may legally return fewer bytes than requested.
                readFile.readFully(b);
                result = new String(b).trim();
                break;
            }
        }
    } catch (IOException e) {
        // Also covers FileNotFoundException and EOFException (truncated file).
        e.printStackTrace();
    } finally {
        // Close in finally so the handle is released even on an unchecked exception.
        if (readFile != null) {
            try {
                readFile.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return result;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值