在学习 save() 函数时,我对修改表有一点疑惑:修改表中是不是只存储发生了修改的词,而对没有变化的词不作处理?如果是这样,那就能解释得通了。
首先,原表用 public ArrayList<WordTable> wts 表示,修改表用 public ArrayList<ModifyTable> mts 表示。对于被删除的词,它的频率会被设为 -1,保存时会跳过它。
从源代码来看,修改表中没有加入新的字(即新的大数据块),因为 mts 的大小要么为 0,要么与 wts 相同,即 CC_NUM。
所谓存储过程,也就是对原表和修改表同时进行遍历、逐项比较并写入词典文件的过程:
现将代码粘贴如下(Java 版):
// Writes all Utility.CC_NUM blocks of the dictionary to `out`.
// Each block is: the entry count, then for every entry the three ints
// (frequency, length, handle) followed by the word bytes when length > 0.
// When a modify table (mts) exists, its entries are merged with the entries
// of the original table (wts) and entries whose frequency is -1 are dropped.
for (int i = 0; i < Utility.CC_NUM; i++) {
    final int wtCount = wts.get(i).getCount();
    if (mts != null) {// Modification made
        final int mtCount = mts.get(i).getCount();
        int nCount = wtCount + mtCount - mts.get(i).getDelete();
        out.write(GFCommon.int2bytes(nCount, false));
        j = 0;
        k = 0;
        // Merge both tables: each pass consumes exactly one entry, so the
        // loop always terminates.
        while (j < mtCount && k < wtCount) {
            WordItem mwi = mts.get(i).getWords().get(j);
            WordItem wi = wts.get(i).getWords().get(k);
            WordItem next;
            if (mwi.getLen() < wi.getLen()
                    || (strEqual(mwi.getWord(), wi.getWord()) && mwi.getHandle() < wi.getHandle())) {
                // The modified entry is shorter, or it is the same word with
                // a smaller handle: it goes to the file first.
                next = mwi;
                j++;
            } else if (wi.getFreq() == -1) {
                // The original entry has been removed, so skip it. The marker
                // is read from the original entry, matching the flush loop
                // below, because it is the original index that advances.
                k++;
                continue;
            } else {
                // In every other case the original entry is written. The
                // previous code only handled "longer, or same word with a
                // larger handle" here and spun forever on anything else
                // (e.g. same length but different words).
                next = wi;
                k++;
            }
            nBuffer[0] = next.getFreq();
            nBuffer[1] = next.getLen();
            nBuffer[2] = next.getHandle();
            for (int n : nBuffer) {
                out.write(GFCommon.int2bytes(n, false));
            }
            if (nBuffer[1] > 0) {// String length is more than 0
                // NOTE(review): getBytes() uses the platform default charset;
                // confirm it matches what the dictionary loader expects.
                out.write(next.getWord().getBytes());
            }
        }
        // At most one of the two loops below has work left to do.
        // Flush the rest of the original table, skipping deleted entries.
        for (; k < wtCount; k++) {
            WordItem wi = wts.get(i).getWords().get(k);
            if (wi.getFreq() != -1) {
                nBuffer[0] = wi.getFreq();
                nBuffer[1] = wi.getLen();
                nBuffer[2] = wi.getHandle();
                for (int n : nBuffer) {
                    out.write(GFCommon.int2bytes(n, false));
                }
                if (nBuffer[1] > 0) {// String length is more than 0
                    out.write(wi.getWord().getBytes());
                }
            }
        }
        // Flush the rest of the modify table. The index is advanced here;
        // the previous version never incremented j and looped forever.
        for (; j < mtCount; j++) {
            WordItem mwi = mts.get(i).getWords().get(j);
            nBuffer[0] = mwi.getFreq();
            nBuffer[1] = mwi.getLen();
            nBuffer[2] = mwi.getHandle();
            for (int n : nBuffer) {
                out.write(GFCommon.int2bytes(n, false));
            }
            if (nBuffer[1] > 0) {// String length is more than 0
                out.write(mwi.getWord().getBytes());
            }
        }
    } else {
        // No modify table: write the original table unchanged.
        // The count goes through the same encoder as every other integer so
        // that the file layout does not depend on whether a modify table
        // exists (this used to be out.writeInt, unlike the branch above).
        out.write(GFCommon.int2bytes(wtCount, false));
        for (j = 0; j < wtCount; j++) {
            WordItem wi = wts.get(i).getWords().get(j);
            nBuffer[0] = wi.getFreq();
            nBuffer[1] = wi.getLen();
            nBuffer[2] = wi.getHandle();
            for (int n : nBuffer) {
                out.write(GFCommon.int2bytes(n, false));
            }
            if (nBuffer[1] > 0) {// String length is more than 0
                out.write(wi.getWord().getBytes());
            }
        }
    }
}
这是我自己的理解,如果有错,欢迎指正。也可以参考:http://blog.csdn.net/sinboy/article/details/624909 ,感觉写得挺好的。