字典树 + 并查集 + 滑动窗口 + 优先队列 demo

统计最近一段时间(500 秒的滑动窗口)内各单词被搜索的频率

单词最长为 10 个字符(MAX_WORD_LEN);实现中按 str[i] - 'a' 建立字典树,即假定单词只包含小写字母 a–z

void init();
初始化函数

void searchWord(int tsec, char str[], int num);
在 tsec 时刻,单词 str 被搜索,本次搜索使它的频率增加 num

void banWord(int tsec, char str[]);
在 tsec 时刻,禁用(ban)单词 str,此后的排名(rank)中不会再出现 str

void mergeWords(int tsec, char str1[], char str2[]);
在 tsec 时刻,把单词 str2 合并到 str1:str2 不再出现在 rank 中,它的频率全部计入 str1(相当于并查集中的父子关系,str1 为父)

int getRank10(int, char[][MAX_WORD_LEN + 1]);
在 tsec 时刻,返回该时刻及之前 500 秒内出现频率最高的至多 10 个单词:按频率从高到低写入输出数组,频率相同时字典序较小的单词排在前面;返回值为写入的单词个数。

// Longest word, in characters, not counting the terminating NUL.
#define MAX_WORD_LEN 10
// Values of Node::state.
// USEFUL: the only state getRank10 accepts for the ranking.
#define USEFUL 0
// BAN: set by banWord; searchWord ignores searches for such a word.
#define BAN 1
// MERGED: set by mergeWords on the word that was merged into another one.
#define MERGED 2

/*
 * Copy the NUL-terminated string `src` into `dest`, terminator included.
 * No bounds check is performed: `dest` must be large enough for the
 * whole string.
 */
static void mstrcpy(char dest[], const char src[])
{
	char *out = dest;
	const char *in = src;

	for (;;) {
		*out = *in;
		if (*in == '\0') {
			break;
		}
		++out;
		++in;
	}
}

/*
 * Ordering predicate used for rank tie-breaks.
 * Returns true when str1 sorts strictly before str2, comparing the strings
 * character by character as plain char values; a proper prefix sorts before
 * the longer string. Returns false when the strings are equal or when
 * str2 ends first.
 */
static bool check(char str1[], char str2[]){
	const char *a = str1;
	const char *b = str2;

	/* Skip the common prefix, stopping at the end of str1. */
	while (*a != '\0' && *a == *b) {
		++a;
		++b;
	}

	/* str2 is exhausted: str1 is equal to it or longer, so not before it. */
	if (*b == '\0') {
		return false;
	}
	return *a < *b;
}

//>>>>>>>>>>>>>>>>>DATA struct>>>>>>>>>>>>>>
// One node of the word trie. The node reached by spelling out a whole word
// also carries that word's bookkeeping (state, frequency, merge parent).
struct Node
{
	// USEFUL, BAN or MERGED.
	int state;
	// Node this word was merged into (set by mergeWords, cleared by banWord);
	// nullptr when the word has no merge parent.
	Node* fathe;
	// Frequency currently credited to this word. searchWord and mergeWords
	// also add the contributions of merged words to their root's val;
	// outWindow subtracts entries again when they leave the window.
	int val;
	int isRank;  // whether the word is currently in the top-10 list (set and cleared inside getRank10)
	// Text of the word, NUL-terminated; written by searchWord and mergeWords.
	char word[11];
	// Child links, indexed by (letter - 'a').
	Node* keys[26];
};

// One recorded search; the element type of the searchWindow queue.
struct SearchInfo
{
	// Time stamp (tsec) passed to searchWord.
	int timeId;
	// Trie node of the searched word.
	Node* ptr;
	int fre; // frequency added by this search (the num argument of searchWord)
};

// Element of the temporary linked list that getRank10 builds on every call.
struct RankNode{
	// Word node this list entry stands for.
	Node* ptr;
	// Copy of ptr->word taken when the entry was inserted.
	char word[11];

	// Previous and next list entry. getRank10 maintains `front` on insertion
	// but only ever follows `next`.
	RankNode* front;
	RankNode* next;
};
//>>>>>>>>>>>>>>>>>DATA struct>>>>>>>>>>>>>>

//>>>>>>>>>>>>>>>>Global Value>>>>>>>>>>>

//----- search info: queue of recorded searches; the live entries are
//      searchWindow[f .. r-1] (r only grows until init resets both cursors)
SearchInfo searchWindow[50000]; int f = 0, r = 0;
//----- word list: node pool for the trie; countWord is the number of nodes
//      handed out so far, root is the current trie root
Node wordSet[50000] = { 0 };	int countWord = 0;	Node* root;
//----- rank links: node pool for the list built by getRank10; countLink is
//      the number of entries taken during the current call
RankNode links[3501]; int countLink = 0;
//<<<<<<<<<<<<<<<<Global Value<<<<<<<<<<<<<<<<<<

//>>>>>>>>>>>>>>>Common func>>>>>>>>>>>>>>>>>
// Walk the trie along `str`, taking a fresh node from the wordSet pool for
// every missing edge, and return the node that represents the whole word
// (the root itself for an empty string).
// Each character is mapped to a child slot with `c - 'a'`; neither the
// character range nor the pool size is checked here.
Node* getNode(char str[]){
	Node* cur = root;
	for (const char* c = str; *c != '\0'; ++c){
		Node** slot = &cur->keys[*c - 'a'];
		if (*slot == nullptr){
			*slot = &wordSet[countWord++];
		}
		cur = *slot;
	}
	return cur;
}

// Follow the `fathe` links upwards from `p` and return the first node that
// has no parent (which is `p` itself when it was never merged).
Node* getRoot(Node* p){
	Node* cur = p;
	for (;;){
		Node* parent = cur->fathe;
		if (parent == nullptr) return cur;
		cur = parent;
	}
}

void outWindow(int tsec){
	while (f < r && searchWindow[f].timeId < tsec - 500){
		Node* n = searchWindow[f].ptr;
		n->val = n->val - searchWindow[f].fre;

		if (n->state == MERGED){
			Node* orignal = getRoot(n);
			orignal->val = orignal->val - searchWindow[f].fre;
		}
		f++;
	}
}
//<<<<<<<<<<<<<<Common func<<<<<<<<<<<<<<<<<<<<<<

void init(){
	f = 0; r = 0;
	for (int i = 0; i < countWord; ++i){ //clear word list
		wordSet[i].fathe = nullptr;  wordSet[i].state = USEFUL; 	wordSet[i].val = 0;
		//for (int j = 0; j < 26; j++) wordSet[i].keys[j] = nullptr; 
	}
	//优化3 减少不必要的操作
	//countWord = 0;
	root = &wordSet[countWord++];
}

void searchWord(int tsec, char str[], int num){
	//in Window
	Node* n = getNode(str);
	if (n->state != BAN){
		searchWindow[r].timeId = tsec; searchWindow[r].fre = num;
		mstrcpy(n->word, str); searchWindow[r++].ptr = n;
		//动态更新节点val
		n->val = n->val + num;
		if (n->fathe != nullptr){
			n = getRoot(n); //it's original
			if (n->state != BAN) n->val = n->val + num;
		}
	}
	//out Window
	outWindow(tsec);
}

void banWord(int tsec, char str[]){
	Node* n = getNode(str);
	//动态更新节点val
	if (n->state == MERGED && n->val != 0){
		Node* o = getRoot(n);
		o->val = o->val - n->val;
	}
	n->state = BAN;
	n->fathe = nullptr;
	outWindow(tsec);
}

// Merge `str2` into `str1` at time `tsec`: str2 is marked MERGED, its parent
// becomes the root of str1's group, and its current counter is added to that
// root. Expired window entries are dropped afterwards.
//
// The link is only made when str2 is not already part of str1's group
// (standard union-find rule: unite only when the roots differ). Without
// that check, merging a word with itself, or merging a group's root into one
// of its own members, made the node its own ancestor, so every later getRoot
// call looped forever. Re-merging a word already in the group also added its
// counter to the root a second time.
//
// As in the original ("optimisation 2"), other complicated merge histories
// are not handled: when str2 belongs to a different group, its counter is
// not removed from that group's root.
void mergeWords(int tsec, char str1[], char str2[]){
	Node* n1 = getNode(str1);
	mstrcpy(n1->word, str1); // a merge may be the first time str1 is seen

	Node* n2 = getNode(str2);
	Node* o = getRoot(n1);

	if (getRoot(n2) != o){
		n2->fathe = o;
		n2->state = MERGED;

		// keep the root's running total up to date
		o->val = o->val + n2->val;
	}

	outWindow(tsec);
}

// Build the ranking for time `tsec`.
//
// After dropping expired searches, every entry still in the window is
// visited; a MERGED word is replaced by its root. Nodes in the USEFUL state
// are collected in a linked list of at most 10 entries that is kept sorted
// from weakest (head->next) to strongest (tail). A node is stronger when
// its val is higher or, on equal val, when check() says its word sorts first.
// The words are written to cur_rank strongest first (cur_rank[0] is the
// best); the return value is the number of words written (0..10).
//
// Node::isRank marks nodes that are in the list so a word with several
// window entries is inserted once; all marks are cleared before returning.
// The global window cursor `f` doubles as the scan index and is restored.
//
// NOTE(review): every insertion takes a new entry from `links` (evicted
// entries are not reused) and countLink is not checked against the array
// size — confirm that the number of insertions per call cannot exceed it.
int  getRank10(int tsec, char cur_rank[][MAX_WORD_LEN + 1]){
	outWindow(tsec);
	countLink = 0;  int ret = 0;

	// links[0] is a sentinel head without a word of its own
	RankNode* head = &links[countLink++];  head->next = nullptr; head->front = nullptr;
	int ff = f;   // remember the real window start; f is used as the cursor below
	while (f < r){
		Node* n = searchWindow[f].ptr;
		// Optimisation 1: fewer string copies
		if (n->state == MERGED) n = getRoot(n);

		if (n->state == USEFUL && n->isRank == 0){
			if (ret < 10){
				//insert to link
				RankNode* p = head;
				// insertion is written out inline instead of calling a helper, to save call overhead
				// advance while n is stronger than the next entry
				while (p->next)
				{
					if (n->val > p->next->ptr->val || (n->val == p->next->ptr->val && check(n->word, p->next->word)))
						p = p->next;
					else break;
				}

				RankNode* s = &links[countLink++];
				s->ptr = n;  mstrcpy(s->word, n->word);

				// splice s in right after p
				s->front = p; s->next = p->next;
				if (p->next != nullptr) p->next->front = s;
				p->next = s;

				n->isRank = 1;
				ret++;
			}
			else{
				// list is full: compare with the weakest entry, which is the first list element
				RankNode* p = head->next;

				// only a node stronger than the weakest entry gets in
				if (p->ptr->val < n->val || (n->val == p->ptr->val && check(n->word, p->word))){
					while (p->next)
					{
						if (n->val > p->next->ptr->val || (n->val == p->next->ptr->val && check(n->word, p->next->word)))
							p = p->next;
						else break;
					}

					RankNode* s = &links[countLink++];
					s->ptr = n;  mstrcpy(s->word, n->word);

					s->front = p; s->next = p->next;
					if (p->next != nullptr) p->next->front = s;
					p->next = s;

					n->isRank = 1;
					// evict the weakest entry and clear its mark
					head->next->ptr->isRank = 0;
					head->next = head->next->next;
				}
			}
		}

		f++;
	}
	f = ff;   // restore the window start
	//copy to the result
	// the list runs weakest -> strongest, so fill cur_rank from the back
	RankNode* t = head->next;
	for (int i = 0; i < ret; ++i){
		mstrcpy(cur_rank[ret - 1 - i], t->word);
		t->ptr->isRank = 0;
		t = t->next;
	}

	return ret;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值