在c/c++中,可以使用含位域的结构体来实现存储的优化。最近的项目中因为一个需求简单写了个测试程序,暂时放在这里。
需求大概是这样的:用32位的空间存储一个文本中词语的位置信息,这个位置信息是具有维度的:常用的是3维形式,即按段、段内句、句内词这样来记录。一些相关的运算及索引的压缩,都是建立在这种空间结构之上的(以前做压缩时称之为整数向量的压缩)。
但有一个特殊的地方是,如果仅仅是访问这三个维度的信息,则如下形式已经可以满足要求:
struct Pos_3
{
_UINT 32 m _Word : WORD_BIT_NUM;
_UINT 32 m _Sent : SENT_BIT_NUM;
_UINT 32 m _Para : PARA_BIT_NUM;
};
但在这个应用中,也常常需要把任意两个相邻的分量作为一个整体来访问,即取“段句”存储空间的内容,或“句词”存储空间的内容。
所以这个测试程序中,又使用了联合体。
#define PARA_BIT_NUM 14 /* bits used to store the paragraph index */
#define SENT_BIT_NUM 10 /* bits used to store the sentence index */
#define WORD_BIT_NUM 8  /* bits used to store the word index */

/* The three fields must fill the 32-bit storage unit exactly; otherwise the
 * combined views below would no longer line up with the individual fields. */
#if (PARA_BIT_NUM + SENT_BIT_NUM + WORD_BIT_NUM) != 32
#error "PARA_BIT_NUM + SENT_BIT_NUM + WORD_BIT_NUM must equal 32"
#endif

/*
 * Selects the declaration order of the bit-fields.
 * Defined:     word, sentence, paragraph.
 * Not defined: paragraph, sentence, word.
 * Bit-field allocation order is implementation-defined. On compilers that
 * allocate from the least significant bit, defining this macro puts the word
 * index in the low-order bits. Choose the variant that matches the target.
 */
#define _MEM_ADDR_FROW_LOW

/* unsigned int is the portable bit-field base type and is 32 bits wide on
 * mainstream targets; unsigned long would be 64 bits on LP64 platforms. */
typedef unsigned int _UINT32;

/*
 * Three-dimensional word position (paragraph, sentence, word) stored in one
 * 32-bit unit, with three overlapping views of the same bits:
 *   p_s_w : paragraph, sentence and word as separate fields
 *   ps_w  : paragraph+sentence as one combined field, plus word
 *   p_sw  : paragraph, plus sentence+word as one combined field
 *
 * Reading a combined field after writing the individual ones relies on all
 * three structs being laid out over the same bits, which in turn relies on
 * the implementation-defined bit-field layout of the compiler in use.
 */
union _UINT32_Pos_3
{
    /* Main view: every dimension is individually addressable. */
    struct Pos_3_Access_Main
    {
#ifdef _MEM_ADDR_FROW_LOW
        _UINT32 m_Word : WORD_BIT_NUM;
        _UINT32 m_Sent : SENT_BIT_NUM;
        _UINT32 m_Para : PARA_BIT_NUM;
#else
        _UINT32 m_Para : PARA_BIT_NUM;
        _UINT32 m_Sent : SENT_BIT_NUM;
        _UINT32 m_Word : WORD_BIT_NUM;
#endif
    } p_s_w;

    /* Helper view 1: paragraph and sentence read/written as one value. */
    struct Pos_3_Access_Help1
    {
#ifdef _MEM_ADDR_FROW_LOW
        _UINT32 m_Word : WORD_BIT_NUM;
        _UINT32 m_ParaSent : PARA_BIT_NUM + SENT_BIT_NUM;
#else
        _UINT32 m_ParaSent : PARA_BIT_NUM + SENT_BIT_NUM;
        _UINT32 m_Word : WORD_BIT_NUM;
#endif
    } ps_w;

    /* Helper view 2: sentence and word read/written as one value. */
    struct Pos_3_Access_Help2
    {
#ifdef _MEM_ADDR_FROW_LOW
        _UINT32 m_SentWord : SENT_BIT_NUM + WORD_BIT_NUM;
        _UINT32 m_Para : PARA_BIT_NUM;
#else
        _UINT32 m_Para : PARA_BIT_NUM;
        _UINT32 m_SentWord : SENT_BIT_NUM + WORD_BIT_NUM;
#endif
    } p_sw;
};
int _tmain(int argc, _TCHAR* argv[])
{
_UINT32 i32 = 0;
_UINT32_Pos_3* p = (_UINT32_Pos_3*)&i32;
// (p,s,w) = (1,2,3)
printf("(p,s,w) = (1,2,3)/n");
p->p_s_w.m_Para = 1;
p->p_s_w.m_Sent = 2;
p->p_s_w.m_Word = 3;
printf("_UINT32_Pos_3 's memval: %xh/n",p->p_s_w);
printf(" .para: %xh/n",p->p_s_w.m_Para);
printf(" .sent: %xh/n",p->p_s_w.m_Sent);
printf(" .word: %xh/n",p->p_s_w.m_Word);
printf(" .parasent: %xh/n",p->ps_w.m_ParaSent);
printf(" .sentword: %xh/n/n",p->p_sw.m_SentWord);
// (p,s,w) = (1,0,3)
printf("(p,s,w) = (1,0,3)/n");
p->p_s_w.m_Para = 1;
p->p_s_w.m_Sent = 0;
p->p_s_w.m_Word = 3;
printf("_UINT32_Pos_3 's memval: %xh/n",p->p_s_w);
printf(" .para: %xh/n",p->p_s_w.m_Para);
printf(" .sent: %xh/n",p->p_s_w.m_Sent);
printf(" .word: %xh/n",p->p_s_w.m_Word);
printf(" .parasent: %xh/n",p->ps_w.m_ParaSent);
printf(" .sentword: %xh/n/n",p->p_sw.m_SentWord);
// (p++,s++,w++)
printf("(p++,s++,w++)/n");
p->p_s_w.m_Para ++;
p->p_s_w.m_Sent ++;
p->p_s_w.m_Word ++;
printf("_UINT32_Pos_3 's memval: %xh/n",p->p_s_w);
printf(" .para: %xh/n",p->p_s_w.m_Para);
printf(" .sent: %xh/n",p->p_s_w.m_Sent);
printf(" .word: %xh/n",p->p_s_w.m_Word);
printf(" .parasent: %xh/n",p->ps_w.m_ParaSent);
printf(" .sentword: %xh/n/n",p->p_sw.m_SentWord);
return 0;
}
测试的输出结果如下:
(p,s,w) = (1,2,3)
_UINT32_Pos_3 's memval: 40203h
.para: 1h
.sent: 2h
.word: 3h
.parasent: 402h
.sentword: 203h
(p,s,w) = (1,0,3)
_UINT32_Pos_3 's memval: 40003h
.para: 1h
.sent: 0h
.word: 3h
.parasent: 400h
.sentword: 3h
(p++,s++,w++)
_UINT32_Pos_3 's memval: 80104h
.para: 2h
.sent: 1h
.word: 4h
.parasent: 801h
.sentword: 104h
Press any key to continue