# -*- coding: utf-8 -*-
"""Decide whether one SQL-like filter selects a subset of another filter.

A condition string such as ``(age < 15 and sex = 0) or age > 30`` is
tokenized, parsed into a binary tree of ``BTree`` nodes, flattened into an
OR-of-ANDs list by ``normalizeExp`` and then compared clause by clause.

Grammar (BNF)::

    Compare = > | < | =
    Atom    = Field Compare Number
    SubSet  = Atom | (Exp)*
    Factor  = SubSet (and SubSet)*
    Exp     = Factor (or Factor)*
"""

isa = isinstance
conjunction = ['and', 'or']
compare = ['>', '<', '=']


class BTree(object):
    """Binary tree node of a parsed expression.

    ``data`` is either an operator keyword (a ``str`` such as ``'and'``) or
    an atom ``(field, op, value)``; ``l`` and ``r`` are the child nodes and
    stay ``None`` on a leaf.
    """

    def __init__(self, data):
        self.l = None
        self.r = None
        self.data = data

    def insertl(self, ltree):
        """Attach ``ltree`` as the left child."""
        self.l = ltree

    def insertr(self, rtree):
        """Attach ``rtree`` as the right child."""
        self.r = rtree

    def printTree(self, indent):
        """Print this node's payload prefixed by ``indent``."""
        if isa(self.data, str):
            print(indent + self.data)
        else:
            l, op, r = self.data
            # Operator first, then its two operands, separated by spaces.
            print('%s %s %s' % (indent + op, l, r))


##########################
# Recursive-descent parser over the nested token list
##########################
# NOTE(review): atom(), getAtom(), getSubSet() and the first line of
# getFactor() are written from the grammar in the module docstring and from
# their call sites; confirm them against another copy of this module.

def atom(token):
    """Convert a raw token to ``int``, then ``float``, else keep it a ``str``."""
    try:
        return int(token)
    except ValueError:
        pass
    try:
        return float(token)
    except ValueError:
        return str(token)


def getAtom(strSQL):
    """Consume ``Field Compare Number`` from the front of ``strSQL``.

    Returns a leaf ``BTree`` whose data is the tuple ``(field, op, value)``.
    Raises ``SyntaxError`` when the three tokens are missing or malformed.
    """
    if len(strSQL) < 3:
        raise SyntaxError('incomplete condition, expected: Field Compare Number')
    field = strSQL.pop(0)
    op = strSQL.pop(0)
    value = strSQL.pop(0)
    if not isa(field, str) or field in conjunction:
        raise SyntaxError('invalid field name: %r' % (field,))
    if op not in compare:
        raise SyntaxError('unknown comparison operator: %r' % (op,))
    if isa(value, list):
        raise SyntaxError('a comparison value cannot be a parenthesized group')
    return BTree((field, op, value))


def getSubSet(strSQL):
    """Parse ``SubSet``: a parenthesized expression (nested list) or an atom."""
    if not strSQL:
        raise SyntaxError('unexpected end of expression')
    if isa(strSQL[0], list):
        # read_from() already turned "( ... )" into a nested token list.
        return getExp(strSQL.pop(0))
    return getAtom(strSQL)


def getFactor(strSQL):
    """Parse ``Factor``: sub-sets chained with ``and`` (left-associative)."""
    factor = getSubSet(strSQL)
    # Anything other than 'and' is left in place for getExp() to handle.
    while strSQL and strSQL[0] == 'and':
        op = strSQL.pop(0)
        subset = getSubSet(strSQL)
        factorOP = BTree(op)
        factorOP.insertl(factor)
        factorOP.insertr(subset)
        factor = factorOP
    return factor


def getExp(strSQL):
    """Parse ``Exp``: factors chained with ``or`` (left-associative).

    Consumes ``strSQL`` completely and raises ``SyntaxError`` when a token
    other than ``or`` follows a factor.
    """
    exp = getFactor(strSQL)
    while strSQL:
        op = strSQL.pop(0)
        if op != 'or':
            raise SyntaxError('keyword should be or')
        factor = getFactor(strSQL)
        expOP = BTree(op)
        expOP.insertl(exp)
        expOP.insertr(factor)
        exp = expOP
    return exp


##########################
# Turn the SQL condition string into a syntax tree
##########################

def tokenize(s):
    """Split ``s`` into tokens; operators and parentheses need no spaces."""
    for key in compare:
        s = s.replace(key, ' ' + key + ' ')
    return s.replace('(', ' ( ').replace(')', ' ) ').split()


def read_fromPart(s):
    """Read tokens up to the matching ``)`` and return them as a list.

    Called after an opening ``(`` has been consumed; nested groups become
    nested lists. Raises ``SyntaxError`` when the closing ``)`` is missing.
    """
    L = []
    while s:
        ch = s.pop(0)
        if ch == ')':
            return L
        if ch == '(':
            L.append(read_fromPart(s))
        else:
            L.append(atom(ch))
    # Tokens ran out before the group was closed.
    raise SyntaxError('unexpcted ) part')


def read_from(s):
    """Turn the flat token list ``s`` into a nested list, one level per group."""
    L = []
    while s:
        ch = s.pop(0)
        if ch == ')':
            raise SyntaxError('unexpcted )')
        if ch == '(':
            L.append(read_fromPart(s))
        else:
            L.append(atom(ch))
    return L


def read(s):
    """Tokenize ``s`` and group its parenthesized parts."""
    return read_from(tokenize(s))


# Build the syntax tree
def pharse(stringExp):
    """Parse the condition string ``stringExp`` into a ``BTree``."""
    strSQL = read(stringExp)
    exp = getExp(strSQL)
    return exp


##########################
# Print an expression tree
##########################
# NOTE(review): the intended indent width is not known; four spaces assumed.
INDENT = '    '


def outputExp(exp, indent):
    """Print ``exp`` depth-first, indenting each level by ``INDENT``."""
    if exp.data:
        exp.printTree(indent)
    if exp.l:
        outputExp(exp.l, indent + INDENT)
    if exp.r:
        outputExp(exp.r, indent + INDENT)


def addtowlist(l, r):
    """Return every left clause joined with every right clause.

    ``l`` and ``r`` are lists of clauses (lists of atoms). Each result clause
    is a new list, so the inputs are never modified.
    """
    return [subsetl + subsetr for subsetl in l for subsetr in r]


##########################
# Normalize an expression, e.g. turn
#   (A<10 OR A>20) AND (B<10 OR B>20)
# into
#   (A<10 AND B<10) OR (A<10 AND B>20) OR (A>20 AND B<10) OR (A>20 AND B>20)
##########################

def normalizeExp(exp):
    """Flatten the tree ``exp`` into a list of AND-clauses that are OR-ed.

    Each clause is a list of atoms ``(field, op, value)``. A node whose data
    is neither ``'and'`` nor ``'or'`` but has children yields ``[]``.
    """
    if not exp.l:
        return [[exp.data]]
    l = normalizeExp(exp.l)
    r = normalizeExp(exp.r)
    op = exp.data
    if op == 'and':
        return addtowlist(l, r)
    if op == 'or':
        return l + r
    return []


##########################
# Subset tests
##########################

def is_subset_atom_atom(left, right):
    """Return True when atom ``left`` implies atom ``right``.

    Both atoms are ``(field, op, value)``. Atoms on different fields are
    never related; ``x > 40`` implies ``x > 30``; ``x = 5`` implies
    ``x > 3``; an inequality never implies an equality or the opposite
    inequality.
    """
    assert left and right
    ll, lop, lr = left
    rl, rop, rr = right
    assert lop in compare and rop in compare
    if ll != rl:
        return False
    if lop == rop == '>':
        return lr >= rr
    if lop == rop == '<':
        return lr <= rr
    if lop == rop == '=':
        return lr == rr
    if lop == '=':
        # A single value lies inside a half-open range iff it is strictly
        # beyond the bound.
        if rop == '>':
            return lr > rr
        return lr < rr
    return False


def is_subset_sub(left, right):
    """Return True when AND-clause ``left`` implies AND-clause ``right``.

    Every atom of ``right`` must be implied by at least one atom of ``left``.
    So ``age > 10`` is not a subset of ``age > 10 and weight > 100``, while
    ``age > 30 and sex = 0`` is a subset of ``age > 18``.
    """
    assert left and right
    return all(
        any(is_subset_atom_atom(atoml, atomr) for atoml in left)
        for atomr in right
    )


def is_subset_exp(left, right):
    """Return True when every clause of ``left`` implies some clause of ``right``."""
    assert left and right
    return all(
        any(is_subset_sub(subl, subr) for subr in right)
        for subl in left
    )


class Query(object):
    """A parsed filter condition that can be tested against another one."""

    def __init__(self, q):
        self._q = q
        self._tree = normalizeExp(pharse(q))
        assert self._tree

    def is_subset(self, other):
        """Return True when every row matched by this query matches ``other``."""
        if not self._tree:
            return False
        if not other._tree:
            return True
        return is_subset_exp(self._tree, other._tree)


def _demo():
    """Print a few comparisons and check the expected subset relations."""
    t0 = Query('age > 40')  # middle-aged
    t1 = Query('age > 18')  # adults
    print(t0.is_subset(t0))
    print(t0.is_subset(t1))
    print(t1.is_subset(t0))
    print(t1.is_subset(t1))
    print('-' * 30)

    t2 = Query('age > 18 and weight < 100')  # slim adults
    t3 = Query('age > 18 or weight < 100')  # adults, or weight under 100
    print(t0.is_subset(t2))
    print(t0.is_subset(t3))

    print(t2.is_subset(t0))
    print(t2.is_subset(t3))

    print(t3.is_subset(t2))

    r0 = Query('age > 30 and sex = 0')
    r1 = Query('age > 40 and sex = 0')

    print(r0.is_subset(r1))
    print(r1.is_subset(r0))
    print('=' * 30)

    t0 = Query('(age < 15 and sex = 0) or age > 30')
    t1 = Query('age < 7')
    t2 = Query('age < 18')

    print('*' * 30)
    checks = [
        ('t0 is subset of t0:', t0, t0, True),
        ('t0 is subset of t1:', t0, t1, False),
        ('t1 is subset of t0:', t1, t0, False),
        ('t2 is subset of t0:', t2, t0, False),
    ]
    for label, left, right, expected in checks:
        assert left.is_subset(right) == expected, label
        print('-' * 30)

    queries = [
        Query('age < 15'),
        Query('age > 30'),
        Query('age > 18'),
        Query('age > 40'),
        Query('age > 30 and sex = 0'),
    ]
    # expected[i][j] is the value of queries[i].is_subset(queries[j]).
    expected = [
        [True, False, False, False, False],
        [False, True, True, False, False],
        [False, False, True, False, False],
        [False, True, True, True, False],
        [False, True, True, False, True],
    ]
    for i, row in enumerate(expected):
        if i:
            # Blank line between the groups of one left-hand query.
            print('')
        for j, want in enumerate(row):
            label = 'q%d is subset of q%d:' % (i, j)
            assert queries[i].is_subset(queries[j]) == want, label
            print('-' * 30)


if __name__ == '__main__':
    _demo()