# -*- encoding:UTF-8 -*-
# Fetch the results of the CSDN betting game (GamePawn) at a fixed time interval.
# URL: http://community.csdn.net/Games/GamePawn.aspx?id=395&_t_fq8zksda
import sys, urllib
import re
import chardet
def getCharset(string):
    """Return the encoding name that chardet reports for ``string``.

    The value is the ``'encoding'`` entry of the mapping returned by
    ``chardet.detect(string)``.
    """
    # TODO: discern the charset automatically.
    return chardet.detect(string)['encoding']
# Address of the page to fetch.
url = "http://community.csdn.net/Games/GamePawn.aspx?id=395&_t_fq8zksda"
# Open the connection.
wp = urllib.urlopen(url)
# Capture a run of digits that is followed by whitespace and a "/".
# The escapes must be backslashes (\d, \s, \t); written with forward slashes
# the pattern only matched the literal characters "/", "d", "s" and "t".
# NOTE(review): the trailing "/" is kept as a literal slash -- confirm it
# matches the page markup.
pattern = r"(\d+)[\s\t]+/"
p = re.compile(pattern, re.S | re.M)
# Print the charset detected for the pattern string (the parenthesised
# single-argument form prints the same value under Python 2).
print(getCharset(pattern))
# Matches from the most recent non-empty read; stays an empty list when the
# response has no body, so the name is always defined after the loop.
m = []
# Read the page content.
try:
    while 1:
        content = wp.read()
        if not content:
            # An empty read means the response is exhausted.
            break
        # Decode with the detected charset, then re-encode as gb2312.
        content = unicode(content, getCharset(content)).encode("gb2312")
        m = p.findall(content)
finally:
    # Release the connection even if decoding or matching raises.
    wp.close()
de