#coding=utf-8
import urllib2, re, urllib
def shangq(key):
    """Fetch recommended words for ``key`` from the Baidu Shangqing endpoint.

    The keyword is URL-encoded into the ``word`` query parameter, the page
    is downloaded with a 5 second timeout, and every
    ``"word":"...","total":N`` pair found in the response body is extracted.

    Args:
        key: Keyword to look up; it is passed to ``urllib.quote_plus``.

    Returns:
        A list of ``(word, total)`` tuples in the order they appear in the
        response. If the request or the parsing raises, the list is
        ``[(key, exception)]`` instead, so callers always receive a list of
        2-tuples and never see the exception propagate.
    """
    pattern = re.compile(r'"word":"([^"]+)","total":(\d+)')
    url = 'http://shangqing.baidu.com/recomword/recomWordCache_findRecomWord.htm?area_id=&word=' + urllib.quote_plus(key)
    try:
        req = urllib2.Request(url)
        resp = urllib2.urlopen(req, timeout=5)
        try:
            html = resp.read()
        finally:
            # Release the connection even when read() fails.
            resp.close()
        segs = pattern.findall(html)
    except Exception as e:
        # Best effort: report the failure as a single row instead of aborting.
        segs = [(key, e)]
    return segs
# Breadth-first keyword expansion: start from one seed keyword, ask shangq()
# for related words, and keep expanding the newly found words until
# waci_count new keywords have been written to itseo_waci.txt.
waci = '墙板机'  # seed keyword to expand
waci_count = 500  # maximum number of new keywords to collect
ret = [waci]  # work queue; it grows while the loop below iterates over it
ret1 = set(ret)  # same keywords as ret, kept in a set for fast membership tests
k = 0  # number of new keywords written so far
# `with` closes the output file even if a lookup, write or print raises.
with open('itseo_waci.txt', 'w') as fout:
    fout.write('id\tkey\tindex\n')
    for i in ret:
        if k >= waci_count:
            break
        segs = shangq(i)
        for key, index in segs:
            if k >= waci_count:
                # Enforce the cap inside a batch as well; checking only
                # between lookups let one response overshoot the limit.
                break
            # To keep only suggestions containing the seed keyword, also
            # require `waci in key` here.
            if key in ret1:
                continue
            k += 1
            ret.append(key)
            ret1.add(key)
            line = '%d\t%s\t%s' % (k, key, index)
            fout.write(line + '\n')
            print(line)
print('end!')
import urllib2, re, urllib
# NOTE(review): this redefines the `shangq` function that already appears
# earlier in this file with an identical body; one copy is likely redundant.
def shangq(key):
    """Fetch recommended words for ``key`` from the Baidu Shangqing endpoint.

    The keyword is URL-encoded into the ``word`` query parameter, the page
    is downloaded with a 5 second timeout, and every
    ``"word":"...","total":N`` pair found in the response body is extracted.

    Args:
        key: Keyword to look up; it is passed to ``urllib.quote_plus``.

    Returns:
        A list of ``(word, total)`` tuples in the order they appear in the
        response. If the request or the parsing raises, the list is
        ``[(key, exception)]`` instead, so callers always receive a list of
        2-tuples and never see the exception propagate.
    """
    pattern = re.compile(r'"word":"([^"]+)","total":(\d+)')
    url = 'http://shangqing.baidu.com/recomword/recomWordCache_findRecomWord.htm?area_id=&word=' + urllib.quote_plus(key)
    try:
        req = urllib2.Request(url)
        resp = urllib2.urlopen(req, timeout=5)
        try:
            html = resp.read()
        finally:
            # Release the connection even when read() fails.
            resp.close()
        segs = pattern.findall(html)
    except Exception as e:
        # Best effort: report the failure as a single row instead of aborting.
        segs = [(key, e)]
    return segs
# NOTE(review): this section repeats the crawl that already ran earlier in
# this file; running it again reopens itseo_waci.txt in 'w' mode and so
# overwrites the first run's output. Confirm whether the repeat is intended.
#
# Breadth-first keyword expansion: start from one seed keyword, ask shangq()
# for related words, and keep expanding the newly found words until
# waci_count new keywords have been written to itseo_waci.txt.
waci = '墙板机'  # seed keyword to expand
waci_count = 500  # maximum number of new keywords to collect
ret = [waci]  # work queue; it grows while the loop below iterates over it
ret1 = set(ret)  # same keywords as ret, kept in a set for fast membership tests
k = 0  # number of new keywords written so far
# `with` closes the output file even if a lookup, write or print raises.
with open('itseo_waci.txt', 'w') as fout:
    fout.write('id\tkey\tindex\n')
    for i in ret:
        if k >= waci_count:
            break
        segs = shangq(i)
        for key, index in segs:
            if k >= waci_count:
                # Enforce the cap inside a batch as well; checking only
                # between lookups let one response overshoot the limit.
                break
            # To keep only suggestions containing the seed keyword, also
            # require `waci in key` here.
            if key in ret1:
                continue
            k += 1
            ret.append(key)
            ret1.add(key)
            line = '%d\t%s\t%s' % (k, key, index)
            fout.write(line + '\n')
            print(line)
print('end!')