Building a batch keyword ranking checker in Python

When running my own sites I like to keep an eye on indexing and keyword rankings, so I put together this batch keyword ranking checker. The full script follows, with a few illustrative snippets in between.

# encoding: utf-8

import urllib, re, random, time, sys, StringIO, socket

try:
    import pycurl
except ImportError:
    pass

from bs4 import BeautifulSoup

# Weight assigned to each of the ten first-page positions; summed per domain later on.
score = {1: 28.56,
         2: 19.23,
         3: 10.20,
         4: 8.14,
         5: 7.50,
         6: 5.72,
         7: 4.01,
         8: 4.41,
         9: 5.53,
         10: 6.70}
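These weights are simply summed per root domain further down: a domain that ranks 1st for one keyword and 3rd for another picks up 28.56 + 10.20 = 38.76 in total. A quick illustration (not part of the script):

ranks = [score[1], score[3]]              # [28.56, 10.20]
host_score = sum(ranks)                   # 38.76
average_score = host_score / len(ranks)   # 19.38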

# Return the root domain; Baidu's own properties keep their full subdomain.
def root_domain(url):
    if 'baidu.com' in url:
        return url
    else:
        try:
            url = url.replace('http://', '')
            # Two-part suffixes need to keep three labels.
            l = ['.com.cn', '.org.cn', '.net.cn', '.gov.cn']
            for suffix in l:
                if suffix in url:
                    return re.search(r'^(.*?\..*?)*([^.]+?\.[^.]+?\.[^.]+)', url).group(2)
            return re.search(r'^(.*?\..*?)*([^.]+?\.[^.]+)', url).group(2)
        except:
            return '-'
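A quick sanity check of what root_domain returns (the hostnames here are placeholders, not from the original post):

print root_domain('www.example.com')      # -> example.com
print root_domain('blog.example.com.cn')  # -> example.com.cn
print root_domain('zhidao.baidu.com')     # -> zhidao.baidu.com (Baidu properties come back unchanged)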

def curl(url, debug=False, **kwargs):
    # Rotate through a few desktop User-Agent strings.
    ua_list = ['Mozilla/5.0 (Windows NT 5.1; rv:37.0) Gecko/20100101 Firefox/37.0',
               'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
               'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36']
    randhead = random.sample(ua_list, 1)
    while 1:
        try:
            s = StringIO.StringIO()
            c = pycurl.Curl()
            c.setopt(pycurl.URL, url)
            c.setopt(pycurl.REFERER, url)
            c.setopt(pycurl.FOLLOWLOCATION, True)
            c.setopt(pycurl.TIMEOUT, 60)
            c.setopt(pycurl.ENCODING, 'gzip')
            c.setopt(pycurl.USERAGENT, '%s' % randhead[0])
            c.setopt(pycurl.NOSIGNAL, True)
            c.setopt(pycurl.WRITEFUNCTION, s.write)
            # Extra keyword arguments are looked up as pycurl constant names.
            for k, v in kwargs.iteritems():
                c.setopt(vars(pycurl)[k], v)
            c.perform()
            c.close()
            return s.getvalue()
        except:
            if debug:
                raise
            continue
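Because of the vars(pycurl) lookup, any pycurl option can be set per call without touching the function; for example (just an illustration, not part of the script):

html = curl('http://www.baidu.com/s?wd=seo&rn=10', CONNECTTIMEOUT=30)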

def get_baidudata(keyword, rn):
    search_url = 'http://www.baidu.com/s?wd=%s&rn=%d' % (urllib.quote(keyword), rn)
    pagetext = curl(search_url)  # fetch the Baidu SERP HTML
    # If a CAPTCHA page comes back, sleep for 10 minutes and query again.
    while 'http://verify.baidu.com' in pagetext:
        print u"CAPTCHA hit, sleeping 10 minutes", keyword
        time.sleep(600)
        pagetext = curl(search_url)
    soup = BeautifulSoup(pagetext)
    # Organic results sit in <div class="result c-container "> blocks.
    data = soup.find_all("div", attrs={'class': 'result c-container '})
    return data

def get_rank_data(keyword, rn):
    data = get_baidudata(keyword, rn)  # organic results for this keyword
    items = {}
    for result in data:
        g = result.find_all("a", attrs={'class': 'c-showurl'})  # the displayed URL
        if g:
            site = re.search(r'([a-zA-Z0-9\.\-]+)', g[0].text)
            host = site.group(1)
            host = root_domain(host)  # reduce to the root domain
            rank = int(result['id'])  # position on the results page (1-10)
            items.setdefault(host, []).append(score[rank])
    return items  # per-keyword data: {root domain: [score of each position it holds]}
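For a single keyword the returned dict maps each root domain to the scores of the positions it occupies; it might look like this (domains and numbers are made up):

items = get_rank_data('python tutorial', 10)
# e.g. {'example.com': [28.56, 7.50], 'example.org': [19.23]}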

def get_keywords(filename):
    # Read the keywords into a list, one keyword per line.
    kwfile = open(filename, 'r')
    kw_list = []
    for line in kwfile:
        kw = line.strip()
        if kw:  # skip blank lines
            kw_list.append(kw)
    kwfile.close()
    return kw_list
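The keyword file is plain text with one query per line; loading it is as simple as (keywords.txt is a hypothetical filename):

kw_list = get_keywords('keywords.txt')
print len(kw_list), 'keywords loaded'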

def get_all_data(filename, rn):
    # Merge the per-keyword results into one dict keyed by root domain.
    kw_list = get_keywords(filename)
    items = {}
    for i, kw in enumerate(kw_list, 1):
        print i, kw
        item = get_rank_data(kw, rn)
        for host, ranks in item.items():
            if host not in items:
                items[host] = ranks
            else:
                items[host].extend(ranks)
    return items

def get_score(filename, rn):
    data = get_all_data(filename, rn)
    fh = open('score.csv', 'a+')
    fh.write('host,kws,average_score,host_score\n')
    for host, ranks in data.items():
        if host is not None:
            host = host.encode('utf-8')
        else:
            host = 'error page'
        kws = len(ranks)                  # number of keywords the domain ranks for
        host_score = sum(ranks)           # total score
        average_score = host_score / kws  # average score per keyword
        fh.write(host + ',' + str(kws) + ',' + str(average_score) + ',' + str(host_score) + '\n')
    fh.close()
    return
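score.csv ends up with one row per root domain; a row might look like this (the values are illustrative only):

host,kws,average_score,host_score
example.com,12,9.37,112.44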

if __name__ == "__main__":
    filename = raw_input("Enter the file containing the keywords: ")
    # The original post breaks off here; presumably it finishes with a call like
    # the one below, with rn=10 to match the ten weighted positions.
    get_score(filename, 10)
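To use it, save the script, put the keywords into a text file in the same directory, run the script, and type the filename at the prompt; the results are appended to score.csv next to the script.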
