python360指数_360的搜索指数排行榜

百度指数的数据以图片形式展示，难以直接抓取，因此这里改为抓取 360 指数。

以下爬取代码基于 Python 3.x。

#!/usr/bin/env python

#-*- encoding: utf-8 -*-

# refer to http://blog.csdn.net/wangtaoking1/article/details/18308635

import http.cookiejar

from urllib import request

from urllib.parse import quote

# Proxy address (host:port). getOpener() hands it to request.ProxyHandler for
# both "http" and "https" traffic, but only when enable_proxy is true.
HTTP_PROXY = '10.13.61.118:6666'

def getOpener(head, enable_proxy=False):
    """Build a urllib opener with cookie support and preset request headers.

    Args:
        head: Mapping of header name to header value; its items become the
            opener's ``addheaders`` list.
        enable_proxy: When true, a ProxyHandler pointing both "http" and
            "https" at HTTP_PROXY is added to the opener.

    Returns:
        The opener produced by ``request.build_opener``, with cookie
        processing and an HTTP handler running at debuglevel=1.
    """
    # Handlers are collected in the order they are passed to build_opener:
    # cookies first, then the optional proxy, then the HTTP handler.
    handlers = [request.HTTPCookieProcessor(http.cookiejar.CookieJar())]

    if enable_proxy:
        handlers.append(
            request.ProxyHandler({"http": HTTP_PROXY, "https": HTTP_PROXY})
        )

    # debuglevel=1 turns on the HTTP handler's debug output; use 0 to silence.
    handlers.append(request.HTTPHandler(debuglevel=1))

    result = request.build_opener(*handlers)
    result.addheaders = list(head.items())
    return result

import os,json

def _month_index_from_body(raw, content_encoding):
    """Extract the integer ``month_index`` from a raw response body, or -1.

    Args:
        raw: Response body as bytes, possibly still compressed.
        content_encoding: Value of the Content-Encoding response header, or
            None when the header is absent.

    Returns:
        ``data[0]['data']['month_index']`` from the JSON payload when it is
        an int; -1 when the body cannot be decompressed, decoded, parsed, or
        does not have that shape.
    """
    import gzip
    import zlib

    try:
        # urllib does not undo Content-Encoding by itself, so a compressed
        # reply has to be inflated here before it can be decoded as UTF-8.
        encoding = (content_encoding or '').strip().lower()
        if encoding == 'gzip':
            raw = gzip.decompress(raw)
        elif encoding == 'deflate':
            try:
                raw = zlib.decompress(raw)
            except zlib.error:
                # Fall back to a raw deflate stream without the zlib header.
                raw = zlib.decompress(raw, -zlib.MAX_WBITS)

        payload = json.loads(raw.decode('utf-8'))
        index = payload.get('data')[0]['data']['month_index']
    except (ValueError, AttributeError, TypeError, LookupError,
            OSError, EOFError, zlib.error):
        # ValueError covers bad UTF-8 and bad JSON; AttributeError, TypeError
        # and LookupError cover a payload of unexpected shape; the remaining
        # ones cover a corrupt or truncated compressed stream.
        return -1

    return index if isinstance(index, int) else -1


def main(school="江苏经贸职业技术学院"):
    """Fetch the 360 index ``month_index`` for one keyword.

    Args:
        school: Keyword to query; it is URL-quoted into the ``q`` parameter
            of the index.so.com overviewJson endpoint, with area "全国".

    Returns:
        The string ``"<school>=<index>"``. The index is -1 when the response
        cannot be decoded or parsed, or when ``month_index`` is not an int.

    Raises:
        Errors raised by ``opener.open`` / ``read`` (network failures)
        are not caught here and propagate to the caller.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        # NOTE(review): "sdcn" looks like a typo for "sdch" - confirm before
        # changing; the value is kept as it was.
        'Accept-Encoding': 'gzip, deflate, sdcn',
    }
    opener = getOpener(header)
    url = "http://index.so.com/index.php?a=overviewJson&q=%s&area=%s" % (
        quote(school), quote("全国"))

    # Close the response deterministically instead of leaving it to the GC.
    with opener.open(url) as response:
        raw = response.read()
        content_encoding = response.headers.get('Content-Encoding')

    index = _month_index_from_body(raw, content_encoding)
    return '%s=%d' % (school, index)

import time

if __name__ == '__main__':
    # Read the keyword list first, so a missing or unreadable list does not
    # leave behind a freshly truncated index.txt.
    # NOTE(review): the list is still opened with the platform default
    # encoding, as before - confirm the encoding of school_list.txt.
    with open("school_list.txt") as school_file:
        schools = school_file.read().splitlines()

    # The with-block closes index.txt even if main() raises mid-run.
    with open("index.txt", 'w', encoding='utf-8') as fp:
        for line in schools:
            name = line.strip()
            if not name:
                # Blank lines would only produce a pointless empty query.
                continue

            # One-second pause before each request (presumably to throttle
            # the load on the server).
            time.sleep(1)
            fp.write(main(name) + '\n')
            # Flush per result so finished lines are on disk if the run is
            # interrupted.
            fp.flush()

查看排行（按指数从高到低排序）：

cat index.txt | sort -t= -k2 -nr | less

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值