1.装好python和pip环境
2. 安装requests fake-useragent
如果python版本<3,使用:pip install requests 和 pip install fake-useragent
如果python版本>=3,使用:pip3 install requests 和 pip3 install fake-useragent
3.代码实现
import requests
import random
# Cache of the downloaded browser table; filled on the first call to
# get_user_agent() so the list is only requested once per process.
browsers_json = {}


def get_user_agent():
    """Return a randomly chosen user-agent entry from the downloaded table.

    The table is fetched once from the fake-useragent endpoint and stored in
    the module-level ``browsers_json`` dict; later calls reuse the cached
    data instead of requesting it again.

    Returns:
        One entry picked at random from the sequence stored under a randomly
        picked key of ``browsers_json``.

    Raises:
        requests.RequestException: if the table cannot be downloaded or the
            server answers with an HTTP error status.
        ValueError: if the response body is not valid JSON.
    """
    global browsers_json
    if not browsers_json:
        response = requests.get(
            "https://fake-useragent.herokuapp.com/browsers/0.1.11",
            timeout=10,
        )
        response.raise_for_status()
        # Parse the body as JSON; eval() must never be run on text that
        # comes from the network.
        browsers_json = response.json()['browsers']
    # list(...) makes the keys indexable on both Python 2 and Python 3
    # (dict.keys() returns a non-indexable view on Python 3).
    browser = random.choice(list(browsers_json))
    return random.choice(browsers_json[browser])
def do_request(url):
    """Send a GET request to ``url`` with a random user-agent via the proxy.

    Prints the chosen user-agent and the response body, then returns the
    response so the caller can inspect it.

    Args:
        url: Address to request.

    Returns:
        The ``requests.Response`` object for the request.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    user_agent = get_user_agent()
    print(user_agent)
    headers = {'user-agent': user_agent}
    # requests selects a proxy by URL scheme, so the mapping must be keyed
    # by 'http' / 'https'; any other key is never matched.
    proxy = 'http://47.100.21.174:8118'
    proxies = {'http': proxy, 'https': proxy}
    response = requests.get(url, headers=headers, proxies=proxies, timeout=10)
    # Presumably the body contains \uXXXX escapes; decoding them makes the
    # printed text readable.
    print(response.text.encode('utf-8').decode('unicode_escape'))
    return response
def main():
    """Request the feed URL repeatedly until the response reports success.

    Each attempt goes through do_request(); the loop stops as soon as the
    response text contains ``"message": "success"`` and then prints how many
    attempts were needed.
    """
    url = "https://www.toutiao.com/api/pc/feed/?category=news_hot&utm_source=toutiao&widen=1&max_behot_time=0&max_behot_time_tmp=0&tadrequire=true&as=A1C58DD6B38D254&cp=5D63ED72E5A45E1&_signature=FhTyWAAAS3v4Uq0TwudnaxYU8k"
    attempts = 0
    while True:
        attempts += 1
        response = do_request(url)
        if '"message": "success"' in response.text:
            print(attempts)
            break


if __name__ == '__main__':
    main()
4.涉及到的问题及处理
1)随机取到的用户代理(user-agent)有可能在请求时发生错误
采用循环请求方式直到请求成功为止
2)循环请求会涉及到循环调用获取用户代理列表的方法
定义全局变量browsers_json 请求前判断如果有值则不再请求
3)需要注意python2和python3的一个区别
python3中dict.keys()方法返回的类型是dict_keys,不能直接通过下标(index)取值,
需要先通过list(dict.keys())转换成list后再取值