The requests library covers the same ground as the earlier urllib.request, but unlike the latter it needs no wrapping of our own: we can import it and call it directly.
For example, a GET request:
import requests

url = 'http://www.baidu.com'

# GET request with requests
response = requests.get(url)

with open('baidu.html', 'wb') as f:
    f.write(response.content)
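Beyond saving the raw bytes, a requests response also exposes the status code, the guessed encoding, and the final URL directly. A minimal sketch of this, assuming Baidu's /s search path with a wd query parameter purely as an illustrative request (it is not something the example above depends on):

import requests

# requests builds and encodes the query string from the params dict;
# with urllib.request we would urlencode it and assemble the URL by hand.
response = requests.get('http://www.baidu.com/s', params={'wd': 'python'})

print(response.status_code)  # HTTP status code, e.g. 200
print(response.encoding)     # encoding guessed from the response headers
print(response.url)          # final URL, including the encoded query string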
POST request:
import requests
import json

# 1. url
url = 'http://fanyi.baidu.com/sug'

# 2. form / data / body
def translate(kw):
    form = {
        'kw': kw
    }
    # With urllib.request we would have to do this ourselves:
    # 2.1 convert the dict to a str
    # form_str = parse.urlencode(form)
    # 2.2 convert the str to bytes
    # form_bytes = form_str.encode('utf-8')
    # 3. call urlopen with url and form
    # response = request.urlopen(url, data=form_bytes)

    # requests' post method takes the form dict directly
    response = requests.post(url, data=form)

    # response.text is response.content decoded with response.encoding
    # (i.e. response.content.decode('utf-8' / 'gbk' / 'gb2312' / 'gb18030'));
    # the encoding can also be set manually, e.g. response.encoding = 'utf-8'

    # Parse the JSON body into a dict
    res_dict = json.loads(response.text)
    print(res_dict)  # print the parsed result

    result = res_dict['data'][0]['v']
    return result

if __name__ == '__main__':
    res1 = translate('河边草')
    res2 = translate('黄昏')
    print(res1, res2)
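Since the sug endpoint returns JSON, requests can also decode it directly with response.json(), which replaces the explicit json.loads(response.text) call. A shorter variant of the function above, using the same URL and form:

import requests

def translate(kw):
    # response.json() parses the JSON body and returns a dict
    response = requests.post('http://fanyi.baidu.com/sug', data={'kw': kw})
    res_dict = response.json()
    return res_dict['data'][0]['v']

if __name__ == '__main__':
    print(translate('黄昏'))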
==================================================== Adding a proxy ======================================================
import requests

# url
url = 'http://www.xicidaili.com'

# Add a proxy: scheme -> proxy URL (optionally with user:password auth)
proxy = {
    'http': 'http://root:*******@255.255.255.255:8080'
}

# Add headers
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

# Call requests and get the response
response = requests.get(url, headers=headers, proxies=proxy)
print(response.text)
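Free proxies are often slow or dead, so in practice it helps to add a timeout and catch connection errors instead of letting the script hang. A minimal sketch, with the proxy address as a placeholder just like above:

import requests

proxy = {'http': 'http://255.255.255.255:8080'}  # placeholder proxy address

try:
    # timeout bounds how long we wait for the proxy / server to respond
    response = requests.get('http://www.xicidaili.com', proxies=proxy, timeout=5)
    print(response.status_code)
except requests.exceptions.RequestException as e:
    # covers ProxyError, ConnectTimeout, and other request failures
    print('request via proxy failed:', e)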
===================================================== Scraping the Xueqiu homepage =======================================================
import requests
import json
import pymysql

# Open the database connection
class sql_my(object):
    def __init__(self):
        self.db = pymysql.connect(host='127.0.0.1', user='root',
                                  password='123456', database='xueqiu')
        # Create a cursor object with cursor()
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql):
        self.cursor.execute(sql)
        self.db.commit()

    def __del__(self):
        # Close the cursor and the connection
        self.cursor.close()
        self.db.close()

s = sql_my()
# sql = 'insert into fangchan(target) values(11)'
# s.execute_modify_mysql(sql)

# url is the request URL, num is the number of requests, i.e. how many pages of data to fetch
def request_sql(url, num):
    # num acts as the exit condition of the recursion
    if num <= 0:
        return
    mylist = []
    headers = {
        'Cookie': 'device_id=8ef6474c963f19dd4b434f9b6cc4c4bd; _ga=GA1.2.283628643.1534314264; _gid=GA1.2.446448707.1534314264; aliyungf_tc=AQAAAJqLCnKOTwsAF/gnd4T1KbbEtJnL; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; u=971534333229421; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534333716,1534341738,1534342886,1534342891; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534348215; _gat_gtag_UA_16079156_4=1',
        'Referer': 'https://xueqiu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    # Send the request
    response = requests.get(url, headers=headers)
    # Convert the response body into a dict
    res_dict = json.loads(response.text)
    list_list = res_dict['list']
    for list_items in list_list:
        data = json.loads(list_items['data'])
        mylist.append(list_items['id'])
        print(data)
        data['column'] = list_items['column']
        try:
            sql = 'insert into fangchan(tid,title,description,target,`column`) values({id},"{title}","{description}","{target}","{column}");'.format(**data)
            s.execute_modify_mysql(sql)
        except Exception:
            # Fall back to an insert without the description field
            sql = 'insert into fangchan(tid,title,target,`column`) values({id},"{title}","{target}","{column}");'.format(**data)
            s.execute_modify_mysql(sql)
    # Build the URL of the next page from the last id of this page and recurse
    url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={}&count=15&category=111'.format(mylist[-1])
    request_sql(url, num - 1)

if __name__ == '__main__':
    # Test run: fetch 10 pages of data
    url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111'
    request_sql(url, 10)
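A note on the INSERT statements above: building SQL with str.format breaks as soon as a title or description contains a double quote, and it is open to SQL injection. pymysql's cursor.execute accepts %s placeholders and escapes the values itself. A sketch of a safer insert helper, assuming the same fangchan table and the data dict built in request_sql (insert_item is a hypothetical helper, not part of the original code):

def insert_item(db, cursor, data):
    # %s placeholders let pymysql escape the values, so quotes in
    # title/description no longer break the statement
    sql = ('insert into fangchan(tid, title, description, target, `column`) '
           'values (%s, %s, %s, %s, %s)')
    cursor.execute(sql, (data['id'], data.get('title'), data.get('description'),
                         data.get('target'), data.get('column')))
    db.commit()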