import os
import urllib.parse
import urllib.request


def create_request(barname, page, base_url):
    """Build the HTTP request for one listing page of a Tieba forum.

    Args:
        barname: Forum ("bar") name; sent as the ``kw`` query parameter.
        page: 1-based page number chosen by the user.
        base_url: URL prefix the encoded query string is appended to
            (the script passes one that already ends with ``?``).

    Returns:
        A ``urllib.request.Request`` carrying a browser-like User-Agent.
    """
    headers = {
        'User-Agent': 'Mozilla / 5.0(Windows NT 6.1;WOW64) AppleWebKit / 537.36 (KHTML, likeGecko) Chrome / 65.0.3325.146 Safari / 537.36'
    }
    # The ``pn`` parameter is an item offset, not a page index: the script
    # advances it by 50 per page, so page 1 -> 0, page 2 -> 50, ...
    offset = (page - 1) * 50
    query = urllib.parse.urlencode({'kw': barname, 'pn': offset})
    return urllib.request.Request(url=base_url + query, headers=headers)


def request_data(request):
    """Send ``request`` and return the open HTTP response.

    The caller owns the returned response and should close it (it can be
    used as a context manager).
    """
    return urllib.request.urlopen(request)


def parse_response(response):
    """Read the whole response body and decode it as UTF-8 text."""
    return response.read().decode('utf-8')


def save_data(content, file_path, file_name):
    """Write ``content`` to ``file_path/file_name`` as UTF-8 text.

    The target directory is created when it does not exist yet. Failures are
    reported on stdout instead of being raised, so one bad page does not
    abort the whole crawl.

    Args:
        content: Text to write.
        file_path: Directory that receives the file.
        file_name: Name of the file inside ``file_path``.

    Returns:
        True when the file was written, False when an error was reported.
    """
    path = os.path.join(file_path, file_name)
    try:
        # An empty ``file_path`` means "current directory"; makedirs('')
        # would raise, so only create a directory when one is named.
        if file_path:
            os.makedirs(file_path, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as fp:
            fp.write(content)
    except Exception as e:
        # Deliberately best-effort: report the problem and carry on.
        print(e)
        return False
    print(file_name + '保存成功')
    return True


def main():
    """Prompt for a forum name and page range, then save each page as HTML."""
    barname = input('请输入要查询的吧名')
    start_page = int(input('请输入起始页'))
    end_page = int(input('请输入结束页'))
    base_url = 'http://tieba.baidu.com/f?'
    file_path = 'tieba'

    all_saved = True
    for page in range(start_page, end_page + 1):
        # Build the request object for this page.
        request = create_request(barname, page, base_url)
        # Perform the request; the context manager closes the connection.
        with request_data(request) as response:
            # Decode the response body.
            content = parse_response(response)
        # Persist the page locally as "<page>.html".
        file_name = str(page) + '.html'
        if not save_data(content, file_path, file_name):
            all_saved = False

    # Only claim overall success when every page was actually written.
    if all_saved:
        print('全部保存成功')


if __name__ == "__main__":
    main()
爬虫:百度贴吧_python吧
最新推荐文章于 2020-12-03 09:14:24 发布