import requests
class tieba(object):
def __init__(self,tieba_name):
self.tieba_name = tieba_name
self.start_url = 'https://tieba.baidu.com/f?kw=' + tieba_name + '&ie=utf-8&pn={}'
self.headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
'AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/70.0.3538.77 '
'Safari/537.36'}
def get_url_list(self, pages=10):
    """Build the list of listing-page URLs to fetch.

    Args:
        pages: Number of URLs to generate. Defaults to 10, which matches
            the previously hard-coded page count. Zero or a negative
            value yields an empty list.

    Returns:
        A list of ``pages`` URLs produced by filling ``self.start_url``
        with the offsets 0, 50, 100, ...
    """
    # Build the URL list: the ``pn`` placeholder advances in steps of 50
    # (presumably the number of threads per listing page -- confirm).
    url_list = [self.start_url.format(page * 50) for page in range(pages)]
    print(url_list)
    return url_list
def parse_url(self, url, timeout=10):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without it a stalled
            connection would block the caller indefinitely.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond
            within ``timeout`` seconds.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(url, headers=self.headers, timeout=timeout)
    # bytes.decode() with no argument decodes as UTF-8.
    return response.content.decode()
def save_html(self,html,num):
with open('英雄联盟贴吧源码
利用python爬取百度贴吧源码
最新推荐文章于 2024-07-30 17:23:09 发布