import requests
import os, sys, stat
from lxml import etree
import time
class HuangMan():
def __init__(self):
#设置请求头
self.headers = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}
self.url_list = []
self.Hman_url_list = []
self.Hman_name_list = []
def get_url_list(self):
#这里是设置爬取到第3页,想要更多的话请自行更改
url = "http://www.93qoqo.com/artlist/27-%d.html"
for i in range(2,4):
#获取第2到3页的url
self.url_list.append(url % i)
#由于第一页网址不同,所以单独写一个获取函数
def start_1(self):
url_1 = "http://www.93qoqo.com/artlist/27.html"
response = requests.get(url_1, headers = self.headers).content.decode('utf-8')
html = etree.HTML(response)