# 用于发起请求
from urllib import request
import lxml.etree as le
from fake_useragent import UserAgent
def get_html(url, cookie):
    """Fetch a page and return its body decoded as UTF-8 text.

    Args:
        url: The URL to request.
        cookie: Cookie header value copied from a logged-in browser session.

    Returns:
        The response body as a str.
    """
    headers = {
        # Instantiate the UserAgent here so this function also works when the
        # module is imported; the original read a global `ua` that was only
        # created under the `if __name__ == '__main__'` guard (NameError on import).
        'User-Agent': UserAgent().random,
        'cookie': cookie,
    }
    req = request.Request(url, headers=headers)
    # Context manager guarantees the HTTP response is closed even on error.
    with request.urlopen(req) as resp:
        return resp.read().decode('utf-8')
def get_data(html):
    """Extract recipe titles and links from the page HTML.

    Args:
        html: Raw HTML text of the page.

    Returns:
        A list of ``{'title': ..., 'link': ...}`` dicts, paired in page order.
    """
    html_x = le.HTML(html)
    titles = html_x.xpath('//div[@class="dr_mod3list clearfix"]//div[@class="info"]/a[1]/text()')
    links = html_x.xpath('//div[@class="dr_mod3list clearfix"]//div[@class="info"]/a[1]/@href')
    # zip pairs each title with its link and stops at the shorter sequence,
    # avoiding the IndexError the original range(len(titles)) loop could raise
    # if the page ever yields unequal counts.
    return [{'title': title, 'link': link} for title, link in zip(titles, links)]
def show_data(datas):
    """Print every scraped record, each followed by a separator line."""
    separator = "-" * 50
    for record in datas:
        print(record)
        print(separator)
def main(url, cookie):
    """Entry point: download the page, parse it, and print the results."""
    show_data(get_data(get_html(url, cookie)))
# Run only when executed directly as a script, not on import.
if __name__ == '__main__':
    # Module-global UserAgent instance available to the rest of the script.
    ua = UserAgent()
    url = 'https://i.meishi.cc/daren_task/daren.php'
    # Session cookie copied from a logged-in browser; it expires periodically.
    cookie = 'BAIDU_SSP_lcr=https://www.meishij.net/; PHPSESSID=1278d47b741cffd144827509e8782174; UserGuide=1; __SessHandler=5440b38a5fac3b036619d345255bd489; loginId=14674499; MSCookieKey=f3ef07451a456c1546b31b7674e73f02.14674499; Hm_lvt_01dd6a7c493607e115255b7e72de5f40=1590129537; UM_distinctid=1723b1b2fce130-0023dd0a3d0079-c373667-100200-1723b1b2fd139d; CNZZDATA1259001544=1268762511-1590125727-%7C1590125727; last_update_time=1590130395; Hm_lpvt_01dd6a7c493607e115255b7e72de5f40=1590130396'
    main(url, cookie)
# NOTE: The cookie must be obtained from your browser after logging in to the
# site. It expires over time; when it does, log in again and copy a fresh one.