# Level 1: Parsing a web page with XPath (第1关 XPath解析网页)
import urllib.request
from lxml import etree
def get_data(url):
    """
    Fetch a page, extract link texts with XPath and print them.

    :param url: address to request
    :return: None; the list of extracted texts is printed to stdout
    """
    # The context manager closes the HTTP response even when reading or
    # decoding the body raises.
    with urllib.request.urlopen(url=url) as response:
        html = response.read().decode("utf-8")
    # *************** Begin *************** #
    parse = etree.HTML(html)
    # Text nodes of every <a> reached via div[@class='left']/ul/li/span.
    item_list = parse.xpath("//div[@class='left']/ul/li/span/a/text()")
    # *************** End ***************** #
    print(item_list)
# Level 2: Parsing a web page with BeautifulSoup (第2关 BeautifulSoup解析网页)
import requests
from bs4 import BeautifulSoup
def get_data(url, headers):
    """
    Download a page and collect the text of every poem entry on it.

    :param url: uniform resource locator, the address to request
    :param headers: request headers sent with the GET request
    :return data: list with the text of the first <p> of each <li> found
        under the first <ul> of the first div whose class is "left"
    :raises AttributeError: if the expected div/ul is missing, or an <li>
        contains no <p> (the lookup yields None in those cases)
    """
    # ***************** Begin ******************** #
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is ``params``, which would put them in the query string.
    response = requests.get(url, headers=headers)
    # Decode the body as UTF-8 regardless of what the server declares.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    items = soup.find("div", class_='left').ul.find_all("li")
    data = [item.p.text for item in items]
    # ****************** end ********************* #
    return data