注意以上两图红色部分以及运行结果的区别,附源代码如下:
# Crawl the Ctrip weekend "around" index page, follow every category link
# found on it, and print the title and link of each product listed on those
# category pages.
#
# NOTE(review): the original script was collapsed onto a single line, so its
# indentation is reconstructed here. The product loop is assumed to be nested
# inside the category loop (the sub-page URL depends on the category link) --
# confirm against the original post.
from urllib.parse import urljoin

import requests
from lxml import etree

BASE_URL = 'http://weekend.ctrip.com'
INDEX_URL = BASE_URL + '/around/'
# Without an explicit timeout, requests waits indefinitely on a stalled server.
REQUEST_TIMEOUT = 10  # seconds


def _fetch_tree(page_url):
    """Download *page_url* and return the page parsed as an lxml HTML tree."""
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    return etree.HTML(response.content.decode())


def _first(node, path):
    """Return the first XPath match of *path* under *node*, or None."""
    matches = node.xpath(path)
    return matches[0] if matches else None


index_tree = _fetch_tree(INDEX_URL)
for link_div in index_tree.xpath('//div[@class="wc_link_title"]'):
    category_href = _first(link_div, 'a/@href')
    if category_href is None:
        # A title block without a link cannot be followed; skip it instead
        # of aborting the whole crawl with an IndexError.
        continue

    # urljoin copes with both site-relative and absolute hrefs, unlike plain
    # string concatenation. Distinct variable names keep the inner loop from
    # clobbering the state of the outer one.
    category_tree = _fetch_tree(urljoin(BASE_URL, category_href))
    for product_div in category_tree.xpath('//div[@class="product_m"]'):
        title = _first(product_div, 'h2/a/text()')
        href = _first(product_div, 'h2/a/@href')
        if title is None or href is None:
            # Incomplete product entry: nothing meaningful to print.
            continue
        print(title, href)