import csv
from urllib import response

import lxml.html
import requests
from requests import Response

doubanurl = 'https://movie.douban.com/top250?start={}&filter='

# Browser-like headers sent with every request.
# NOTE(review): the site appears to reject the default python-requests
# User-Agent and answer with an empty body, which lxml then reports as
# "ParserError: Document is empty" -- confirm against a live response.
_REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/119.0 Safari/537.36'
    ),
}


def getSource(url):
    """Fetch the target page and return its raw body as bytes.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Named ``resp`` so the imported ``urllib.response`` module is not shadowed.
    resp = requests.get(url, headers=_REQUEST_HEADERS, timeout=10)
    # Fail here with a clear HTTP error instead of handing an empty or error
    # body to the HTML parser further down the line.
    resp.raise_for_status()
    # The body is returned as bytes, so assigning ``resp.encoding`` (which
    # only influences ``resp.text``) would have no effect and is omitted.
    return resp.content


def getEveryItem(source):
    """Parse a listing page and walk over every movie entry in it.

    Args:
        source: HTML of the page, as returned by ``getSource``.

    Raises:
        lxml.etree.ParserError: If ``source`` is empty.
        IndexError: If an entry has no link under ``div[@class="hd"]/a``.
    """
    selector = lxml.html.document_fromstring(source)
    movieItemList = selector.xpath('//div[@class="info"]')
    movieList = []
    # Iterate over the matched nodes. The original looped over the still-empty
    # ``movieList``, so the body never executed.
    for eachMovie in movieItemList:
        movieDict = {}
        title = eachMovie.xpath('div[@class="hd"]/a/span[@class="title"]/text()')
        print(title)
        otherTitle = eachMovie.xpath('div[@class="hd"]/a/span[@class="other"]/text()')
        link = eachMovie.xpath('div[@class="hd"]/a/@href')[0]
        # NOTE(review): the source snippet is cut off at this point; the rest
        # of this statement and of the function is not visible.
        star = eachMovie
爬取网页时出现 raise etree.ParserError(lxml.etree.ParserError: Document is empty) 的问题，想知道哪里出现了错误。源代码如下：
于 2023-11-17 08:28:43 首次发布