# Parse the previously fetched HTML held in ``page`` using the lxml backend.
item = BeautifulSoup(page, 'lxml')

# lxml/XPath counterpart of the extraction below, kept for comparison:
#   parser = etree.HTML(page)
#   for asin in parser.xpath('//ul[@id="s-results-list-atf"]//li//@data-asin'):
#       print(asin)

# find() returns the first matching tag or None; the XPath analogue is
# .xpath('//div[@id="centerMinus"]') (first result).
div = item.find('div', id='centerMinus')
# Guard the chained lookup: calling .find() on None would raise AttributeError
# when the container is missing from the page.
content = div.find('ul', id='s-results-list-atf') if div is not None else None
# content.get_text() is the analogue of XPath string(//ul[@id="s-results-list-atf"]).
# The original called it here and discarded the result, so the call is omitted.

count = 0
# attrs={'data-result-rank': True} matches every <li> that carries the
# attribute, whatever its value -- like .xpath('//li[@data-result-rank]').
for li in item.find_all('li', attrs={'data-result-rank': True}):
    # select() returns ALL matches of the CSS selector. The combinator matters:
    #   'div.s-item-container > div.a-row'    direct-child <div class="a-row"> only
    #   'div.s-item-container > div .a-row'   any .a-row nested inside a direct-child <div>
    #   'div.s-item-container > div ~ .a-row' .a-row siblings following a direct-child <div>
    # so the three selectors give different results. In XPath terms '>' maps to
    # '/', not '//'.
    ok = li.select('div.s-item-container > div.a-row')
    for x in ok:
        # .descendants yields nested tags and text nodes alike; .string is None
        # for a tag with more than one child, so None lines are expected here.
        # x.get_text() would give the concatenated text instead.
        for lo in x.descendants:
            print(lo.string)
    # Count the item before leaving the loop; the increment used to sit after
    # the break and was unreachable, so the script always printed 0.
    count += 1
    # NOTE(review): only the first result is dumped. Indentation was lost in the
    # source, so the break's nesting level is reconstructed -- confirm, or drop
    # the break to process (and count) every result.
    break
print(count)
Python BeautifulSoup 解析器与 XPath 解析器对比
最新推荐文章于 2023-03-22 21:53:04 发布