# 近期有业务涉及易贝网(eBay)的爬虫,写了一个 demo 拿出来供大家参考。淘宝、闲鱼、1688、速卖通、京东、苏宁、国美、当当、网易、微店、小红书、拼多多、唯品会、亚马逊、一号店的爬虫后面也会慢慢放出来。
# -*-coding:utf8 -*-
import requests
from lxml import html
from utils import user_agent
def run(q, page):
    """Fetch one page of eBay search results and return the listed items.

    Args:
        q: Search keyword. It is interpolated verbatim into the ``_nkw``
            query parameter (no escaping is applied here).
        page: 1-based result page number; anything ``int()`` accepts.

    Returns:
        A list of dicts, one per ``<li>`` directly under the element whose
        id is ``ListViewInner``. Each dict has the keys ``img`` (main image
        URL), ``url`` (item link), ``table`` (title text) and ``jiage``
        (price text). A field whose node is absent from the page is ``None``
        instead of aborting the whole scrape.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the server does not answer within the timeout.
        ValueError: if ``page`` cannot be converted to an integer.
    """
    page_no = int(page)
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "accept-encoding": "gzip, deflate",
        "accept-language": "zh-CN,zh;q=0.8",
        "connection": "keep-alive",
        # NOTE(review): presumably returns a User-Agent string; confirm in utils.
        "User-Agent": user_agent.userAgent(),
        "X-Requested-With": "XMLHttpRequest",
    }
    # _pgn used to be hard-coded to 4, so the requested page number never
    # matched `page`; it now follows the argument. _skc is the number of
    # results to skip, at 50 results per page.
    url = (
        'https://www.ebay.com/sch/i.html?_sacat=0&_nkw=%s&_pgn=%s&_skc=%s&rt=nc&_dmd=1'
        % (q, page_no, 50 * (page_no - 1))
    )
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Tolerate stray invalid bytes rather than failing the whole page.
    page_html = response.content.decode('utf-8', errors='replace')
    dom_tree = html.etree.HTML(page_html)
    if dom_tree is None:
        # The parser produced no document (e.g. an empty body).
        return []

    results = []
    for item in dom_tree.xpath('//*[@id="ListViewInner"]/li'):
        # Relative XPaths on the already-selected <li> avoid re-scanning the
        # whole document once per field.
        img = item.xpath('div[1]/div/a/img/@src')  # main image
        link = item.xpath('div[1]/div/a/@href')  # item link
        title = item.xpath('h3/a/text()')  # title
        price = item.xpath('ul[1]/li[1]/span/text()')  # price
        results.append({
            "img": img[0] if img else None,
            "url": link[0] if link else None,
            # The last text node is used for both title and price.
            "table": title[-1] if title else None,
            "jiage": price[-1] if price else None,
        })
    return results
# Demo entry point: scrape the first result page for "iphone" and print it.
if __name__ == '__main__':
    demo_items = run("iphone", 1)
    print(demo_items)