from selenium import webdriver
import re
import requests
import time
import csv
import random
from lxml import etree
from mouse import move,click
from bs4 import BeautifulSoup
import re
from pyquery import PyQuery as pq
import time
# Start Chrome, search Taobao for `goods`, switch to QR-code login and read
# the total number of result pages into `token`.

# Raw string: '\c' is not a valid escape sequence and triggers a warning on
# current Python versions; the resulting path value is unchanged.
path = r'.\chromedriver.exe'
# NOTE(review): `executable_path` and the `find_element_by_*` helpers are
# Selenium 3 style APIs -- confirm the installed Selenium version still
# provides them.
driver = webdriver.Chrome(executable_path=path)
goods = '小米手机'
driver.get('http://www.taobao.com')
driver.find_element_by_id('q').send_keys(goods)
# find_element_by_class_name('btn-search tb-bg') cannot be used because that
# value contains a space, so only the single class 'btn-search' is looked up.
driver.find_element_by_class_name('btn-search').click()
time.sleep(2)
driver.find_element_by_class_name('icon-qrcode').click()
driver.maximize_window()
# Fixed pause -- presumably to give the user time to scan the QR code and
# finish logging in before the pager is read.
time.sleep(20)
pager_text = driver.find_element_by_xpath('//*[@id="mainsrp-pager"]/div/div/div/div[1]').text
# The pager text is expected to contain "共 N 页" ("N pages in total").
page_counts = re.findall("共 (.*?) 页", pager_text)
if not page_counts:
    raise RuntimeError('could not read the page count from pager text: %r' % pager_text)
token = int(page_counts[0])
def drop_down():
    """Scroll the current page towards the bottom in four stages.

    Before each jump the function sleeps for two seconds, then uses the
    module-level ``driver`` to set the document's ``scrollTop`` to 10%, 40%,
    70% and finally 100% of its ``scrollHeight`` -- presumably so that content
    loaded on scroll has a chance to appear before the page source is read.
    """
    script_template = 'document.documentElement.scrollTop = document.documentElement.scrollHeight * %f'
    for tenths in range(1, 11, 3):
        time.sleep(2)
        driver.execute_script(script_template % (tenths / 10))
def _first(values, default=''):
    """Return the first XPath result in *values*, or *default* when it is empty."""
    return values[0] if values else default


def _with_scheme(url):
    """Prefix a protocol-relative URL ("//host/path") with "https:"."""
    return 'https:' + url if url.startswith('//') else url


def _squash(parts):
    """Join text nodes and drop every newline and space from the result."""
    return ''.join(parts).replace('\n', '').replace(' ', '')


def _parse_item(div):
    """Extract one CSV row from a single search-result item node.

    Returns ``[price, intro, pic_url, paid, shop_url, own, place]``. A field
    whose node is missing from the item becomes an empty string instead of
    raising ``IndexError`` and aborting the whole scrape.
    """
    pic_url = _with_scheme(_first(div.xpath('./div/div/div//@src')))
    price = _squash(div.xpath('./div[2]/div[1]/div[1]//text()'))
    paid = ''.join(div.xpath('./div[2]/div[1]/div[2]//text()'))
    intro = _squash(div.xpath('./div[2]/div[2]//text()'))
    # First link found inside the title cell.
    shop_url = _with_scheme(_first(div.xpath('./div[2]/div[2]//@href')))
    own = _first(div.xpath('./div[2]/div[3]/div/a/span[2]/text()'))
    place = _first(div.xpath('./div[2]/div[3]/div[2]/text()'))
    return [price, intro, pic_url, paid, shop_url, own, place]


# The implicit wait is a driver setting, so it is applied once rather than
# being re-issued after every page load.
driver.implicitly_wait(10)

# `token` is the total page count; the `s` query parameter advances by 44 per
# page starting at 0, so the valid offsets are 44 * 0 .. 44 * (token - 1).
for page in range(token):
    url = 'https://s.taobao.com/search?q=' + goods + '&s=' + str(44 * page)
    driver.get(url)
    drop_down()
    tree = etree.HTML(driver.page_source)
    div_list = tree.xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]/div')
    # Open the output file once per page instead of once per row; rows are
    # still appended in the order the items appear.
    with open('{}.csv'.format(goods), 'a', newline='', encoding='utf-8-sig') as fp:
        writer = csv.writer(fp)
        for div in div_list:
            row = _parse_item(div)
            print(row[2])  # picture URL, kept as progress output
            writer.writerow(row)
# Taobao scraping in practice: advanced (淘宝实战进阶)
# Blog-page residue: "latest recommended article published 2024-07-19 16:51:29"