# 目的:学习笔记 (Purpose: study notes)
# 代码 (Code)
import csv
import re
import time
from urllib.parse import quote

from selenium import webdriver
def search_product(key):
    """Submit a search for ``key`` and return the number of result pages.

    Uses the module-level ``driver``: types ``key`` into the element with
    id ``q``, clicks the search button, maximizes the window, and then reads
    the page count from the pager element's text.

    Args:
        key: Keyword to type into the search box.

    Returns:
        int: The first run of digits found in the pager text.

    Raises:
        IndexError: If the pager text contains no digits.
    """
    # The find_element_by_* helpers were removed in Selenium 4.3; the generic
    # find_element(by, value) form is available in both Selenium 3 and 4.
    driver.find_element('id', 'q').send_keys(key)
    driver.find_element(
        'xpath',
        '/html/body/div[2]/div/div/div[2]/div/div[1]/div[2]/form/div[1]/button',
    ).click()
    driver.maximize_window()
    # Fixed pause before reading the pager -- presumably gives the results
    # page (and any manual login step) time to finish; TODO confirm.
    time.sleep(10)
    pager_text = driver.find_element(
        'xpath', '//*[@id="mainsrp-pager"]/div/div/div/div[1]'
    ).text
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    first_number = re.findall(r'\d+', pager_text)[0]
    return int(first_number)
def get_product():
    """Scrape the items on the current results page into ``data.csv``.

    Uses the module-level ``driver``. For every matched item element the
    title, price, deal count and shop name are read, printed separated by
    ``|``, and appended as one row to ``data.csv`` (UTF-8, comma-delimited).

    Any exception raised by an element lookup propagates to the caller and
    stops processing of the page; rows written before it are kept.
    """
    # The find_element(s)_by_* helpers were removed in Selenium 4.3; the
    # generic find_element(s)(by, value) form works on Selenium 3 and 4.
    items = driver.find_elements(
        'xpath',
        '//div[@class="items"]/div[@class="item J_MouserOnverReq "]',
    )
    if not items:
        # Nothing matched: leave data.csv untouched rather than creating it.
        return

    # Open the CSV once per page instead of re-opening it for every row.
    with open('data.csv', 'a', newline="", encoding='utf-8') as filecsv:
        csvwriter = csv.writer(filecsv, delimiter=',')
        for div in items:
            # Product title
            info = div.find_element(
                'xpath', './/div[@class="row row-2 title"]/a'
            ).text
            # Product price, suffixed with the currency unit
            price = div.find_element('xpath', './/strong').text + '元'
            # Monthly sales figure
            deal = div.find_element('xpath', './/div[@class="deal-cnt"]').text
            # Shop name
            name = div.find_element(
                'xpath',
                './/div[@class="row row-3 g-clearfix"]/div[1]/a/span[2]',
            ).text
            print(info, price, deal, name, sep='|')
            csvwriter.writerow([info, price, deal, name])
def main():
    """Scrape every result page for the module-level ``keyword``.

    Page 1 is reached through ``search_product`` (which also returns the
    total page count) and scraped with ``get_product``. Each further page is
    loaded directly by URL through the module-level ``driver`` and scraped
    the same way.
    """
    print('正在爬取第1页的数据')
    page = search_product(keyword)
    get_product()

    # Percent-encode the keyword so characters such as '&', '#', '%' or
    # spaces cannot corrupt the query string.
    encoded_keyword = quote(keyword, safe='')

    # The implicit wait is a session-wide setting, so set it once instead of
    # on every iteration. The window was already maximized by search_product.
    driver.implicitly_wait(2)

    # range() instead of `while page_num != page`: a page count below 1 now
    # simply skips the loop instead of spinning forever.
    for page_num in range(1, page):
        print('*'*100)
        print('正在爬取第{}页的数据'.format(page_num+1))
        print('*' * 100)
        # `s` is the result offset; 44 is presumably the number of items per
        # page -- TODO confirm.
        driver.get('https://s.taobao.com/search?q={}&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306&bcoffset=0&ntoffset=6&p4ppushleft=1%2C48&s={}'.format(encoded_keyword, page_num*44))
        get_product()
if __name__ == '__main__':  # script entry point
    # `keyword` and `driver` are module-level names read by search_product,
    # get_product and main.
    keyword = input('请输入你要搜索的商品名字:')
    driver = webdriver.Chrome()
    try:
        driver.get('https://www.taobao.com')
        main()
    finally:
        # Always shut down the browser and its driver process, even when
        # scraping fails part-way through.
        driver.quit()