python selenium 淘宝价格_python selenium 爬取淘宝

# -*- coding: utf-8 -*-
# author: yesehngbao
# time: 2018/3/29

import re

import pymongo
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# from selenium.webdriver.common.utils import Keys

# MongoDB connection settings used by save_mongo().
MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MONGO_DB = 'test'
MONGO_COLL = 'selenum_tao'

# Seconds each explicit wait blocks before Selenium gives up.
WAIT_SECONDS = 10

# Single Chrome session shared by every function in this script.
webdir = webdriver.Chrome()


def _wait_for(condition):
    """Block until *condition* holds on the shared driver and return its value."""
    return WebDriverWait(webdir, WAIT_SECONDS).until(condition)


def get_page_num():
    """Search Taobao for '衬衫' (shirt) and return the pager's page count.

    Returns:
        The first run of digits found in the pager's ``div.total`` text,
        as a string.

    Raises:
        ValueError: if the pager text contains no digits.
    """
    webdir.get('http://www.taobao.com')

    search_box = _wait_for(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#q')))
    search_button = _wait_for(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))

    search_box.clear()
    search_box.send_keys('衬衫')
    search_button.click()

    total_text = _wait_for(EC.presence_of_element_located(
        (By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total'))).text

    digits = re.search(r'\d+', total_text)
    if digits is None:
        raise ValueError(
            'no page count found in pager text: {!r}'.format(total_text))
    return digits.group()


def gain_page(page, max_retries=3):
    """Jump the result list to *page* using the pager's input box.

    The jump is considered done once the pager's active item shows
    ``str(page)``.

    Args:
        page: Page number to jump to.
        max_retries: Maximum number of attempts (values below 1 are treated
            as 1). A bounded loop is used so that a page which never loads
            cannot recurse without end.

    Raises:
        WebDriverException: re-raised from the last attempt when every
            attempt failed.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            # Make sure the current result list is rendered before paging.
            _wait_for(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.items .item .pic a img')))
            page_box = _wait_for(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.J_Input')))
            submit_button = _wait_for(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '.J_Submit')))

            page_box.clear()
            page_box.send_keys(str(page))
            submit_button.click()

            _wait_for(EC.text_to_be_present_in_element(
                (By.CSS_SELECTOR,
                 '#mainsrp-pager > div > div > div > ul > li.item.active > span'),
                str(page)))
        except WebDriverException:
            if attempt == attempts:
                raise
        else:
            return


def get_page_html(page):
    """Return the driver's current page source, or None when *page* is falsy."""
    if not page:
        return None
    return webdir.page_source


def analysis_page(html):
    """Yield ``{'img': ..., 'money': ...}`` dicts parsed from a result page.

    Items that lack either the image ``data-src`` attribute or the price text
    are skipped instead of aborting the whole page.

    Args:
        html: Page source to parse; a falsy value yields nothing.
    """
    if not html:
        return
    doc = etree.HTML(html)
    if doc is None:
        return

    div_list = doc.xpath(
        './/div[@class="items"]//div[contains(@class,"item")]')
    for div in div_list:
        img = div.xpath('.//div[@class="pic"]/a/img/@data-src')
        money = div.xpath('.//div[contains(@class, "price")]/strong/text()')
        if not img or not money:
            continue
        yield {
            'img': img[0],
            'money': money[0],
        }


def save_mongo(content):
    """Insert *content* into the MONGO_DB / MONGO_COLL collection.

    Args:
        content: A single document (dict) or an iterable of documents.
            Nothing is written when it is empty.
    """
    documents = [content] if isinstance(content, dict) else list(content)
    if not documents:
        # insert_many() rejects an empty list, so there is nothing to do.
        return

    # The context manager closes the client once the insert is done.
    with pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT) as mongo_client:
        coll = mongo_client[MONGO_DB][MONGO_COLL]
        coll.insert_many(documents)


def main():
    """Scrape every result page and store the parsed items in MongoDB."""
    try:
        page_num = get_page_num()
        for page in range(1, int(page_num) + 1):
            gain_page(page)
            html = get_page_html(page)
            content = analysis_page(html)
            save_mongo(content)
    finally:
        # Always release the browser, even when scraping fails part-way.
        webdir.quit()


if __name__ == '__main__':
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值