python selenium 淘宝价格_python selenium 爬取淘宝

该代码实现了一个自动化脚本,利用Selenium库打开淘宝网站并搜索“衬衫”,获取搜索结果的总页数。然后,它逐页跳转搜索结果,用lxml解析页面源码,抓取每件商品的图片链接和价格,并将这些信息存储到MongoDB数据库中。整个过程涉及网页元素定位、页面等待、数据解析和数据库交互。
摘要由CSDN通过智能技术生成

# -*- coding: utf-8 -*-
# author : yesehngbao
# time: 2018/3/29
"""Search Taobao for shirts with Selenium and store image/price pairs in MongoDB.

The script opens the Taobao home page in Chrome, searches for the keyword,
reads the total number of result pages from the pager, then jumps to each
page in turn, parses the rendered HTML with lxml and inserts one document
per item into the configured MongoDB collection.
"""

import re

import pymongo
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

MONGO_HOST = 'localhost'
MONGO_PORT = 27017
MONGO_DB = 'test'
MONGO_COLL = 'selenum_tao'

# Single shared browser session used by every function below.
webdir = webdriver.Chrome()


def get_page_num():
    """Search for the keyword and return the total page count as a string.

    Returns:
        The first run of digits found in the pager's "total" label.

    Raises:
        ValueError: if the pager label contains no digits.
        selenium.common.exceptions.TimeoutException: if an expected element
            does not appear within 10 seconds.
    """
    webdir.get('http://www.taobao.com')
    search_box = WebDriverWait(webdir, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#q')))
    search_button = WebDriverWait(webdir, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
    search_box.clear()
    search_box.send_keys('衬衫')
    search_button.click()
    total_text = WebDriverWait(webdir, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR,
             '#mainsrp-pager > div > div > div > div.total'))).text
    match = re.search(r'\d+', total_text)
    if match is None:
        raise ValueError(
            'no page count found in pager text: %r' % total_text)
    return match.group(0)


def gain_page(page, retries=3):
    """Jump the result list to ``page`` using the pager's page-number box.

    Args:
        page: 1-based page number to navigate to.
        retries: maximum number of attempts before giving up. The original
            code retried forever through unbounded recursion, which ends in
            a RecursionError when the page never loads.

    Raises:
        selenium.common.exceptions.WebDriverException: if every attempt
            fails; the error from the last attempt is re-raised.
    """
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            # Wait until at least one item image exists so the list is rendered.
            WebDriverWait(webdir, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '.items .item .pic a img')))
            page_box = WebDriverWait(webdir, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.J_Input')))
            submit_button = WebDriverWait(webdir, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '.J_Submit')))
            page_box.clear()
            page_box.send_keys(str(page))
            submit_button.click()
            # The jump is complete once the active pager entry shows the page.
            WebDriverWait(webdir, 10).until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR,
                     '#mainsrp-pager > div > div > div > ul > li.item.active > span'),
                    str(page)))
            return
        except WebDriverException:
            if attempt == attempts:
                raise


def get_page_html(page):
    """Return the current page source, or None when ``page`` is falsy."""
    if page:
        return webdir.page_source
    return None


def analysis_page(html):
    """Yield ``{'img': ..., 'money': ...}`` for each item found in ``html``.

    Nodes that lack either the lazy-load image URL (``data-src``) or a price
    are skipped instead of raising IndexError, because the loose
    ``contains(@class, "item")`` selector can match elements that are not
    complete product cards.
    """
    if not html:
        return
    doc = etree.HTML(html)
    if doc is None:
        return
    item_nodes = doc.xpath(
        './/div[@class="items"]//div[contains(@class,"item")]')
    for node in item_nodes:
        images = node.xpath('.//div[@class="pic"]/a/img/@data-src')
        prices = node.xpath(
            './/div[contains(@class, "price")]/strong/text()')
        if not images or not prices:
            continue
        yield {
            'img': images[0],
            'money': prices[0],
        }


def save_mongo(content):
    """Insert the scraped documents into the configured MongoDB collection.

    Args:
        content: a single document (dict) or an iterable of documents, e.g.
            the generator returned by ``analysis_page``.
    """
    documents = [content] if isinstance(content, dict) else list(content)
    if not documents:
        # insert_many rejects an empty list; there is nothing to store.
        return
    mongo_client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    try:
        # Collection.insert was removed in PyMongo 4; insert_many replaces it.
        mongo_client[MONGO_DB][MONGO_COLL].insert_many(documents)
    finally:
        mongo_client.close()


def main():
    """Scrape every result page and store its items, then close the browser."""
    try:
        page_num = get_page_num()
        for page in range(1, int(page_num) + 1):
            gain_page(page)
            html = get_page_html(page)
            content = analysis_page(html)
            save_mongo(content)
    finally:
        # Always release the Chrome process, even when scraping fails.
        webdir.quit()


if __name__ == '__main__':
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值