目录
一、体会
本次项目实训让我收获很多:进一步了解了团队协作的大体机制,学到了更多 Selenium 爬虫的相关技巧和方法,同时也对国内目前的云服务器产品有了大体的印象。
二、代码
from selenium import webdriver
import time
import csv
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
# ---------------------------------------------------------------------------
# Shared state used by the rest of the script.
# ---------------------------------------------------------------------------
# Local date as YYYY-MM-DD; appended to every row and used in the CSV name.
nowtime = time.strftime("%Y-%m-%d")
# Vendor label; prepended to every output row.
company = ['百度云']
# All collected rows, written to the CSV at the end of the script.
itemList = []
# Scratch list holding the row currently being assembled.
tableList = []
# Second-column values of rows whose selection had to be dismissed through a
# dialog; rows carrying one of these values are skipped by the scrape loop.
stopServiceType = []
# Product-type tabs that the scrape loop is allowed to open.
serviceTypeList = ['通用型', '计算型', '内存型', '高主频型', '本地SSD型', '大数据型']

_SYSTEM_DISK_INPUT_XPATH = '//*[@id="ctrl-bcc-price-ctrl-bcc-price-systemDiskDragger-input-input"]'


def _system_disk_input():
    """Locate the system-disk size input; looked up afresh on every call."""
    return driver.find_element_by_xpath(_SYSTEM_DISK_INPUT_XPATH)


# ---------------------------------------------------------------------------
# Start Chrome and open the BCC price calculator.
# NOTE(review): `executable_path` and the `find_element_by_*` helpers are
# Selenium 3 era APIs; confirm the pinned Selenium version before upgrading.
# ---------------------------------------------------------------------------
driver = webdriver.Chrome(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
driver.set_window_size(1280, 700)
driver.get('https://cloud.baidu.com/calculator.html#/bcc/price')
time.sleep(2)

# Set the system-disk field to 40GB.  Presumably the double-click selects the
# existing value so that the typed text replaces it -- TODO confirm.
action_chains = ActionChains(driver)
action_chains.double_click(_system_disk_input()).perform()
time.sleep(2)
_system_disk_input().send_keys('40GB')
time.sleep(1)
_system_disk_input().send_keys(Keys.ENTER)
time.sleep(1)
# Scrape loop: for every region, open every known product-type tab, select
# every instance row and record the price shown in the total-price panel.
#
# NOTE(review): the source text had lost its indentation; the nesting below
# was reconstructed from control flow (statements after a `continue`/`break`
# must sit at an outer level).  Confirm against the original script.


def _list_items(container_id):
    """Return the <li> elements of the first <ul> inside element `container_id`."""
    return (driver.find_element_by_id(container_id)
            .find_element_by_tag_name('ul')
            .find_elements_by_tag_name('li'))


def _instance_rows():
    """Return the <tr> elements currently inside the instances table panel."""
    return (driver.find_element_by_id('ctrl-bcc-price-instances-body-panel')
            .find_elements_by_tag_name('tr'))


# Elements are looked up again by index on every iteration instead of
# iterating one list; presumably earlier references go stale once a click
# re-renders the page -- TODO confirm.
for location_num in range(len(_list_items('ctrl-bcc-price-region'))):
    location = _list_items('ctrl-bcc-price-region')[location_num]
    location_text = location.text
    location.click()
    # Every region except the first gets an extra 5 s on top of the base wait.
    if location_num != 0:
        time.sleep(5)
    time.sleep(5)

    for product_type_num in range(len(_list_items('ctrl-bcc-price-subServiceType'))):
        product_type = _list_items('ctrl-bcc-price-subServiceType')[product_type_num]
        product_type_text = product_type.text
        # NOTE(review): this stops at the first tab that is not whitelisted,
        # so whitelisted tabs after it are never visited -- confirm that
        # `break` (rather than `continue`) is intended.
        if product_type_text not in serviceTypeList:
            break
        product_type.click()
        time.sleep(3)

        for tr_num in range(len(_instance_rows())):
            tableList = []
            tr = _instance_rows()[tr_num]
            # Read the product type once, before any click, so the value is
            # still available if the row can no longer be queried afterwards.
            row_type = tr.find_elements_by_tag_name('td')[1].text
            if row_type in stopServiceType:
                continue

            # Click the selection button in the first column.
            try:
                driver.find_element_by_xpath('//*[@id="ctrl-bcc-price-instances-single-select{}"]'.format(tr_num)).click()
            except Exception:
                # `Exception` rather than a bare `except:` so that Ctrl-C
                # (KeyboardInterrupt) and SystemExit still stop the scrape.
                time.sleep(3)
                try:
                    # Presumably a dialog is covering the page; click its button.
                    driver.find_element_by_xpath('/html/body/esui-dialog/div[3]/div/div').click()
                except Exception:
                    # No dialog button either: scroll the page and skip the row.
                    js = "var q=document.documentElement.scrollTop=200"
                    driver.execute_script(js)
                    continue
                # Dialog dismissed: skip this product type from now on.
                stopServiceType.append(row_type)
                continue

            # One lookup for all cells instead of one WebDriver query per column.
            cells = tr.find_elements_by_tag_name('td')
            cpu_cores = cells[3].text.replace('核', '')   # CPU core count
            cpu_kind = cells[5].text                      # CPU kind

            # Give the price panel a moment to update before reading it.
            time.sleep(1)
            price = driver.find_element_by_xpath('//*[@id="ctrl-bcc-price-totalPricePanel"]').text
            if price == '¥0.00':
                # No price shown for this row; nothing to record.
                continue

            tableList = [
                cpu_cores,
                cpu_kind,
                location_text,                            # region
                int(float(price.replace('¥', ''))),       # price, truncated to int
                1,                                        # pps (hard-coded to 1)
                cells[2].text,                            # product name
                cells[1].text,                            # product type
                cells[4].text.replace('GB', ''),          # memory
                nowtime,
            ]
            print(tableList)
            itemList.append(company + tableList)
# Write all collected rows to "BD<date>.csv" (relative path, so it is created
# in the current working directory) and report the file name and row count.
fileName = 'BD' + nowtime + '.csv'
print(fileName, len(itemList))
# Every row contains Chinese text (the vendor label at least), so the platform
# default encoding would raise UnicodeEncodeError on locales that cannot
# represent it.  UTF-8 with a BOM is used so spreadsheet tools can detect the
# encoding.  newline='' is what the csv module requires for its own line
# ending handling.
# NOTE(review): the browser started earlier is never closed in the visible
# script; consider calling driver.quit() once nothing else needs it.
with open(fileName, 'w', newline='', encoding='utf-8-sig') as f:
    f_csv = csv.writer(f)
    f_csv.writerows(itemList)