2021-07-18山东大学暑期实训第三周2


前言

这一次也是爬数据,也会把源码贴出来


一、爬取的数据

test4

在这里插入图片描述
共有4933条

二、爬虫的源码

from selenium import webdriver
import time
import warnings
import pandas as pd
import csv
warnings.filterwarnings("ignore")

# Scrapes the price table of the Huawei Cloud ECS pricing page for every
# combination of the five option groups (region, CPU architecture, spec
# family, type, image) and writes all rows to one CSV file.

CHROMEDRIVER_PATH = r"C:\Users\dell\AppData\Local\Google\Chrome\Application\chromedriver.exe"
PRICING_URL = "https://www.huaweicloud.com/pricing.html?tab=detail#/ecs"
OUTPUT_CSV = "D:/test0716.csv"

# Absolute XPaths into the calculator form.  They are tied to the page layout
# at the time the script was written.
_FORM_XPATH = "/html/body/div[3]/div[1]/div/div[2]/div[2]/div/div/div[2]/div[1]/div/div/div"
_SPEC_XPATH = _FORM_XPATH + "[3]/div[2]/div/div"
OPTION_GROUP_XPATHS = (
    _FORM_XPATH + "[2]/div[1]/div/div/div[2]/div/div/div",  # region
    _SPEC_XPATH + "[2]/div[1]/div/div/div[2]/div/div/div",  # CPU architecture
    _SPEC_XPATH + "[2]/div[2]/div/div/div[2]/div/div/div",  # spec family
    _SPEC_XPATH + "[2]/div[3]/div/div/div[2]/div/div/div",  # type
    _SPEC_XPATH + "[2]/div[4]/div/div/div[2]/div/div/div",  # image
)
TABLE_ROWS_XPATH = _SPEC_XPATH + "[3]/div/div/div[3]/table/tbody/tr"
FIRST_ROW_CELLS_XPATH = TABLE_ROWS_XPATH + "[1]/td"

MISSING_PRICE = "--"  # placeholder the page shows when a price is not offered
UNIT_TEXT = "元"  # text of the trailing price-unit cell


def _select_option(option):
    """Click the option's button, wait for the page to refresh, return its label."""
    option.find_element_by_tag_name("button").click()
    time.sleep(1)
    return option.find_element_by_tag_name("button").find_element_by_tag_name("span").text


def _walk_options(driver, group_xpaths, chosen=()):
    """Yield one tuple of labels per combination of options, depth first.

    Each level is queried only after its parent option has been clicked,
    because the available choices depend on the selection above them.  The
    generator is lazy: the consumer reads the price table while the yielded
    combination is still the one selected on the page.
    """
    if not group_xpaths:
        yield chosen
        return
    # Absolute XPaths are resolved from the document root, so the lookup is
    # issued on the driver rather than on a parent element.
    for option in driver.find_elements_by_xpath(group_xpaths[0]):
        label = _select_option(option)
        print(label)
        yield from _walk_options(driver, group_xpaths[1:], chosen + (label,))


def _normalize_price(text):
    """Map the "not offered" placeholder to "0"; return other text unchanged."""
    return "0" if text == MISSING_PRICE else text


def _scrape_row(tr, column_count):
    """Return the fields of one table row in output-column order.

    column_count is the number of cells in the table's first row; it decides
    how many multi-year price columns are read.
    """
    # One lookup per row instead of one WebDriver round trip per field.
    cells = tr.find_elements_by_tag_name("td")

    def normal_text(index):
        cell = cells[index].find_element_by_class_name("cell-normal")
        return cell.find_element_by_tag_name("span").text

    fields = [
        normal_text(0),   # spec name
        normal_text(1),   # vCPU count
        normal_text(2),   # memory
        normal_text(-1),  # price unit (always the last cell)
        _normalize_price(normal_text(3)),  # hourly price
    ]
    monthly = _normalize_price(normal_text(4))
    # When the fifth cell is the unit cell, the table has no monthly column.
    if monthly != UNIT_TEXT:
        fields.append(monthly)
    # Remaining cells (between the monthly price and the unit) are yearly prices.
    for index in range(5, column_count - 1):
        fields.append(_normalize_price(cells[index].find_element_by_class_name("cell").text))
    return fields


def _scrape_table(driver, selection):
    """Return one record per row of the currently displayed price table."""
    rows = driver.find_elements_by_xpath(TABLE_ROWS_XPATH)
    if not rows:
        return []
    # The cell count of the first row is the same for every row of this
    # table, so it is queried once instead of once per row.
    column_count = len(driver.find_elements_by_xpath(FIRST_ROW_CELLS_XPATH))
    records = []
    for tr in rows:
        info = list(selection) + _scrape_row(tr, column_count)
        print(info)
        records.append(info)
    return records


driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
center_info = []
try:
    driver.get(PRICING_URL)
    time.sleep(3)  # give the calculator time to render
    for selection in _walk_options(driver, OPTION_GROUP_XPATHS):
        center_info.extend(_scrape_table(driver, selection))
finally:
    # Always shut the browser down, even when a lookup fails mid-crawl;
    # otherwise the Chrome and chromedriver processes are left running.
    driver.quit()

name = ["区域", "CPU架构", "规格", "类型", "镜像", "规格名称", "核数", "内存", "价格单位", "按小时", "包月", "包1年", "包2年", "包3年", "包4年", "包5年"]
# Rows from tables without monthly/yearly columns are shorter than the header.
# Pad them so the frame can be built even if no row reaches the full width.
padded_rows = [row + [None] * (len(name) - len(row)) for row in center_info]
test = pd.DataFrame(columns=name, data=padded_rows)
test.to_csv(OUTPUT_CSV)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值