"""Scrape the announcement list page of www.cbirc.gov.cn with PhantomJS.

The script opens the list page in a headless PhantomJS browser, prints the
page title and full page source, then prints the text, link and date of
every row found in the announcement panel.  A timestamp is printed between
the main steps so the duration of each step can be read from the output.
"""
import time

from selenium import webdriver  # Python bindings for Selenium (webdriver)
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Page listing the announcements/notices to scrape.
LIST_URL = 'https://www.cbirc.gov.cn/cn/view/pages/ItemList.html?itemPId=923&itemId=925&itemUrl=ItemListRightList.html&itemName=%E5%85%AC%E5%91%8A%E9%80%9A%E7%9F%A5'

# One match per row of the announcement panel rendered by the page.
ROW_XPATH = (
    "//div[@ng-if=\"itemName!='政府网站年度报表'\" and @class=\"ng-scope\"]"
    "/div/div/div/div[@class=\"panel-row ng-scope\"]"
)


def _print_timestamp():
    """Print the current local time as ``time: YYYY-MM-DD HH:MM:SS``."""
    # strftime() formats the current local time when no time tuple is given.
    print("time:", time.strftime('%Y-%m-%d %H:%M:%S'))


_print_timestamp()

# Create a headless browser based on PhantomJS and set a user agent;
# otherwise the page may be rendered in an incompatible way.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/4.0 (compatible; MSIE 5.5; windows NT)"
)
# The capabilities were previously built but never handed to the driver,
# so the custom user agent had no effect.
# NOTE(review): confirm the target page still renders with this user agent.
browser = webdriver.PhantomJS(desired_capabilities=dcap)

try:
    # Open the target page through PhantomJS.
    _print_timestamp()
    browser.get(LIST_URL)
    _print_timestamp()

    print("title:", browser.title)  # page title
    print(browser.page_source)  # full source of the current page
    _print_timestamp()

    print("获得目标位置:")
    # NOTE(review): the find_element(s)_by_xpath helpers are kept exactly as
    # in the original; confirm they exist in the installed Selenium version.
    first_container = browser.find_elements_by_xpath(ROW_XPATH)
    _print_timestamp()
    print("len of list_container:", len(first_container))

    for first_element in first_container:
        print("title:")
        print(first_element.text)
        print(first_element.get_attribute("ng-repeat"))

        print("second container:")
        # The leading "." keeps each lookup relative to the current row.
        second_container_name = first_element.find_element_by_xpath(
            ".//span[@class=\"title\"]"
        )
        print("name:", second_container_name.text)

        second_container_a = first_element.find_element_by_xpath(
            ".//span[@class=\"title\"]/a"
        )
        print("a.attributes:", second_container_a.get_attribute("href"))

        second_container_date = first_element.find_element_by_xpath(
            ".//span[@class=\"date ng-binding\"]"
        )
        print("date:", second_container_date.text)
finally:
    # Always shut the driver down, even when a lookup above raises, so the
    # browser session is not left running.
    browser.quit()