# -*- coding:utf-8 -*- from selenium import webdriver from bs4 import BeautifulSoup import pdfcrowd import re import requestsif __name__ == '__main__':
driver = webdriver.PhantomJS('E:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe') driver.get('https://wenku.baidu.com/view/ff06a4c2bed5b9f3f90f1cdb.html') html = driver.page_source bf1 = BeautifulSoup(html, 'html5lib')save_file = 'E:/1.txt'
fp = open(save_file, "ab+")val_class = 'mod reader-page complex hidden-doc-banner reader-page-5'
for tag in bf1.find_all('div', class_=val_class):try: js = "window.scroll(0, 100)" driver.execute_script(js) time.sleep(10) m_name = tag.find('div', class_='ie-fix').get_text() fp.write(m_name.encode('UTF-8')+'\n') print 'm_name===', m_name # m_name = tag.find('img', class_='ie-fix').get_text() except: pass fp.close()val_class = 'mod reader-page complex hidden-doc-banner reader-page-5'for tag in bf1.find_all('div', class_=val_class): try: js = "window.scroll(0, 100)" driver.execute_script(js) time.sleep(10) m_name = tag.find('div', class_='ie-fix').get_text() fp.write(m_name.encode('UTF-8')+'\n') print 'm_name===', m_name # m_name = tag.find('img', class_='ie-fix').get_text() except: passfp.close()
display:none 转为 display:block
最新推荐文章于 2021-10-26 08:26:57 发布