最近写了个爬虫,其中有两个功能:
1. 调用 Windows 接口实现窗口全屏截图;
2. 用 Selenium PhantomJS 截取网页全图。
import os
import re
import time
from contextlib import ExitStack

import win32api
import win32con
import win32gui
import win32ui
from lxml import etree
from selenium import webdriver
class Spider(object):
    """Small crawler demo that takes two kinds of screenshots of one page.

    A Chrome session loads the start URL and the desktop is then grabbed
    through the Win32 GDI API; a PhantomJS session loads the same URL and is
    captured with Selenium's own ``save_screenshot``.
    """

    def __init__(self):
        """Start both browser sessions and record the URL to visit.

        Raises:
            Exception: whatever the webdriver constructors raise. If the
                PhantomJS driver cannot be started, the already-running
                Chrome session is shut down before the error propagates.
        """
        self.browser = webdriver.Chrome()  # browser whose window ends up in the desktop grab
        try:
            # NOTE(review): PhantomJS support was dropped from newer Selenium
            # releases — confirm the installed version still provides it.
            self.browser_js = webdriver.PhantomJS()
        except Exception:
            # Do not leave an orphaned Chrome process behind.
            self.browser.quit()
            raise
        self.start_url = 'https://www.baidu.com/'

    def start(self):
        """Run the whole workflow: load the page, take both screenshots, clean up.

        The browsers are closed even when one of the steps raises, so a
        failed capture no longer leaks driver processes.
        """
        try:
            self.chrome_browser(self.start_url)
            self.window_capture('./pic.png')
            self.pj_capture(self.start_url, './pic_long.png')
        finally:
            self.close()

    def close(self):
        """Shut down both browsers once the crawl is finished.

        The second browser is quit even if quitting the first one raises.
        """
        try:
            self.browser.quit()
        finally:
            self.browser_js.quit()

    def pj_capture(self, url, filename):
        """Load ``url`` in the PhantomJS session and save a screenshot.

        Args:
            url: Address of the page to load.
            filename: Path handed to ``save_screenshot``.
        """
        self.browser_js.maximize_window()
        self.browser_js.get(url)
        self.browser_js.save_screenshot(filename)

    def window_capture(self, filename):
        """Grab the screen through the Win32 GDI API and save it to ``filename``.

        Every GDI resource acquired here (window DC, MFC DC, memory DC and
        bitmap handle) is released on exit, on success and on error alike.

        Args:
            filename: Path passed to ``SaveBitmapFile``.
                NOTE(review): ``SaveBitmapFile`` is expected to write BMP
                data regardless of the extension, so ``'./pic.png'`` would
                not be a real PNG — confirm and convert if PNG is required.
        """
        # A NULL (0) window handle asks GetWindowDC for the screen's device
        # context, not for a particular window.
        hwnd = 0

        # Cleanup callbacks run in reverse registration order when the
        # ``with`` block exits: bitmap, memory DC, MFC DC, then window DC.
        with ExitStack() as cleanup:
            hwnd_dc = win32gui.GetWindowDC(hwnd)
            cleanup.callback(win32gui.ReleaseDC, hwnd, hwnd_dc)

            # Wrap the raw handle in an MFC device-context object.
            mfc_dc = win32ui.CreateDCFromHandle(hwnd_dc)
            cleanup.callback(mfc_dc.DeleteDC)

            # Memory DC compatible with the screen DC; this is the blit target.
            save_dc = mfc_dc.CreateCompatibleDC()
            cleanup.callback(save_dc.DeleteDC)

            # The rectangle of the first enumerated monitor supplies the size.
            # Indexes 2 and 3 are its right and bottom edges, which equal the
            # width and height only when its top-left corner is at (0, 0).
            # NOTE(review): confirm this holds on multi-monitor setups.
            monitor_rect = win32api.EnumDisplayMonitors(None, None)[0][2]
            width = monitor_rect[2]
            height = monitor_rect[3]

            # Allocate the bitmap that will receive the pixels.
            bitmap = win32ui.CreateBitmap()
            bitmap.CreateCompatibleBitmap(mfc_dc, width, height)
            cleanup.callback(
                lambda: win32gui.DeleteObject(bitmap.GetHandle()))

            # Select the bitmap into the memory DC, then copy the
            # (width, height) region starting at the top-left corner (0, 0).
            save_dc.SelectObject(bitmap)
            save_dc.BitBlt((0, 0), (width, height), mfc_dc, (0, 0),
                           win32con.SRCCOPY)
            bitmap.SaveBitmapFile(save_dc, filename)

    def chrome_browser(self, url, state=None):
        """Configure the Chrome session and navigate it to ``url``.

        Navigation is best-effort: any exception raised while loading the
        page is printed and swallowed.

        Args:
            url: Address of the page to load.
            state: Currently unused; kept so existing callers keep working.

        Returns:
            Always ``1``.
        """
        self.browser.set_page_load_timeout(300)
        self.browser.set_script_timeout(300)
        self.browser.maximize_window()
        self.browser.implicitly_wait(30)  # implicit wait for element lookups
        try:
            self.browser.get(url)
        except Exception as e:
            print(e)
        else:
            # Fixed pause after a successful load before the caller continues.
            time.sleep(2)
        return 1
if __name__ == '__main__':
    # Script entry point: build the spider and run its one-shot capture workflow.
    Spider().start()