Python 用 PyQt4 实现的网页抓取:可以执行页面中的 JavaScript,还可以用来截图

---------示例一:把下载的网页保存到一个文件里---------------------------------------------------

# coding: utf8

import sys

import signal

from optparse import OptionParser

from PyQt4.QtCore import *

from PyQt4.QtGui import *

from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings

class Crawler(QWebPage):
    """Web page that loads a URL and dumps the resulting HTML to a file.

    The page is loaded through QtWebKit, and the markup saved is whatever
    ``mainFrame().toHtml()`` returns once ``loadFinished`` fires.

    Args:
        url: Address of the page to fetch.
        file: Path of the local file the HTML is written to.
    """

    def __init__(self, url, file):
        QWebPage.__init__(self)
        self._url = url
        self._file = file

    def crawl(self):
        """Start loading the page; the result is saved by `_finished_loading`."""
        # Restore the default SIGINT handler so Ctrl-C terminates the process.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        self.connect(self, SIGNAL('loadFinished(bool)'), self._finished_loading)
        # To render already-downloaded markup instead of fetching it, use
        # self.mainFrame().setHtml(html, QUrl(self._url)) here.
        self.mainFrame().load(QUrl(self._url))

    def _finished_loading(self, result):
        """Write the page's HTML to the target file and terminate.

        Args:
            result: The boolean carried by ``loadFinished``; False means the
                load did not complete successfully.

        Exits the process with status 0 on a successful load and 1 otherwise.
        Whatever markup is available is still written in the failure case.
        """
        # type(u'') is `unicode` on Python 2 and `str` on Python 3; converting
        # first and encoding explicitly avoids any implicit ASCII conversion
        # when the page contains non-ASCII text.
        text_type = type(u'')
        markup = text_type(self.mainFrame().toHtml())
        with open(self._file, 'wb') as out:
            out.write(markup.encode('utf-8'))
        sys.exit(0 if result else 1)

def main():
    """Build the Qt application, start one crawl and run until it exits."""
    qt_app = QApplication(sys.argv)
    opts = get_cmd_options()

    page = Crawler(opts.url, opts.file)
    page.crawl()

    # The process exit status is whatever the Qt event loop returns.
    exit_status = qt_app.exec_()
    sys.exit(exit_status)

def get_cmd_options():
    """Parse and validate the command-line options.

    Reads the arguments from ``sys.argv``.

    Returns:
        The optparse values object, with both ``url`` and ``file`` set.

    Exits with status 1, after writing a hint to stderr, when either
    ``--url`` or ``--file`` is missing.
    """
    usage = "usage: %prog [options] args"
    parser = OptionParser(usage)
    parser.add_option('-u', '--url', dest='url',
                      help='URL to fetch data from')
    parser.add_option('-f', '--file', dest='file',
                      help='Local file path to save data to')
    options, _args = parser.parse_args()

    required = (
        ('url', 'You must specify an URL.'),
        ('file', 'You must specify a destination file.'),
    )
    for attr, complaint in required:
        if not getattr(options, attr):
            # Diagnostics go to stderr; sys.exit is used because the bare
            # exit() helper only exists when the `site` module is loaded.
            sys.stderr.write('%s %s --help for more details\n'
                             % (complaint, sys.argv[0]))
            sys.exit(1)

    return options

# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()

---------示例二:等网页加载完成后再输出其内容---------------------------------------------------

# coding: utf8

import sys

import os

from time import time

import code

from PyQt4.QtCore import *

from PyQt4.QtGui import *

from PyQt4.QtWebKit import *

class Pekit(object):
    """Scriptable browser page built on a QtWebKit ``QWebPage``.

    Loads pages, waits for them to finish, injects jQuery helper scripts
    and lets the caller evaluate JavaScript or read back the HTML.
    JavaScript alert/confirm/prompt/console callbacks are routed to the
    terminal.
    """

    def __init__(self):
        self.user_agent = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2) Gecko/20100124 Firefox/3.6 (Swiftfox)'
        # Only one QApplication may exist per process, so reuse a running one.
        self.application = QApplication.instance() or QApplication([])
        self.webpage = QWebPage()
        self.webpage.userAgentForUrl = lambda url: self.user_agent
        self.webframe = self.webpage.currentFrame()

        # Route the page's JavaScript dialogs and console to stdin/stdout.
        self.webpage.javaScriptAlert = self._alert
        self.webpage.javaScriptConfirm = lambda frame, message: raw_input('==Confirm== (y/n): %s' % message) == 'y'
        self.webpage.javaScriptConsoleMessage = lambda string1, int1, string2: sys.stdout.write('==Message==: %s %d %s\n' % (string1, int1, string2))
        self.webpage.javaScriptPrompt = lambda frame, message, defaultValue, result: raw_input('==Prompt== %s:' % message) or False

        # One of 'init', 'start' or 'end'; polled by wait_load().
        self._load_status = 'init'
        self.webpage.connect(self.webpage,
                             SIGNAL('loadFinished(bool)'),
                             self._onLoadFinished)
        self.webpage.connect(self.webpage,
                             SIGNAL('loadStarted()'),
                             self._onLoadStarted)

    def load(self, url):
        """Load `url`, wait for it to finish, then inject the jQuery helpers.

        The helper scripts ``jquery.min.js`` and ``jquery.simulate.js`` are
        read from the directory containing this file.
        """
        self.webframe.load(QUrl(url))
        self.wait_load()
        # abspath() first: dirname(__file__) is '' when the script is started
        # from its own directory, which would point the lookup at '/'.
        jsdir = os.path.dirname(os.path.abspath(__file__))
        self.js_noload(self._read_helper_scripts(jsdir))

    def _read_helper_scripts(self, jsdir):
        """Return the jQuery helper sources found in `jsdir` as one script."""
        pieces = []
        for script_name in ('jquery.min.js', 'jquery.simulate.js'):
            with open(os.path.join(jsdir, script_name), 'r') as handle:
                pieces.append(handle.read())
        # Expose jQuery as _JQ so it does not clash with the page's own `$`.
        pieces.append("var _JQ = jQuery.noConflict();")
        return ''.join(pieces)

    def js(self, js):
        """Evaluate `js`, first marking a load as started.

        The status is set to 'start' before evaluation so that a following
        wait_load() blocks until a loadFinished(True) arrives.
        """
        self._load_status = 'start'
        self.webframe.evaluateJavaScript(js)

    def js_noload(self, js):
        """Evaluate `js` without touching the load status."""
        self.webframe.evaluateJavaScript(js)

    def html(self):
        """Return the current frame's HTML as a unicode string."""
        return unicode(self.webframe.toHtml())

    def wait_load(self, least=0, most=300):
        """Process Qt events until the pending load ends.

        Args:
            least: Minimum number of seconds to keep processing events.
            most: Maximum number of seconds to wait while a load is pending.
        """
        start = time()
        while (self._load_status == 'start' and time() - start < most) or time() - start < least:
            self.application.processEvents()

    def _onLoadFinished(self, status):
        # NOTE(review): a failed load (status False) leaves the status at
        # 'start', so wait_load() keeps polling until `most` elapses --
        # confirm whether that is intended.
        if status:
            self._load_status = 'end'

    def _onLoadStarted(self):
        self._load_status = 'start'

    def _alert(self, frame, message):
        """Print a JavaScript alert message to stdout."""
        print("==Alert== %s" % unicode(message))

参考资料:

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值