python模拟浏览器代码_python 模拟浏览器

想用python模拟浏览器访问web的方法测试些东西,有哪几种方法呢?

一类:单纯的访问web,不解析其js,css等。

1. urllib2

#-*- coding:utf-8 -*

import urllib2

def Furllib2(ip, port, url, timeout):
    """Fetch ``url`` through an HTTP proxy with urllib2 and print the response.

    Builds an opener that routes ``http`` traffic through ``ip:port`` and
    sends a ``Mozilla/5.0`` User-agent, installs it as urllib2's default
    opener, then prints the final URL, status code, headers and body.

    Args:
        ip: Proxy host.
        port: Proxy port (anything that formats with ``%s``).
        url: Address to request.
        timeout: Passed to ``urllib2.urlopen`` as ``timeout``.

    Returns:
        True when the page was fetched and printed, 0 when any error occurred.
    """
    proxydict = {'http': "http://%s:%s" % (ip, port)}
    print(proxydict)

    proxy_handler = urllib2.ProxyHandler(proxydict)
    opener = urllib2.build_opener(proxy_handler)
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    # install_opener makes this opener the default used by urllib2.urlopen.
    urllib2.install_opener(opener)

    try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
            print(response.geturl())
            print(response.getcode())
            print(response.info())
            print(response.read())
        finally:
            # Release the connection even if reading/printing fails.
            response.close()
        return True
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
        print('some errors occored' + '-'*50)
        return 0

def main():
    """Run ``Furllib2`` once with a sample proxy and URL and print its result."""
    proxyip = '14.18.16.69'
    proxyport = '80'
    url = 'http://www.cnblogs.com/'
    timeout = 4
    print(Furllib2(proxyip, proxyport, url, timeout))


if __name__ == "__main__":
    main()

2. mechanize(与网站的自动化交互)

http://wwwsearch.sourceforge.net/mechanize/doc.html

def Fmechanize(url):
    """Open ``url`` with a cookie-aware mechanize opener and print the result.

    Args:
        url: Address to request with GET.

    Returns:
        True when the request succeeded, 0 when it raised an exception
        (the traceback is printed instead of being silently discarded).
    """
    # Local import: this snippet has no import block of its own to extend.
    import traceback

    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    try:
        r = opener.open(url)  # GET
        # A POST would pass the request body as a second argument:
        # r = opener.open("http://example.com/", data)
        print(r.geturl())
        print(r.info())
        return True
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
        traceback.print_exc()
        return 0

二类:模拟浏览器,使用firefox等的浏览器引擎,支持js,css等。

1. selenium 的firefox或者chrome等驱动,但是由于要打开一个浏览器,所以会比较慢(浏览器驱动可以到selenium官网上下载,也可以到firefox插件处搜索)

def Fselenium_firefox(ip, port, url, timeout):
    """Load ``url`` in Firefox through an HTTP proxy and print its cookies.

    Args:
        ip: Proxy host written to ``network.proxy.http``.
        port: Proxy port; converted with ``int()`` before being stored.
        url: Page to open.
        timeout: Passed to ``webdriver.Firefox`` as ``timeout``.

    Returns:
        1 when the page was loaded and the cookies printed, 0 when the
        browser could not be started or the page load raised.
    """
    try:
        profile = webdriver.FirefoxProfile()
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.http', ip)
        # The port preference is an integer in Firefox; callers in this file
        # pass it as a string, so normalise it here.
        profile.set_preference('network.proxy.http_port', int(port))
        profile.update_preferences()
        driver = webdriver.Firefox(profile, timeout=timeout)
    except Exception:
        # print_exc() already writes the traceback; printing its return value
        # would only add a stray "None".
        traceback.print_exc()
        return 0

    try:
        driver.get(url)
        time.sleep(5)  # fixed wait after the page load before reading cookies
        cookies = driver.get_cookies()
        print(cookies)
        return 1
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # Close the browser exactly once on both the success and failure path.
        driver.quit()

2. selenium :headless test使用selenium+ phantomjs驱动,无需打开浏览器,但是支持js的模拟浏览器动作,也就是说和你手工打开是没有区别的。

http://selenium.googlecode.com/git/docs/api/py/api.html

def Fselenium_phantomjs(ip, port, url, timeout):
    """Load ``url`` headlessly with PhantomJS through an HTTP proxy.

    Args:
        ip: Proxy host.
        port: Proxy port.
        url: Page to open.
        timeout: Passed to ``driver.set_page_load_timeout``.

    Returns:
        True when the driver's current URL could be read and printed (even
        if the page load itself raised, e.g. on timeout), 0 when the driver
        could not be started or the URL could not be read.
    """
    service_args = ['--proxy=%s:%s' % (ip, port), '--proxy-type=http']
    print(service_args)

    try:
        driver = webdriver.PhantomJS(service_args=service_args)
        # To point at a specific phantomjs binary, pass its path first:
        # driver = webdriver.PhantomJS("/root/phantomjs-1.9.7-linux-x86_64/bin/phantomjs", service_args=service_args)
    except Exception:
        # Without a driver there is nothing to query; stop here instead of
        # falling through to a NameError on ``driver``.
        traceback.print_exc()
        return 0

    try:
        try:
            driver.set_page_load_timeout(timeout)
            driver.get(url)
            time.sleep(4)  # fixed wait after the page load
        except Exception:
            # A failed or timed-out load is reported but not fatal: the
            # current URL is still read below.
            traceback.print_exc()

        try:
            print(driver.current_url)
            return True
        except Exception:
            traceback.print_exc()
            return 0
    finally:
        # Always shut the PhantomJS process down; it was previously left running.
        driver.quit()

3. qt,网上戗来的代码

http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url

from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork

class cookieJar(QtNetwork.QNetworkCookieJar):
    """Cookie jar pre-loaded from the parent window's ``settings`` object.

    On construction the value stored under ``cookiesKey`` in
    ``parent.settings`` is parsed and installed as the jar's cookies.
    Nothing in this class writes cookies back to the settings: the
    ``setCookiesFromUrl`` override that did so is commented out below.
    """

    def __init__(self, cookiesKey, parent=None):
        """Create the jar and restore any cookies saved under ``cookiesKey``.

        Args:
            cookiesKey: Settings key the cookies are stored under.
            parent: Owning window; expected to expose a ``settings``
                attribute. When it is None (the default) or has no
                ``settings``, the jar simply starts empty.
        """
        super(cookieJar, self).__init__(parent)
        self.mainWindow = parent
        self.cookiesKey = cookiesKey

        # ``parent`` defaults to None, so it must not be dereferenced blindly.
        settings = getattr(parent, 'settings', None)
        if settings is None:
            return

        cookiesValue = settings.value(self.cookiesKey)
        if cookiesValue:
            cookiesList = QtNetwork.QNetworkCookie.parseCookies(cookiesValue)
            self.setAllCookies(cookiesList)

    # Disabled override that would append every received cookie to the settings:
    # def setCookiesFromUrl (self, cookieList, url):
    #     cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
    #     cookiesArray = cookiesValue if cookiesValue else QtCore.QByteArray()
    #     for cookie in cookieList:
    #         cookiesArray.append(cookie.toRawForm() + "\n")
    #     self.mainWindow.settings.setValue(self.cookiesKey, cookiesArray)
    #     return super(cookieJar, self).setCookiesFromUrl(cookieList, url)

    def deleteCookie(self, cookieList):
        # NOTE(review): this ignores ``cookieList``, looks up a setting using
        # an empty list as the key and discards the result, so it does not
        # delete anything. Intended behaviour is unclear - confirm before use.
        cookie = []
        self.mainWindow.settings.value(cookie)

class webView(QtWebKit.QWebView):
    """Web view whose page stores its cookies in a ``cookieJar``."""

    def __init__(self, cookiesKey, url, parent=None):
        """Create the view and attach a cookie jar keyed by ``cookiesKey``.

        ``url`` is accepted but not used here; the caller loads the page.
        """
        super(webView, self).__init__(parent)
        self.cookieJar = cookieJar(cookiesKey, parent)
        network_manager = self.page().networkAccessManager()
        network_manager.setCookieJar(self.cookieJar)

class myWindow(QtGui.QMainWindow):
    """Main window with a URL box, an "Add Tab" action and a tabbed web view."""

    def __init__(self, parent=None):
        super(myWindow, self).__init__(parent)
        self.cookiesKey = "cookies"

        self._setup_tabs()
        self._setup_toolbar()

        self.setCentralWidget(self.tabWidget)
        self.settings = QtCore.QSettings()

    def _setup_tabs(self):
        """Create the closable tab widget inside a container with a vertical layout."""
        container = QtGui.QWidget(self)
        self.centralwidget = container

        tabs = QtGui.QTabWidget(container)
        tabs.setTabsClosable(True)
        self.tabWidget = tabs

        layout = QtGui.QVBoxLayout(container)
        layout.addWidget(tabs)
        self.verticalLayout = layout

    def _setup_toolbar(self):
        """Create the top toolbar holding the "Add Tab" action and the URL field."""
        add_tab = QtGui.QAction(self)
        add_tab.setText("Add Tab")
        add_tab.triggered.connect(self.on_actionTabAdd_triggered)
        self.actionTabAdd = add_tab

        url_edit = QtGui.QLineEdit(self)
        url_edit.setText("http://www.example.com")
        self.lineEdit = url_edit

        bar = QtGui.QToolBar(self)
        bar.addAction(add_tab)
        bar.addWidget(url_edit)
        self.toolBar = bar

        self.addToolBar(QtCore.Qt.ToolBarArea(QtCore.Qt.TopToolBarArea), bar)

    @QtCore.pyqtSlot()
    def on_actionShowCookies_triggered(self):
        """Print every cookie held by the currently selected tab, in raw form."""
        current_view = self.tabWidget.currentWidget()
        jar = current_view.page().networkAccessManager().cookieJar()
        for cookie in jar.allCookies():
            print(cookie.toRawForm())

    @QtCore.pyqtSlot()
    def on_actionTabAdd_triggered(self):
        """Open a new tab for the URL in the line edit, or a blank page if empty."""
        self.addNewTab(self.lineEdit.text() or 'about:blank')

    def addNewTab(self, url):
        """Create a ``webView`` tab, start loading ``url`` in it and select it."""
        count = self.tabWidget.count()
        title = u"Tab {0}".format(str(count))

        view = webView(self.cookiesKey, url, self)
        view.loadFinished.connect(self.on_tabWidget_loadFinished)
        view.load(QtCore.QUrl(url))

        index = self.tabWidget.addTab(view, title)
        self.tabWidget.setCurrentIndex(index)

    @QtCore.pyqtSlot()
    def on_tabWidget_loadFinished(self):
        """Read the stored cookies value when a page finishes loading.

        The value is looked up but not used any further.
        """
        self.settings.value(self.cookiesKey)

if __name__ == "__main__":
    import sys

    # Create the application object first, then build and show the window.
    app = QtGui.QApplication(sys.argv)
    app.setApplicationName('myWindow')

    main = myWindow()
    main.resize(666, 333)
    main.show()

    # Run the event loop and hand its return value to the process exit status.
    exit_code = app.exec_()
    sys.exit(exit_code)

4. qt-headless

http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url

import sys

from PyQt4.QtGui import *

from PyQt4.QtCore import *

from PyQt4.QtWebKit import *

class Render(QWebPage):
    """Load a URL in an off-screen QWebPage and block until loading finishes.

    After construction returns, ``self.frame`` holds the page's main frame,
    from which the rendered HTML can be read.
    """

    def __init__(self, url):
        """Start loading ``url`` and run the Qt event loop until it is done.

        Args:
            url: Address to load.
        """
        # Qt allows a single QApplication per process, so reuse an existing
        # one; otherwise a second Render() in the same process would fail.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app

        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Blocks here until _loadFinished() quits the event loop.
        self.app.exec_()

    def _loadFinished(self, result):
        """Store the main frame and stop the event loop.

        ``result`` (the signal's argument) is not inspected.
        """
        self.frame = self.mainFrame()
        self.app.quit()

# Example: render one page with Render and print the resulting HTML.
url = 'http://webscraping.com'
r = Render(url)

# The main frame is available once Render() has returned.
html = r.frame.toHtml()
print(html)

5. splinter :打开浏览器,模拟操作,python的

http://splinter.cobrateam.info/docs/tutorial.html

>>> from splinter import Browser

>>> browser = Browser()

>>> url = "http://www.cnblogs.com"

>>> browser.visit(url)

具体用哪个要看你有什么具体的需求了

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值