1、从网址中获取网页内容
import contextlib
import urllib
import re
import sys
import string

# Download the page (Python 2 urllib). The object returned by urlopen is
# wrapped in contextlib.closing so the connection is closed even if read()
# raises.
with contextlib.closing(urllib.urlopen("http://www.hao123.com/")) as sock:
    strhtml = sock.read()

# Decode the raw bytes as gb2312 and re-encode them as UTF-8; 'ignore' drops
# any byte sequences that cannot be converted instead of raising.
strhtml = unicode(strhtml, 'gb2312', 'ignore').encode('utf-8', 'ignore')
print(strhtml)
转载自:http://hi.baidu.com/kopla/blog/item/591335afde167ce8fbed505a.html
这个博客里有不少用 Python 从网页获取内容的文章,很值得参考。
2、POST请求
以前在实现论坛自动发帖的时候写的代码,其中 data 部分就是你要提交的数据。
其实最好的方式就是你自己在提交一次注册信息的时候,抓包看一下post了什么东西,然后把data部分改成你要提交的东西,注意报文格式,就可以了。
#!/usr/bin/python
# Send a hand-built multipart/form-data POST with httplib (Python 2) and
# print the response body.
#
# NOTE(review): `cookie` and `data` are not defined in this snippet. Assign
# the Cookie header value and the raw multipart request body (e.g. captured
# from a real form submission) before running.
import cookielib, urllib2, urllib, sys, time
import httplib

http = httplib.HTTP('你要连接的host')  # placeholder: replace with the target host

# write header
http.putrequest("POST", '/phpwind/post.php?')
http.putheader("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; InfoPath.2; MAXTHON 2.0)")
http.putheader("Referer", 'http://10.16.62.100/phpwind/post.php?fid=2')
http.putheader("Host", '10.16.62.100')
http.putheader("Cookie", cookie)
# The boundary declared here has to match the one used inside `data`.
http.putheader("Content-Type", 'multipart/form-data; boundary=---------------------------7d91d42da0af0')
http.putheader("Content-Length", str(len(data)))
http.endheaders()

# write body
http.send(data)

# get response
errcode, errmsg, headers = http.getreply()
if errcode != 200:
    # The snippet originally raised an undefined `Error`; use the exception
    # class that httplib already provides so the failure is reported properly.
    raise httplib.HTTPException(errcode, errmsg, headers)

response_file = http.getfile()
print(response_file.read())
转载自:http://topic.csdn.net/u/20101012/14/51a74db4-fad7-4d05-ba64-69f6b0149a44.html
http://pleac.sourceforge.net/pleac_python/webautomation.html
示例:
使用python在win下通过IE组件获得Ajax执行后网页源代码
在论坛发帖都没人回,最后还是自己解决了,现在把测试的代码写下。距离色色的目标又进一步了,欧耶!
代码如下:
#!/usr/bin/env python
#coding=utf-8
import wx.lib.iewin
import wx,time


class MyFrame(wx.Frame):
    """Frame hosting an embedded IE control that loads a fixed URL.

    The frame registers itself as an event sink of the control, so the
    control's events are dispatched to same-named methods on this class
    (here: DocumentComplete).
    """

    def __init__(self):
        wx.Frame.__init__(self, parent=None, id=-1, pos=wx.DefaultPosition, title=u'iewin窗口')
        panel = wx.Panel(self)
        self.html = wx.lib.iewin.IEHtmlWindow(panel, -1, pos=wx.DefaultPosition, style=0, name='OK')
        self.html.LoadUrl('http://www.cnbeta.com/articles/105719.htm')

        # Let the browser control fill the whole panel.
        sizer = wx.BoxSizer(wx.HORIZONTAL)
        sizer.Add(self.html, 1, wx.ALL | wx.EXPAND, 0)
        panel.SetSizer(sizer)
        sizer.Fit(self)
        self.html.AddEventSink(self)

    def DocumentComplete(self, pDisp, URL):
        """Dump the control's current text to 1.txt.

        Presumably called by the IE control once a document has finished
        loading (verify against the wx.lib.iewin documentation). The text is
        encoded as UTF-8, one fixed substring is replaced, and the result
        overwrites 1.txt in the current directory.
        """
        text = self.html.GetText()
        print(isinstance(text, unicode))
        s = text.encode('utf8')
        t = s.replace('乔布斯', '片子')
        # `with` guarantees the file is closed even if write() fails.
        with open('1.txt', 'w') as fi:
            fi.write(t)


if __name__ == '__main__':
    app = wx.PySimpleApp()
    frame = MyFrame()
    frame.Show()
    app.MainLoop()
#!/usr/bin/env python
#coding=utf-8
import wx.lib.iewin
import wx,time


class MyFrame(wx.Frame):
    """Frame hosting an embedded IE control that loads a fixed URL.

    The frame registers itself as an event sink of the control, so the
    control's events are dispatched to same-named methods on this class
    (here: DocumentComplete).
    """

    def __init__(self):
        wx.Frame.__init__(self, parent=None, id=-1, pos=wx.DefaultPosition, title=u'iewin窗口')
        panel = wx.Panel(self)
        self.html = wx.lib.iewin.IEHtmlWindow(panel, -1, pos=wx.DefaultPosition, style=0, name='OK')
        self.html.LoadUrl('http://www.cnbeta.com/articles/105719.htm')

        # Let the browser control fill the whole panel.
        sizer = wx.BoxSizer(wx.HORIZONTAL)
        sizer.Add(self.html, 1, wx.ALL | wx.EXPAND, 0)
        panel.SetSizer(sizer)
        sizer.Fit(self)
        self.html.AddEventSink(self)

    def DocumentComplete(self, pDisp, URL):
        """Dump the control's current text to 1.txt.

        Presumably called by the IE control once a document has finished
        loading (verify against the wx.lib.iewin documentation). The text is
        encoded as UTF-8, one fixed substring is replaced, and the result
        overwrites 1.txt in the current directory.
        """
        text = self.html.GetText()
        print(isinstance(text, unicode))
        s = text.encode('utf8')
        t = s.replace('乔布斯', '片子')
        # `with` guarantees the file is closed even if write() fails.
        with open('1.txt', 'w') as fi:
            fi.write(t)


if __name__ == '__main__':
    app = wx.PySimpleApp()
    frame = MyFrame()
    frame.Show()
    app.MainLoop()