import db,re,os
import socket
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
def mkdir(path=''):
    """Create directory *path* under the current directory if it is missing.

    Args:
        path: Directory name relative to the current working directory.
            The default empty string refers to the current directory
            itself, so nothing is created.

    Raises:
        OSError: If the directory cannot be created and nothing exists at
            the target path afterwards (for example, a missing parent).
    """
    base = '.'
    # os.path.join uses the platform's separator; a hard-coded backslash only
    # works on Windows and elsewhere produces a single odd name such as ".\path".
    newpath = os.path.join(base, path)
    try:
        os.mkdir(newpath)
    except OSError:
        # An already-existing path (possibly created concurrently) is fine,
        # as before; any other failure is re-raised.
        if not os.path.exists(newpath):
            raise
# Process-wide default timeout (in seconds) for sockets created from here on,
# so a stalled download cannot block forever.
socket.setdefaulttimeout(600)
url = "http://www.example.com"
# Fetch the raw page and release the connection even if read() fails.
response = urlopen(url)
try:
    text2 = response.read()
finally:
    response.close()
# Parse the markup and keep the re-indented output of prettify().
text = BeautifulSoup(text2).prettify()
# or, alternatively: parse the page, strip <script>/<style> tags, then pretty-print
# Fetch the raw page and release the connection even if read() fails.
response = urlopen('http://www.example.com/')
try:
    text2 = response.read()
finally:
    response.close()
text = BeautifulSoup(text2)
# Remove <script> elements (potentially dangerous) first, then <style>
# elements. Plain loops are used because extract() is called only for its
# side effect of detaching the tag from the tree.
for tag_name in ('script', 'style'):
    for tag in text(tag_name):
        tag.extract()
# The parenthesized single-argument form prints the same on Python 2 and is
# also valid on Python 3.
print(text.prettify())
==========================
安装
wget http://www.crummy.com/software/BeautifulSoup/download/3.x/BeautifulSoup-3.2.1.tar.gz
tar zxf BeautifulSoup-3.2.1.tar.gz
cd BeautifulSoup-3.2.1
python setup.py install