1 #-*- coding:utf-8 -*-
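#python 2.7 -- NOTE(review): this file imports urllib.request, which exists only in Python 3; confirm the intended interpreter version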
3 #XiaoDeng
4 #http://tieba.baidu.com/p/2460150866
5 #标签操作
6
7
import re
import urllib.request

from bs4 import BeautifulSoup
12
13 #如果是网址,可以用这个办法来读取网页
14 #html_doc = "http://tieba.baidu.com/p/2460150866"
15 #req = urllib.request.Request(html_doc)
16 #webpage = urllib.request.urlopen(req)
17 #html = webpage.read()
18
19
20
21 html="""
22
The Dormouse's story23 24The Dormouse's story
25Once upon a time there were three little sisters; and their names were26 ,27 Lacie and28