爬取天猫超市的牛奶信息时,遇到重定向问题:
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
import
sys
reload
(sys)
sys.setdefaultencoding(
'utf-8'
)
import
urllib2,urllib,re
from
bs4
import
BeautifulSoup
import
cookielib
url
=
'https://list.tmall.com/search_product.htm?spm=a3204.7084717.1996500281.2.EUMiGi&cat=51462017'
headers
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
' Chrome/45.0.2454.101 Safari/537.36'
,
'referer'
:url}
cookie
=
cookielib.CookieJar()
handler2
=
urllib2.HTTPCookieProcessor(cookie)
opener
=
urllib2.build_opener()
opener.add_handler(handler
=
handler2)
opener.addheaders
=
headers.items()
urllib2.install_opener(opener
=
opener)
page
=
opener.
open
(url)
print
page.url
####这里看到定向到登录页去了
html
=
page.read()
print
html
|