同样是爬取某网站的商品标题,下面分别用 urllib 和 requests 两种方式实现。
urllib
# Fetch one category listing page with urllib and pull the product titles
# out of it with an lxml XPath query.
import urllib.request

from lxml import etree

url = "http://category.dangdang.com/pg1-cid4008154.html"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0",
    "Content-Type": "application/javascript",
}

# Build an opener that carries the custom headers and install it globally so
# that every later urllib.request.urlopen() call in this process sends them.
# OpenerDirector.addheaders takes a list of (name, value) tuples, which is
# exactly what dict.items() yields.
opener = urllib.request.build_opener()
headall = list(headers.items())
opener.addheaders = headall
urllib.request.install_opener(opener)

# The context manager closes the HTTP response once the body has been read;
# the timeout (seconds) keeps a stalled connection from blocking forever.
with urllib.request.urlopen(url, timeout=10) as response:
    # The response bytes are decoded as GBK; a body that is not valid GBK
    # raises UnicodeDecodeError.
    data = response.read().decode("gbk")

# Parse the markup and collect the `title` attribute of every
# <a name="itemlist-picture"> element. Despite its name, href_list holds
# those title strings, not hrefs.
html = etree.HTML(data)
href_list = html.xpath("//a[@name='itemlist-picture']/@title")
# Bare expression: displays the list when run as a notebook/REPL cell.
href_list
requests
# Fetch the same category listing page with requests and pull the product
# titles out of it with an lxml XPath query.
import requests
from lxml import etree

url = "http://category.dangdang.com/pg1-cid4008154.html"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0",
    "Content-Type": "application/javascript",
}

# requests never times out unless told to, so pass an explicit timeout
# (seconds) to avoid hanging on a stalled connection.
r = requests.get(url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx response instead of parsing an error page and
# silently ending up with an empty result list.
r.raise_for_status()

# Decode the raw bytes explicitly as GBK rather than relying on the
# encoding requests guesses for r.text.
page_text = r.content.decode('gbk')

# Parse the markup and collect the `title` attribute of every
# <a name="itemlist-picture"> element. Despite its name, href_list holds
# those title strings, not hrefs.
html = etree.HTML(page_text)
href_list = html.xpath("//a[@name='itemlist-picture']/@title")
# Bare expression: displays the first ten titles when run as a notebook cell.
href_list[0:10]
输出(前 10 个商品标题):
[' Lily冬粉紫韩版直筒中长款双面呢大衣羊毛大衣女',
' Lily春秋新款女装纯色直筒毛呢大衣中长款毛呢大衣117410F1556',
' 【1件3折!限时购!到手价:126元】裂帛2019春新款时尚圆领绣花休闲甜美风清新韩版学生长袖卫衣女',
' 【到手价449元】加厚女羽绒面包服秋冬季新款韩版学生小棉袄短款外套棉衣',
' 【到手价299元】百搭羊羔毛外套女新款小个子呢子韩版短款毛呢外套女学生',
' 【2.5折价:79.75元/叠加40元券】茵曼秋装新款连帽印花抽绳宽松休闲运动长袖卫衣女【1883082079】',
' Lily春新款女装商务通勤OL深宝蓝拉链毛呢外套117420F1543',
' 【领券下单立减120元】Amii韩版ins高领加厚加绒卫衣女2019秋冬新款宽松印花长袖上衣',
' 【明星同款】Lily2018冬新款直筒人字纹双面呢长款大衣118419F1901',
' 【到手价149元】长袖运动服女新款韩版学生宽松时尚bf短款拉夏贝尔情侣外套']