这个熟悉吗?
对,这就是小米官网首页的热卖商品。想不想看看都有哪些?来,用我的代码满足你。
代码展示
from lxml import etree
import urllib3.request
import pandas as pd
# Turn off urllib3's warnings. This runs at import time, so it applies to
# every request made later in this process.
urllib3.disable_warnings()
# Build the object that will be parsed.
def getTree(url):
    """Download *url* and return it parsed as an lxml HTML tree.

    Args:
        url: Address of the page to fetch with an HTTP GET request.

    Returns:
        Whatever ``etree.HTML`` returns for the decoded response body.
    """
    pool_manager = urllib3.PoolManager()
    response = pool_manager.request('GET', url)
    # decode() without arguments assumes the body is UTF-8 encoded.
    html_text = response.data.decode()
    return etree.HTML(html_text)
def getInfo(url):
    """Scrape the category menu found at *url* and save it to ``midata.xls``.

    The page is fetched and parsed with ``getTree``. Every
    ``<li class="category-item">`` counts as one category block, and the
    entry texts below each block are collected. Category titles are read
    from all ``<a class="title">`` elements in the document. The result is
    written as a table with the columns ``specs_final``, ``lens`` and
    ``names`` to ``midata.xls`` in the current working directory.

    Args:
        url: Address of the page to scrape.

    Returns:
        None. Progress is printed to stdout and the table is written to disk.

    Raises:
        ValueError: Raised by pandas when the number of titles kept differs
            from the number of category blocks, because all columns must
            have the same length.
    """
    # Named ``tree`` so the parsed document does not shadow the imported
    # ``lxml.etree`` module inside this function.
    tree = getTree(url)

    # One <li class="category-item"> per category block.
    blocks = tree.xpath('//li[@class="category-item"]')
    print(len(blocks))

    # An XPath starting with '//' searches the whole document no matter which
    # element it is evaluated on, so query the tree directly instead of going
    # through blocks[0] (which raised IndexError when no block was found).
    title_texts = tree.xpath('//a[@class="title"]/text()')
    # Keep every second stripped text node.
    # NOTE(review): presumably each title link yields two text nodes, which
    # is why only every other one is kept; confirm against the page markup.
    specs_final = [text.strip() for text in title_texts][::2]
    print(specs_final)

    # Entry texts inside each block's child list.
    # NOTE(review): "clearix" looks like a typo of "clearfix"; it is kept
    # unchanged because it has to match the page's class attribute exactly.
    item_path = (
        'div[@class="children clearfix"]'
        '/ul[@class="children-list clearix"]'
        '/li/a/span[@class="text"]/text()'
    )
    names = [bk.xpath(item_path) for bk in blocks]
    lens = [len(items) for items in names]

    data = {'specs_final': specs_final, 'lens': lens, 'names': names}
    # Build the data frame and export it.
    df = pd.DataFrame(data)
    # NOTE(review): writing the legacy .xls format needs an Excel writer
    # engine that recent pandas releases may no longer provide; confirm
    # against the installed pandas version.
    df.to_excel('midata.xls')
    print('finished!')
# Page to scrape when this file is run as a script.
url = "https://www.mi.com/"

# Only start scraping when executed directly, not when imported.
if __name__ == '__main__':
    getInfo(url)