# 统计各地门店的数量
import requests

# Download the Apple China retail store list page, sending an explicit
# browser-style User-Agent header with the request.
# The timeout keeps the script from waiting forever if the server never
# answers (requests applies no timeout unless one is given).
resp = requests.get(
    'https://www.apple.com.cn/retail/storelist/',
    headers={'user-agent': 'Mozilla/5.0'},
    timeout=10,
)
if resp.status_code == 200:
    print('访问成功')
else:
    # Report the failure instead of continuing without any indication.
    print('访问失败', resp.status_code)
# 通过 XPath 来分析页面
# 通过 XPath 来定位到想要的位置
from lxml import etree

# Parse the downloaded HTML and collect the text of every <span> that is a
# direct child of a <div class="address-lines">.
tree = etree.HTML(resp.text)
prevs = tree.xpath("//div[@class='address-lines']/span/text()")
# Alternative lookup kept for reference (links of a single region):
#   tree.xpath('//*[@id="accordion-region-上海"]/div/div[1]/div/span/a')

# Tally how many times each text value occurs. Entries that are just "," or
# a single space are skipped.
data = {}
for prev in prevs:
    if prev in (',', ' '):
        continue
    data[prev] = data.get(prev, 0) + 1

# Show the raw mapping first, then one "name count" pair per line.
print(data)
for key, value in data.items():
    print(key, value)
# 使门店数量可视化
import pandas as pd
import csv
# Store count per city.
# NOTE(review): these numbers are hard-coded here rather than computed in this
# block — presumably a snapshot of an earlier scrape; confirm before relying
# on them.
Apple_store = {
    '上海': 7, '昆明': 1, '北京': 5, '成都': 2, '天津': 3, '济南': 1,
    '青岛': 1, '广州': 2, '深圳': 1, '南宁': 1, '南京': 3, '无锡': 1,
    '苏州': 1, '郑州': 1, '宁波': 1, '杭州': 2, '武汉': 1, '长沙': 1,
    '厦门': 1, '福州': 1, '大连': 2, '沈阳': 2, '重庆': 3,
}

# Build the two-column table directly and write it once, header included.
# This replaces the earlier sequence of writing header-less rows, reading
# them back with pandas only to attach column names, and writing the file a
# second time. The resulting `df` and CSV content are the same.
df = pd.DataFrame(list(Apple_store.items()), columns=['区域', '数量'])
df.to_csv('Apple_store.csv', index=False, encoding='utf-8')
import pandas as pd
import matplotlib.pyplot as plt

# Load the per-city counts from Apple_store.csv.
df = pd.read_csv('Apple_store.csv', encoding="utf-8")

plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used so Chinese labels render
    'axes.unicode_minus': False,    # draw the minus sign correctly
})

# Bar chart: the '区域' column on the x axis, the '数量' column as bar height.
df.plot.bar(x='区域', y='数量', title='苹果商店分布')
plt.show()
import numpy as np
import pandas as pd

# Sum the '数量' column per '区域' and order the totals from largest to
# smallest.
totals = pd.read_csv('Apple_store.csv').groupby('区域')['数量'].sum()
data = totals.sort_values(ascending=False)

# Pie chart: one wedge per region, labelled with the region name.
# autopct='%d' prints each share as a truncated integer, without a % sign.
plt.pie(data.values, labels=data.index, autopct='%d')
plt.legend(data.index)
plt.show()
# 图片爬取
# 通过 XPath 定位得到每家门店的图片编号,将其与固定的链接前缀、后缀拼接成完整的图片链接,再向服务器发送请求,就能得到图片
# A store image URL is: fixed prefix + store number + fixed suffix.
url1 = 'https://rtlimages.apple.com/cmc/dieter/store/16_9/'
url2 = '.png?resize=375:211&output-format=jpg&output-quality=85&interpolation=progressive-bicubic'

# Store numbers come from the data-store-number attribute of the matched <a>
# elements; the address is the first text node of each <address> element.
img_number = tree.xpath("//div//span//a/@data-store-number")
img_address = tree.xpath('//div//address/text()[1]')

# One complete image URL per store number, kept as a list so it can be
# iterated more than once.
image_url = [f'{url1}{number}{url2}' for number in img_number]

# Preview the URL/address pairs. zip() stops at the shorter of the two lists.
for url, address in zip(image_url, img_address):
    print(url, address)
import os

# Directory that receives the downloaded store pictures.
path = 'D://jupyter_study/爬虫/apple_store image//'
# open() does not create missing directories, so make sure the target exists.
os.makedirs(path, exist_ok=True)

for url, address in zip(image_url, img_address):
    # The address text node may carry surrounding whitespace; keep it out of
    # the file name.
    title = address.strip() + '.jpg'
    # Request the image itself. The timeout stops one stalled download from
    # blocking the whole loop.
    response = requests.get(url=url, timeout=10)
    if response.status_code != 200:
        # Do not save an error body as a .jpg or report it as a success.
        print('下载失败' + title)
        continue
    with open(path + title, 'wb') as f:
        f.write(response.content)
    print('下载成功' + title)
import os

# Directory that receives the downloaded store pictures.
path = 'D://jupyter_study/爬虫/apple_store image//'
# open() does not create missing directories, so make sure the target exists.
os.makedirs(path, exist_ok=True)

for url, address in zip(image_url, img_address):
    # The address text node may carry surrounding whitespace; keep it out of
    # the file name.
    title = address.strip() + '.jpg'
    # Request the image itself. The timeout stops one stalled download from
    # blocking the whole loop.
    response = requests.get(url=url, timeout=10)
    if response.status_code != 200:
        # Do not save an error body as a .jpg or report it as a success.
        print('下载失败' + title)
        continue
    # Write to the per-image file inside the directory. Opening `path` alone
    # targets the directory itself, so no image could ever be saved.
    with open(path + title, 'wb') as f:
        f.write(response.content)
    print('下载成功' + title)