目录
前言
Python基础代码:用爬虫爬取"什么值得买"网站的手机折扣信息
一、爬取网站
二、使用步骤
1.引入库
代码如下(示例):
# selenium drives Chrome, lxml parses the page source, time throttles the
# downloads, and requests fetches the pictures.
from selenium import webdriver
from lxml import etree
import time
import requests
2.构造分页URL(程序入口)
代码如下(部分):
def doWork(first_page=1, last_page=2):
    """Crawl consecutive listing pages of the smartphone category.

    Builds the URL of every page from ``first_page`` to ``last_page``
    (inclusive), prints it and passes it to ``webdriverFunc``.  The defaults
    reproduce the previously hard-coded ``range(1, 3)``, i.e. pages 1 and 2.

    Args:
        first_page: Number of the first listing page to crawl.
        last_page: Number of the last listing page to crawl (inclusive).
    """
    url_template = 'https://www.smzdm.com/fenlei/zhinengshouji/p{}/#feed-main'
    # The loop variable is deliberately not called ``i``: that name is also
    # used for the module-level item counter maintained by printCar.
    for page in range(first_page, last_page + 1):
        mainurl = url_template.format(page)
        print(mainurl)
        webdriverFunc(url=mainurl)
# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    doWork()
3.解析源码
def webdriverFunc(url):
    """Load a listing page in Chrome, extract the deal fields and print them.

    The page is opened in a Chrome session, its source is parsed with lxml,
    and the extracted pictures, titles, prices and shop names are handed to
    ``printCar``.  The browser session is always shut down, even when
    loading the page raises.

    Args:
        url: Address of the listing page to open.
    """
    # Start a Chrome session; it is released in the ``finally`` clause.
    browser = webdriver.Chrome()
    try:
        # Maximise the window before loading the page.
        browser.maximize_window()
        browser.get(url)
        # Keep a copy of the page source so the browser can be shut down
        # before the (slow) printing and downloading starts.
        page_source = browser.page_source
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        browser.quit()

    html = etree.HTML(page_source)
    # Each query collects one field for the entries under #feed-main-list.
    mobile_picture = html.xpath("//*[@id='feed-main-list']/li/div/div/a/img/@src")
    title = html.xpath("//*[@id='feed-main-list']/li/div/div/h5/a/text()")
    price = html.xpath("//*[@id='feed-main-list']/li/div/div/div[@class='z-highlight']/a/text()")
    shopping_name = html.xpath("//*[@id='feed-main-list']/li/div/div/div/div[2]/span/a/text()")

    # Trim surrounding whitespace; price entries that end up empty are dropped.
    price = [text for text in (raw.strip() for raw in price) if text != '']
    shopping_name = [shop.strip() for shop in shopping_name]

    # NOTE(review): printCar pairs the four lists by position with zip(), so a
    # listing that lacks one field shifts or truncates the pairing.
    printCar(title, mobile_picture, price, shopping_name)
4.打印信息
def printCar(title, mobile_picture, price, shopping_name):
    """Print every deal and download its picture.

    The four sequences are paired by position with ``zip``, so processing
    stops at the end of the shortest one.  Entries are numbered with the
    module-level counter ``i``, which is advanced once per entry.

    Args:
        title: Deal titles.
        mobile_picture: Picture addresses (``src`` attribute values).
        price: Price strings.
        shopping_name: Names of the shops / information sources.
    """
    global i
    from urllib.parse import urljoin

    for t, pic, p, shoppingname in zip(title, mobile_picture, price, shopping_name):
        # The same label is printed and used as the picture's file title.
        label = '{}.标题:{}'.format(i, t)
        # Resolve against an https base: a protocol-relative "//host/x" still
        # becomes "https://host/x", while an address that is already absolute
        # is no longer given a second "https:" prefix.
        pic_url = urljoin('https://www.smzdm.com/', pic)
        print(label)
        print('图片地址:{}'.format(pic_url))
        print('价格(单位:元):{}'.format(p))
        print('信息来源:{}'.format(shoppingname))
        print('----------------------------------------')
        # Save the picture next to the printed record.
        downloadPic(label, pic_url)
        # Advance the shared item counter.
        i += 1
5.下载图片
def downloadPic(title, url):
    """Download one picture and store it in the local picture folder.

    The file is written as ``<title>.gif`` inside the hard-coded picture
    folder.  Characters that cannot appear in a Windows file name are
    replaced with ``_``.  When the server answers with an error status,
    nothing is written and a message is printed instead.

    Args:
        title: Text used as the file name (without extension).
        url: Address of the picture to download.
    """
    import os

    # Destination folder for the downloaded pictures.
    pic_path = 'D:\\py_project\\picture\\什么值得买手机折扣信息\\'
    # Create the folder on first use so open() cannot fail on a missing path.
    os.makedirs(pic_path, exist_ok=True)

    # Titles are free text scraped from the page: replace path separators,
    # reserved characters and line breaks before using them as a file name.
    forbidden = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})
    safe_title = title.translate(forbidden).strip()
    # NOTE(review): the extension is always .gif, whatever format is served.
    file_path = os.path.join(pic_path, '%s.gif' % safe_title)

    # Bound the wait so one stalled request cannot hang the whole crawl.
    response = requests.get(url, timeout=10)
    if response.ok:
        with open(file_path, 'wb') as f:
            f.write(response.content)
    else:
        # Do not save an HTTP error body under a picture name.
        print('图片下载失败(HTTP {}):{}'.format(response.status_code, url))
    # Pause one second between downloads (presumably to throttle requests).
    time.sleep(1)
6.完整代码如下
# 我们以什么值得买网站查询手机折扣信息
from selenium import webdriver
from lxml import etree
import time
import requests
# Running item counter: printCar declares it ``global``, uses it to number
# every printed entry, and increments it by one after each entry.
i = 1
def webdriverFunc(url):
    """Load a listing page in Chrome, extract the deal fields and print them.

    The page is opened in a Chrome session, its source is parsed with lxml,
    the extracted lists are printed for a quick sanity check and then handed
    to ``printCar``.  The browser session is always shut down, even when
    loading the page raises.

    Args:
        url: Address of the listing page to open.
    """
    # Start a Chrome session; it is released in the ``finally`` clause.
    browser = webdriver.Chrome()
    try:
        # Maximise the window before loading the page.
        browser.maximize_window()
        browser.get(url)
        # Keep a copy of the page source so the browser can be shut down
        # before the (slow) printing and downloading starts.
        page_source = browser.page_source
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        browser.quit()

    html = etree.HTML(page_source)
    # Each query collects one field for the entries under #feed-main-list.
    mobile_picture = html.xpath("//*[@id='feed-main-list']/li/div/div/a/img/@src")
    title = html.xpath("//*[@id='feed-main-list']/li/div/div/h5/a/text()")
    price = html.xpath("//*[@id='feed-main-list']/li/div/div/div[@class='z-highlight']/a/text()")
    shopping_name = html.xpath("//*[@id='feed-main-list']/li/div/div/div/div[2]/span/a/text()")

    # Trim surrounding whitespace; price entries that end up empty are dropped.
    price = [text for text in (raw.strip() for raw in price) if text != '']
    shopping_name = [shop.strip() for shop in shopping_name]

    # Quick sanity check: dump the lists and compare their lengths, because
    # printCar pairs them by position and stops at the shortest one.
    print(mobile_picture)
    print(title)
    print(price)
    print(shopping_name)
    print(len(mobile_picture), len(title), len(price), len(shopping_name))

    # Print every entry and download its picture.
    printCar(title, mobile_picture, price, shopping_name)
def printCar(title, mobile_picture, price, shopping_name):
    """Print every deal and download its picture.

    The four sequences are paired by position with ``zip``, so processing
    stops at the end of the shortest one.  Entries are numbered with the
    module-level counter ``i``, which is advanced once per entry.

    Args:
        title: Deal titles.
        mobile_picture: Picture addresses (``src`` attribute values).
        price: Price strings.
        shopping_name: Names of the shops / information sources.
    """
    global i
    from urllib.parse import urljoin

    for t, pic, p, shoppingname in zip(title, mobile_picture, price, shopping_name):
        # The same label is printed and used as the picture's file title.
        label = '{}.标题:{}'.format(i, t)
        # Resolve against an https base: a protocol-relative "//host/x" still
        # becomes "https://host/x", while an address that is already absolute
        # is no longer given a second "https:" prefix.
        pic_url = urljoin('https://www.smzdm.com/', pic)
        print(label)
        print('图片地址:{}'.format(pic_url))
        print('价格(单位:元):{}'.format(p))
        print('信息来源:{}'.format(shoppingname))
        print('----------------------------------------')
        # Save the picture next to the printed record.
        downloadPic(label, pic_url)
        # Advance the shared item counter.
        i += 1
def downloadPic(title,url):
'''该方法功能:下载图片到本地'''
# 图片下载的路径
pic_path = 'D:\\py_project\\picture\\什么值得买手机折扣信息\\'
#图片文件名
file = pic_path + '\\%s.gif' % (title)
response = requests.get(url)
img = response.content
#下载图片