网络爬虫:post和get方法爬取人民邮电书籍基本信息并存入Excel文件

网络爬虫:post和get方法爬取人民邮电书籍基本信息

get方法获取基本信息(主要获取bookId)
def getList():
    """Fetch the recommended-book list from the ptpress portal.

    Sends a GET request to the ``getRecommendBookListForPortal`` endpoint
    for a fixed ``bookTagId`` and returns the ``data`` member of the JSON
    reply. Other functions in this file read ``'bookId'`` and
    ``'bookName'`` from each entry of the returned value.

    Returns:
        The value stored under ``'data'`` in the decoded JSON response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If the decoded JSON object has no ``'data'`` key.
    """
    url = 'https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal?bookTagId=828bebbb-af9f-401f-a1fa-42551fc0866a'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # requests waits indefinitely by default; bound the wait so a stalled
    # server cannot hang the whole script.
    response = requests.get(url=url, headers=headers, timeout=10)
    return response.json()['data']
通过bookId,获取书籍价格
def getPrice(lists):
    """Fetch the detail record of every book with GET requests.

    Args:
        lists: Iterable of mappings, each providing a ``'bookId'`` key
            (for example the entries returned by ``getList``).

    Returns:
        A list with one element per input entry, in input order: the
        ``'data'`` member of the ``getBookDetailsById`` JSON reply.
        Despite the function name the whole detail record is kept; the
        ``save`` function in this file reads ``'discountPrice'`` from it.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If an entry lacks ``'bookId'`` or a reply lacks ``'data'``.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    Price = []
    # Iterate the entries directly so any iterable works, not only
    # sequences that support len() and integer indexing.
    for book in lists:
        params = {'bookId': book['bookId']}
        # Bound the wait so one stalled request cannot hang the script.
        response = requests.get(url=url1, params=params, headers=headers, timeout=10)
        Price.append(response.json()['data'])
    return Price
通过bookId,获取ISBN
def getIsbn(lists):
    """Fetch the ISBN of every book with POST requests.

    Args:
        lists: Iterable of mappings, each providing a ``'bookId'`` key
            (for example the entries returned by ``getList``).

    Returns:
        A list of ISBN values, one per input entry, in input order.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If an expected key is missing from an entry or a reply.
        IndexError: If a reply's ``'photos'`` list has fewer than two items.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    isbn = []
    # Iterate the entries directly so any iterable works, not only
    # sequences that support len() and integer indexing.
    for book in lists:
        params = {'bookId': book['bookId']}
        # The bookId is sent in the query string of a POST request.
        # Bound the wait so one stalled request cannot hang the script.
        response = requests.post(url=url1, params=params, headers=headers, timeout=10)
        details = response.json()['data']
        # The ISBN is read from the second 'photos' entry (index 1).
        # NOTE(review): presumably every book has at least two photos
        # entries -- confirm against the API.
        isbn.append(details['photos'][1]['isbn'])
    return isbn
通过ISBN获取库存
# 获取库存
def getSave(lists):
    """Fetch the stock reply for every ISBN with POST requests.

    Args:
        lists: Iterable of ISBN values (for example the list returned by
            ``getIsbn``).

    Returns:
        A list with one element per input ISBN, in input order: the whole
        decoded JSON reply of the ``getBookSaleStock`` endpoint. The
        ``save`` function in this file reads ``'data'`` from each reply.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url1 = 'https://www.ptpress.com.cn/order/getBookSaleStock'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Named "stocks" rather than "save" so the local does not shadow the
    # sibling function save() defined in this file.
    stocks = []
    # Iterate the values directly so any iterable works, not only
    # sequences that support len() and integer indexing.
    for isbn in lists:
        # Bound the wait so one stalled request cannot hang the script.
        response = requests.post(url=url1, params={'isbn': isbn}, headers=headers, timeout=10)
        stocks.append(response.json())
    return stocks

将基本信息存入excel表格
def save(list, Price, saves):
    """Write book names, discount prices and stock values to a spreadsheet.

    Row 0 holds the headers ``bookName``, ``Price`` and ``save``; data
    starts at row 1. Each column is filled from its own sequence, so the
    three inputs are expected to be in the same book order.

    Args:
        list: Iterable of mappings with a ``'bookName'`` key (column 0).
            The parameter name shadows the builtin; it is kept so existing
            callers keep working.
        Price: Iterable of mappings with a ``'discountPrice'`` key
            (column 1).
        saves: Iterable of mappings with a ``'data'`` key (column 2).

    Side effects:
        Creates or overwrites ``./电子类新书价格库存信息表.xls`` in the
        current working directory.
    """
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet('python_word')

    for col, title in enumerate(['bookName', 'Price', 'save']):
        sheet.write(0, col, title)

    # The columns are written independently (not zipped) so that every
    # element of each input is written even if the lengths differ.
    for row, item in enumerate(list, start=1):
        sheet.write(row, 0, item['bookName'])       # first column
    for row, item in enumerate(Price, start=1):
        sheet.write(row, 1, item['discountPrice'])  # second column
    for row, item in enumerate(saves, start=1):
        sheet.write(row, 2, item['data'])           # third column

    # xlwt produces the legacy binary .xls format only. Saving that content
    # under an .xlsx name makes Excel reject the file because the format
    # and the extension do not match, so the matching extension is used.
    work_book.save('./电子类新书价格库存信息表.xls')
完整代码
import requests
import json
import xlwt
# 获取信息
def getList():
    """Fetch the recommended-book list from the ptpress portal.

    Sends a GET request to the ``getRecommendBookListForPortal`` endpoint
    for a fixed ``bookTagId`` and returns the ``data`` member of the JSON
    reply. Other functions in this file read ``'bookId'`` and
    ``'bookName'`` from each entry of the returned value.

    Returns:
        The value stored under ``'data'`` in the decoded JSON response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If the decoded JSON object has no ``'data'`` key.
    """
    url = 'https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal?bookTagId=828bebbb-af9f-401f-a1fa-42551fc0866a'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # requests waits indefinitely by default; bound the wait so a stalled
    # server cannot hang the whole script.
    response = requests.get(url=url, headers=headers, timeout=10)
    return response.json()['data']


# 获取价格
def getPrice(lists):
    """Fetch the detail record of every book with GET requests.

    Args:
        lists: Iterable of mappings, each providing a ``'bookId'`` key
            (for example the entries returned by ``getList``).

    Returns:
        A list with one element per input entry, in input order: the
        ``'data'`` member of the ``getBookDetailsById`` JSON reply.
        Despite the function name the whole detail record is kept; the
        ``save`` function in this file reads ``'discountPrice'`` from it.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If an entry lacks ``'bookId'`` or a reply lacks ``'data'``.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    Price = []
    # Iterate the entries directly so any iterable works, not only
    # sequences that support len() and integer indexing.
    for book in lists:
        params = {'bookId': book['bookId']}
        # Bound the wait so one stalled request cannot hang the script.
        response = requests.get(url=url1, params=params, headers=headers, timeout=10)
        Price.append(response.json()['data'])
    return Price

# 获取ISbn
def getIsbn(lists):
    """Fetch the ISBN of every book with POST requests.

    Args:
        lists: Iterable of mappings, each providing a ``'bookId'`` key
            (for example the entries returned by ``getList``).

    Returns:
        A list of ISBN values, one per input entry, in input order.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        KeyError: If an expected key is missing from an entry or a reply.
        IndexError: If a reply's ``'photos'`` list has fewer than two items.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    isbn = []
    # Iterate the entries directly so any iterable works, not only
    # sequences that support len() and integer indexing.
    for book in lists:
        params = {'bookId': book['bookId']}
        # The bookId is sent in the query string of a POST request.
        # Bound the wait so one stalled request cannot hang the script.
        response = requests.post(url=url1, params=params, headers=headers, timeout=10)
        details = response.json()['data']
        # The ISBN is read from the second 'photos' entry (index 1).
        # NOTE(review): presumably every book has at least two photos
        # entries -- confirm against the API.
        isbn.append(details['photos'][1]['isbn'])
    return isbn

# 获取库存
def getSave(lists):
    """Fetch the stock reply for every ISBN with POST requests.

    Args:
        lists: Iterable of ISBN values (for example the list returned by
            ``getIsbn``).

    Returns:
        A list with one element per input ISBN, in input order: the whole
        decoded JSON reply of the ``getBookSaleStock`` endpoint. The
        ``save`` function in this file reads ``'data'`` from each reply.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    url1 = 'https://www.ptpress.com.cn/order/getBookSaleStock'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Named "stocks" rather than "save" so the local does not shadow the
    # sibling function save() defined in this file.
    stocks = []
    # Iterate the values directly so any iterable works, not only
    # sequences that support len() and integer indexing.
    for isbn in lists:
        # Bound the wait so one stalled request cannot hang the script.
        response = requests.post(url=url1, params={'isbn': isbn}, headers=headers, timeout=10)
        stocks.append(response.json())
    return stocks

#保存
def save(list, Price, saves):
    """Write book names, discount prices and stock values to a spreadsheet.

    Row 0 holds the headers ``bookName``, ``Price`` and ``save``; data
    starts at row 1. Each column is filled from its own sequence, so the
    three inputs are expected to be in the same book order.

    Args:
        list: Iterable of mappings with a ``'bookName'`` key (column 0).
            The parameter name shadows the builtin; it is kept so existing
            callers keep working.
        Price: Iterable of mappings with a ``'discountPrice'`` key
            (column 1).
        saves: Iterable of mappings with a ``'data'`` key (column 2).

    Side effects:
        Creates or overwrites ``./电子类新书价格库存信息表.xls`` in the
        current working directory.
    """
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet('python_word')

    for col, title in enumerate(['bookName', 'Price', 'save']):
        sheet.write(0, col, title)

    # The columns are written independently (not zipped) so that every
    # element of each input is written even if the lengths differ.
    for row, item in enumerate(list, start=1):
        sheet.write(row, 0, item['bookName'])       # first column
    for row, item in enumerate(Price, start=1):
        sheet.write(row, 1, item['discountPrice'])  # second column
    for row, item in enumerate(saves, start=1):
        sheet.write(row, 2, item['data'])           # third column

    # xlwt produces the legacy binary .xls format only. Saving that content
    # under an .xlsx name makes Excel reject the file because the format
    # and the extension do not match, so the matching extension is used.
    work_book.save('./电子类新书价格库存信息表.xls')

# Fetch the book list (each entry carries a bookId)
list1 = getList()
# Fetch the price details for every book
Price = getPrice(list1)
# Fetch the ISBN of every book
Isbn = getIsbn(list1)
# Fetch the stock information for every ISBN
saves = getSave(Isbn)
# Write everything to the spreadsheet file
save(list1,Price,saves)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值