网络爬虫:使用 POST 和 GET 方法爬取人民邮电出版社图书的基本信息
get方法获取基本信息(主要获取bookId)
def getList(timeout=10):
    """Fetch the recommended-book list for one fixed book tag.

    Sends a GET request to the ptpress.com.cn recommended-book endpoint
    (the tag id is hard-coded in the URL) and returns whatever the JSON
    response stores under its ``'data'`` key.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block forever on a stalled
            connection.

    Returns:
        The value of ``'data'`` in the decoded JSON response. Presumably a
        list of dicts carrying ``'bookId'`` and ``'bookName'`` (that is how
        the other functions in this file consume it) -- TODO confirm.

    Raises:
        requests.exceptions.HTTPError: The server answered with 4xx/5xx.
        requests.exceptions.Timeout: No answer within ``timeout`` seconds.
    """
    url = 'https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal?bookTagId=828bebbb-af9f-401f-a1fa-42551fc0866a'
    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()['data']
通过bookId,获取书籍价格
def getPrice(lists, timeout=10):
    """Fetch the detail record of every book in ``lists``.

    For each entry, a GET request carrying the entry's ``'bookId'`` is sent
    to the book-details endpoint and the ``'data'`` part of the JSON reply
    is collected.

    Args:
        lists: Iterable of dicts, each with a ``'bookId'`` key (the result
            of ``getList``).
        timeout: Seconds to wait for each request; prevents an indefinite
            hang on a stalled connection.

    Returns:
        A list with one ``'data'`` payload per input book, in input order.
        The caller reads ``'discountPrice'`` from each payload.

    Raises:
        requests.exceptions.HTTPError: A request returned 4xx/5xx.
        requests.exceptions.Timeout: A request exceeded ``timeout``.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    Price = []
    for book in lists:
        params = {'bookId': book['bookId']}
        response = requests.get(url=url1, params=params, headers=headers,
                                timeout=timeout)
        response.raise_for_status()
        Price.append(response.json()['data'])
    return Price
通过 bookId 获取 ISBN
def getIsbn(lists, timeout=10):
    """Look up the ISBN of every book in ``lists``.

    For each entry, a POST request carrying the entry's ``'bookId'`` as a
    query parameter is sent to the book-details endpoint, and the ISBN is
    read from the second element of the reply's ``data.photos`` array.

    Args:
        lists: Iterable of dicts, each with a ``'bookId'`` key.
        timeout: Seconds to wait for each request; prevents an indefinite
            hang on a stalled connection.

    Returns:
        A list of ISBN values, one per input book, in input order.

    Raises:
        requests.exceptions.HTTPError: A request returned 4xx/5xx.
        requests.exceptions.Timeout: A request exceeded ``timeout``.
        IndexError: A reply has fewer than two entries in ``photos``.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    isbn = []
    for book in lists:
        params = {'bookId': book['bookId']}
        response = requests.post(url=url1, params=params, headers=headers,
                                 timeout=timeout)
        response.raise_for_status()
        details = response.json()['data']
        # NOTE(review): index 1 is kept from the original; it assumes every
        # book has at least two photo entries -- confirm against the API.
        isbn.append(details['photos'][1]['isbn'])
    return isbn
通过 ISBN 获取库存
def getSave(lists, timeout=10):
    """Query the sale-stock endpoint for every ISBN in ``lists``.

    Each ISBN is sent as the ``isbn`` query parameter of a POST request and
    the complete decoded JSON reply is collected.

    Args:
        lists: Iterable of ISBN values (the result of ``getIsbn``).
        timeout: Seconds to wait for each request; prevents an indefinite
            hang on a stalled connection.

    Returns:
        A list of decoded JSON replies, one per ISBN, in input order. The
        caller reads the ``'data'`` key of each reply.

    Raises:
        requests.exceptions.HTTPError: A request returned 4xx/5xx.
        requests.exceptions.Timeout: A request exceeded ``timeout``.
    """
    url1 = 'https://www.ptpress.com.cn/order/getBookSaleStock'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Named `stocks` rather than `save` so the local list does not hide the
    # sibling function `save` inside this body.
    stocks = []
    for isbn in lists:
        params = {'isbn': isbn}
        response = requests.post(url=url1, params=params, headers=headers,
                                 timeout=timeout)
        response.raise_for_status()
        stocks.append(response.json())
    return stocks
将基本信息存入 Excel 表格
def save(list, Price, saves):
    """Write book names, discount prices and stock values to a spreadsheet.

    Row 0 holds the column titles. Starting at row 1, column 0 receives
    ``item['bookName']`` for each entry of ``list``, column 1 receives
    ``item['discountPrice']`` for each entry of ``Price`` and column 2
    receives ``item['data']`` for each entry of ``saves``. The three
    columns are filled independently, so inputs of different lengths
    simply yield columns of different heights.

    The workbook is saved with an ``.xls`` extension: ``xlwt`` produces the
    legacy binary Excel format, and storing that content under ``.xlsx``
    makes Excel reject the file as a format/extension mismatch.

    Args:
        list: Iterable of dicts with a ``'bookName'`` key. (The name
            shadows the builtin; it is kept for interface compatibility.)
        Price: Iterable of dicts with a ``'discountPrice'`` key.
        saves: Iterable of dicts with a ``'data'`` key.

    Returns:
        None. The workbook is written to the current working directory.
    """
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet('python_word')

    heads = ['bookName', 'Price', 'save']
    for col, title in enumerate(heads):
        sheet.write(0, col, title)

    # Data rows start at 1 because row 0 is the header.
    for row, item in enumerate(list, start=1):
        sheet.write(row, 0, item['bookName'])
    for row, item in enumerate(Price, start=1):
        sheet.write(row, 1, item['discountPrice'])
    for row, item in enumerate(saves, start=1):
        sheet.write(row, 2, item['data'])

    work_book.save('./电子类新书价格库存信息表.xls')
完整代码
import requests
import json
import xlwt
def getList(timeout=10):
    """Fetch the recommended-book list for one fixed book tag.

    Sends a GET request to the ptpress.com.cn recommended-book endpoint
    (the tag id is hard-coded in the URL) and returns whatever the JSON
    response stores under its ``'data'`` key.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block forever on a stalled
            connection.

    Returns:
        The value of ``'data'`` in the decoded JSON response. Presumably a
        list of dicts carrying ``'bookId'`` and ``'bookName'`` (that is how
        the other functions in this file consume it) -- TODO confirm.

    Raises:
        requests.exceptions.HTTPError: The server answered with 4xx/5xx.
        requests.exceptions.Timeout: No answer within ``timeout`` seconds.
    """
    url = 'https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal?bookTagId=828bebbb-af9f-401f-a1fa-42551fc0866a'
    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()['data']
def getPrice(lists, timeout=10):
    """Fetch the detail record of every book in ``lists``.

    For each entry, a GET request carrying the entry's ``'bookId'`` is sent
    to the book-details endpoint and the ``'data'`` part of the JSON reply
    is collected.

    Args:
        lists: Iterable of dicts, each with a ``'bookId'`` key (the result
            of ``getList``).
        timeout: Seconds to wait for each request; prevents an indefinite
            hang on a stalled connection.

    Returns:
        A list with one ``'data'`` payload per input book, in input order.
        The caller reads ``'discountPrice'`` from each payload.

    Raises:
        requests.exceptions.HTTPError: A request returned 4xx/5xx.
        requests.exceptions.Timeout: A request exceeded ``timeout``.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    Price = []
    for book in lists:
        params = {'bookId': book['bookId']}
        response = requests.get(url=url1, params=params, headers=headers,
                                timeout=timeout)
        response.raise_for_status()
        Price.append(response.json()['data'])
    return Price
def getIsbn(lists, timeout=10):
    """Look up the ISBN of every book in ``lists``.

    For each entry, a POST request carrying the entry's ``'bookId'`` as a
    query parameter is sent to the book-details endpoint, and the ISBN is
    read from the second element of the reply's ``data.photos`` array.

    Args:
        lists: Iterable of dicts, each with a ``'bookId'`` key.
        timeout: Seconds to wait for each request; prevents an indefinite
            hang on a stalled connection.

    Returns:
        A list of ISBN values, one per input book, in input order.

    Raises:
        requests.exceptions.HTTPError: A request returned 4xx/5xx.
        requests.exceptions.Timeout: A request exceeded ``timeout``.
        IndexError: A reply has fewer than two entries in ``photos``.
    """
    url1 = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    isbn = []
    for book in lists:
        params = {'bookId': book['bookId']}
        response = requests.post(url=url1, params=params, headers=headers,
                                 timeout=timeout)
        response.raise_for_status()
        details = response.json()['data']
        # NOTE(review): index 1 is kept from the original; it assumes every
        # book has at least two photo entries -- confirm against the API.
        isbn.append(details['photos'][1]['isbn'])
    return isbn
def getSave(lists, timeout=10):
    """Query the sale-stock endpoint for every ISBN in ``lists``.

    Each ISBN is sent as the ``isbn`` query parameter of a POST request and
    the complete decoded JSON reply is collected.

    Args:
        lists: Iterable of ISBN values (the result of ``getIsbn``).
        timeout: Seconds to wait for each request; prevents an indefinite
            hang on a stalled connection.

    Returns:
        A list of decoded JSON replies, one per ISBN, in input order. The
        caller reads the ``'data'`` key of each reply.

    Raises:
        requests.exceptions.HTTPError: A request returned 4xx/5xx.
        requests.exceptions.Timeout: A request exceeded ``timeout``.
    """
    url1 = 'https://www.ptpress.com.cn/order/getBookSaleStock'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # Named `stocks` rather than `save` so the local list does not hide the
    # sibling function `save` inside this body.
    stocks = []
    for isbn in lists:
        params = {'isbn': isbn}
        response = requests.post(url=url1, params=params, headers=headers,
                                 timeout=timeout)
        response.raise_for_status()
        stocks.append(response.json())
    return stocks
def save(list, Price, saves):
    """Write book names, discount prices and stock values to a spreadsheet.

    Row 0 holds the column titles. Starting at row 1, column 0 receives
    ``item['bookName']`` for each entry of ``list``, column 1 receives
    ``item['discountPrice']`` for each entry of ``Price`` and column 2
    receives ``item['data']`` for each entry of ``saves``. The three
    columns are filled independently, so inputs of different lengths
    simply yield columns of different heights.

    The workbook is saved with an ``.xls`` extension: ``xlwt`` produces the
    legacy binary Excel format, and storing that content under ``.xlsx``
    makes Excel reject the file as a format/extension mismatch.

    Args:
        list: Iterable of dicts with a ``'bookName'`` key. (The name
            shadows the builtin; it is kept for interface compatibility.)
        Price: Iterable of dicts with a ``'discountPrice'`` key.
        saves: Iterable of dicts with a ``'data'`` key.

    Returns:
        None. The workbook is written to the current working directory.
    """
    work_book = xlwt.Workbook(encoding='utf-8')
    sheet = work_book.add_sheet('python_word')

    heads = ['bookName', 'Price', 'save']
    for col, title in enumerate(heads):
        sheet.write(0, col, title)

    # Data rows start at 1 because row 0 is the header.
    for row, item in enumerate(list, start=1):
        sheet.write(row, 0, item['bookName'])
    for row, item in enumerate(Price, start=1):
        sheet.write(row, 1, item['discountPrice'])
    for row, item in enumerate(saves, start=1):
        sheet.write(row, 2, item['data'])

    work_book.save('./电子类新书价格库存信息表.xls')
# Script driver: each step feeds the next, so the order must not change.
# 1. Fetch the recommended-book list.
list1 = getList()
# 2. Fetch the detail record of every listed book.
Price = getPrice(list1)
# 3. Resolve every listed book to its ISBN.
Isbn = getIsbn(list1)
# 4. Query the stock endpoint once per ISBN.
saves = getSave(Isbn)
# 5. Write names, prices and stock values to the spreadsheet.
save(list1,Price,saves)