'''
Scrape app data from Wandoujia.
- Request URL:
page1:
https://www.wandoujia.com/wdjweb/api/category/more?
catId=6001&subCatId=0&page=2&ctoken=vbw9lj1sRQsRddx0hD-XqCNF
'''
# 1. Send the request
import re
import requests
from bs4 import BeautifulSoup
def get_page(url, timeout=10):
    """Send a GET request to ``url`` and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The response object produced by ``requests.get``.
    """
    # The response has to be returned: the script entry point calls
    # ``.json()`` on whatever this function hands back.
    return requests.get(url, timeout=timeout)
# 2. Parse the data
def _tag_text(tag):
    """Return the text of a parsed tag, or '' when the lookup found nothing."""
    return tag.text if tag is not None else ''


def parse_data(text):
    """Extract every app card from an HTML fragment and append it to a file.

    Each ``<li class="card">`` element is read for the app name, the link
    (``href``) of the name anchor, the install count and the size. The
    values are printed and appended to ``wandoujia.text`` (UTF-8) as one
    formatted record per card.

    Args:
        text: HTML markup containing the ``li.card`` elements.

    Returns:
        None. The output file is not touched when no card is found.
    """
    soup = BeautifulSoup(text, 'lxml')
    print(soup)
    li_list = soup.find_all(name='li', class_='card')
    if not li_list:
        return

    # Use the public re.compile (re._compile is private and needs a flags
    # argument) with a raw string, and build the pattern once for all cards.
    size_pattern = re.compile(r'\d+MB')

    # Record layout written to the file; labels are runtime output.
    record_template = (
        '\n'
        '游戏名称:{}\n'
        '游戏地址:{}\n'
        '下载人数:{}\n'
        '游戏大小:{}\n'
        '\n'
        '\n'
    )

    # Open the output once in append mode; leaving the ``with`` block
    # flushes and closes it, so no explicit flush is needed.
    with open('wandoujia.text', 'a', encoding='utf-8') as f:
        for li in li_list:
            print(li)
            print('tank' * 100)  # visual separator between cards

            # The same anchor carries both the app name and its link.
            name_link = li.find(name='a', class_='name')
            app_name = _tag_text(name_link)
            print(app_name)
            app_url = name_link.attrs.get('href') if name_link is not None else ''
            print(app_url)

            download_num = _tag_text(li.find(name='span', class_='install-count'))
            print(download_num)

            # Only spans whose title matches the pattern are found; a card
            # without such a span gets an empty size instead of crashing.
            app_size = _tag_text(li.find(name='span', attrs={"title": size_pattern}))
            print(app_size)

            app_data = record_template.format(app_name, app_url, download_num, app_size)
            print(app_data)
            f.write(app_data)
if __name__ == '__main__':
    # Category listing endpoint; only the page number changes per request.
    url_template = (
        'https://www.wandoujia.com/wdjweb/api/category/more'
        '?catId=6001&subCatId=0&page={}&ctoken=vbw9lj1sRQsRddx0hD-XqCNF'
    )

    # Walk pages 1 through 30.
    for page in range(1, 31):
        url = url_template.format(page)
        print(url)

        # 1. Send the request to the API endpoint and take its response.
        response = get_page(url)

        # Convert the JSON body into a Python dict.
        data = response.json()

        # NOTE(review): the original read 'content' from the 'state' object;
        # the card HTML is presumably under 'data' instead - confirm against
        # a live response. Both locations are tried, 'data' first.
        html = None
        for key in ('data', 'state'):
            section = data.get(key)
            if isinstance(section, dict) and section.get('content'):
                html = section['content']
                break
        print(html)

        # Nothing to parse on this page (no HTML fragment in the response).
        if not html:
            continue

        # 2. Parse the HTML fragment itself (not its type) and store the apps.
        parse_data(html)