Python爬取的数据存为json文件，并读取

最新推荐文章于 2025-03-31 10:56:03 发布

Python_QB

最新推荐文章于 2025-03-31 10:56:03 发布

阅读量4.4k

点赞数 6

文章标签： python json chrome

本文链接：https://blog.csdn.net/qq_33267306/article/details/122428057

版权

Python爬取的数据存为json文件，并读取

import requests
import time
from lxml import etree
import json


def _pick(values, index, default=''):
    """Return ``values[index]``, or ``default`` when that position does not exist."""
    try:
        return values[index]
    except IndexError:
        return default


def json_data_save(url, path='a.json'):
    """Scrape a book search-result page and save the extracted fields as JSON.

    The page at ``url`` is downloaded, decoded as GBK and parsed with lxml.
    For every ``<li>`` under ``<ul class="bigimg">`` the title, author,
    publisher, publish date and current price are collected into parallel
    lists, which are written to ``path`` as one JSON object with the keys
    ``name``, ``athor``, ``publish``, ``publish_time`` and ``price``.

    Args:
        url: Address of the search-result page to download.
        path: Destination file. It is overwritten on every call so that it
            always holds exactly one JSON document.

    Returns:
        None. A failure to write the file is printed rather than raised.

    Raises:
        requests.exceptions.RequestException: if the download fails or
            takes longer than the timeout.
        UnicodeDecodeError: if the response body is not valid GBK.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }

    # A timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.content.decode('gbk')
    data = etree.HTML(html)
    lists = data.xpath('//ul[@class="bigimg"]/li')

    names = []
    athors = []
    publishs = []
    publish_times = []
    prices = []
    for lis in lists:
        # Evaluate each XPath once per item; author and publisher both come
        # from the same list of link titles (first and last entry).
        titles = lis.xpath('./a/@title')
        author_titles = lis.xpath('./p[@class="search_book_author"]/span/a/@title')
        author_texts = lis.xpath('./p[@class="search_book_author"]/span/text()')
        now_prices = lis.xpath('./p[@class="price"]/span[@class="search_now_price"]/text()')

        # _pick() yields '' for a missing field so that one incomplete item
        # neither aborts the scrape nor misaligns the parallel lists.
        names.append(_pick(titles, 0))
        athors.append(_pick(author_titles, 0))
        publishs.append(_pick(author_titles, -1))
        publish_times.append(_pick(author_texts, -2).split('/')[-1])
        prices.append(_pick(now_prices, 0).split('¥')[-1])

    # NOTE: the key 'athor' (sic) is kept as-is; it is part of the saved data format.
    dicts = {
        'name': names,
        'athor': athors,
        'publish': publishs,
        'publish_time': publish_times,
        'price': prices,
    }

    # Save the data as JSON. Mode 'w' (not 'a') is required: appending a second
    # document to the same file would make it unparseable by json.load().
    # The with-statement closes the file; no explicit close() is needed.
    try:
        with open(path, 'w', encoding="utf-8") as f:
            # ensure_ascii=False writes non-ASCII characters verbatim.
            f.write(json.dumps(dicts, ensure_ascii=False))
    except IOError as e:
        print(str(e))
def open_json(path):
    """Load the JSON file at ``path``, print its content and type, and return it.

    Args:
        path: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON value, or None when the file cannot be opened or
        does not contain valid JSON (the error message is printed instead
        of being raised).
    """
    try:
        # The with-statement closes the file on every path. A separate
        # ``finally: f.close()`` would fail with UnboundLocalError whenever
        # open() itself raised, because ``f`` is never bound in that case.
        with open(path, 'r', encoding='utf-8') as f:
            js_data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: invalid JSON or
        # invalid UTF-8 (JSONDecodeError and UnicodeDecodeError subclass it).
        print(str(e))
        return None

    print('这是json数据：', js_data)
    print('这是数据类型：', type(js_data))
    return js_data
        
if __name__ == '__main__':
    # Script entry point: scrape one search-result page, then read the saved
    # file back and print it.
    path = 'a.json'
    # The ``key`` parameter looks like a GBK percent-encoded search keyword
    # (TODO confirm); ``page_index=1`` requests the first result page.
    url = 'http://search.dangdang.com/?key=%D0%A1%CD%F5%D7%D3&act=input&page_index=1'

    json_data_save(url)
    # Short pause between writing the file and reading it back.
    time.sleep(1)
    open_json(path)