字符串和 dict/list 的相互转换（json 模块）
# Convert between a JSON string and a Python list of dicts.
import json

data = '[{"name":"张三","age":20},{"name":"李四","age":18}]'
print(data)

# Deserialize: JSON text -> Python list of dicts.
list_data = json.loads(data)
print(list_data)

# Serialize: Python objects -> JSON text (non-ASCII is escaped by default).
list2 = [{"name": "张三", "age": 20}, {"name": "李四", "age": 18}]
data_json = json.dumps(list2)
print(data_json)
读取、写入json文件
# Write a Python list of dicts to a JSON file, then read it back.
import json

list2 = [{"name": "张三", "age": 20}, {"name": "李四", "age": 18}]

# Context managers guarantee the handles are closed even on error —
# the original never closed the read handle — and an explicit encoding
# avoids depending on the platform's locale default.
with open('02new.json', 'w', encoding='utf-8') as fp:
    json.dump(list2, fp)

with open('02new.json', 'r', encoding='utf-8') as fp:
    results = json.load(fp)  # fixed typo: "resulst" -> "results"
print(results)
json 文件中的数据转换成 csv 文件
# Convert the records stored in a JSON file into a CSV file.
import json
import csv

# newline='' is required when handing a file to the csv module;
# without it the writer emits blank rows between records on Windows.
# The with-statement also fixes the original's unclosed-on-error handles.
with open('02new.json', 'r', encoding='utf-8') as json_fp, \
        open('03csv.csv', 'w', encoding='utf-8', newline='') as csv_fp:
    data_list = json.load(json_fp)

    # Header row comes from the first record's keys; assumes every record
    # shares the same keys (true for this tutorial's data — TODO confirm
    # if reused on other input).
    sheet_title = data_list[0].keys()
    print(sheet_title)

    sheet_data = [record.values() for record in data_list]
    print(sheet_data)

    writer = csv.writer(csv_fp)
    writer.writerow(sheet_title)
    writer.writerows(sheet_data)
案例（电子书网站爬虫）
电子书网站地址：http://www.allitebooks.com
import requests
from lxml import etree
from bs4 import BeautifulSoup
import json
class BookSpider(object):
    """Scrape book listings from allitebooks.com and save them to 04book.json."""

    def __init__(self):
        # {} placeholder is filled with the page number by get_url_list().
        self.base_url = 'http://www.allitebooks.com/page/{}'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
        # Book dicts accumulated across all crawled pages.
        self.data_list = []

    def get_url_list(self):
        """Return the page URLs to crawl (pages 1 through 9)."""
        return [self.base_url.format(i) for i in range(1, 10)]

    def send_request(self, url):
        """Download *url* and return the decoded response body as text."""
        data = requests.get(url, headers=self.headers).content.decode()
        print(url)
        return data

    def parse_xpath_data(self, data):
        """Parse one page of HTML with lxml/XPath; append book dicts to self.data_list.

        Alternative to parse_bs4_data; not used by start().
        """
        parse_data = etree.HTML(data)
        book_list = parse_data.xpath('//div[@class="main-content-inner clearfix"]/article')
        for book in book_list:
            book_dict = {
                'book_name': book.xpath('.//h2[@class="entry-title"]//text()')[0],
                'book_img_url': book.xpath('div[@class="entry-thumbnail hover-thumb"]/a/img/@src')[0],
                'book_author': book.xpath('.//h5[@class="entry-author"]//text()')[0],
                'book_info': book.xpath('.//div[@class="entry-summary"]/p/text()')[0],
            }
            self.data_list.append(book_dict)

    def parse_bs4_data(self, data):
        """Parse one page of HTML with BeautifulSoup; append book dicts to self.data_list."""
        bs4_data = BeautifulSoup(data, 'lxml')
        for book in bs4_data.select('article'):
            book_dict = {
                'book_name': book.select_one('.entry-title').get_text(),
                'book_img_url': book.select_one('.attachment-post-thumbnail').get('src'),
                # [3:] strips a fixed-width prefix from the author line
                # (presumably "By " — verify against the live markup).
                'book_author': book.select_one('.entry-author').get_text()[3:],
                'book_info': book.select_one('.entry-summary p').get_text(),
            }
            print(book_dict)
            self.data_list.append(book_dict)

    def save_data(self):
        """Write all collected books to 04book.json.

        Uses a with-statement: the original opened the file inline and
        never closed it, leaking the handle.
        """
        with open('04book.json', 'w', encoding='utf-8') as fp:
            json.dump(self.data_list, fp)

    def start(self):
        """Crawl every page, parse it, and save the combined results."""
        for url in self.get_url_list():
            self.parse_bs4_data(self.send_request(url))
        self.save_data()
# Guard the crawl so importing this module does not trigger network I/O.
if __name__ == '__main__':
    BookSpider().start()