# Scrape the national-level intangible cultural heritage representative project
# list from www.ihchina.cn and save it to an Excel workbook.
#
# Note: the third-party libraries can be installed from the Tsinghua mirror
# site (its "Simple Index").
import json
import requests
import pandas as pd

# Seconds to wait for the server before giving up on a page. Without a timeout
# requests waits indefinitely, so one stalled connection would hang the crawl.
REQUEST_TIMEOUT = 30

# JSON keys read from every record, in the same order as the column lists in
# `columns` below.
FIELD_KEYS = ('title', 'type', 'unit', 'protect_unit', 'content', 'rx_time')

# One list per output column; entry k of every list belongs to the same record.
title = []         # name
type1 = []         # type
unit = []          # declaring region or unit
content = []       # content
protect_unit = []  # protecting unit
rx_time = []       # time
columns = (title, type1, unit, protect_unit, content, rx_time)

# Pages 1 through 306 are requested; the URL asks for 10 records per page.
for i in range(1, 307):
    print('爬取第{}页'.format(i))
    recommed_url = 'http://www.ihchina.cn/Article/Index/getProject.html?province=&rx_time=&type=&cate=&keywords=&category_id=16&limit=10&p={0}'.format(i)
    try:
        response = requests.get(url=recommed_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()  # raises for an unsuccessful HTTP status
        diss_dict = response.json()
        # Read every field of every record before touching the column lists,
        # so a malformed record can never leave the lists with unequal lengths.
        page_rows = [[item[key] for key in FIELD_KEYS] for item in diss_dict['list']]
    except requests.exceptions.RequestException as e:
        print(f"请求第 {i} 页时出现问题: {e}")
        continue
    except (KeyError, TypeError, ValueError) as e:
        # Unexpected payload shape (missing key, non-dict record, or a body
        # that is not JSON on older requests versions): skip this page and
        # keep the data collected so far instead of aborting the whole run.
        print(f"解析第 {i} 页时出现问题: {e!r}")
        continue

    for row in page_rows:
        for column, value in zip(columns, row):
            column.append(value)

data = {
    '名称': title,
    '类型': type1,
    '申报地区或单位': unit,
    '保护单位': protect_unit,
    '内容': content,
    '时间': rx_time,
}

df = pd.DataFrame(data)

# Writing is reported rather than raised so that a failure to save still ends
# the script with a readable message.
try:
    with pd.ExcelWriter('国家级非物质文化遗产.xlsx', engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
    print("数据已成功保存到 Excel 文件")
except Exception as e:
    print(f"保存到 Excel 文件时出现问题: {e}")
Python 爬取国家级非物质文化遗产代表性项目名录代码(https://www.ihchina.cn/)
于 2024-06-20 21:59:59 首次发布