数据用户名有 ’ \ /还有空格的,因为不会python操作sql数据库,直接现学了MySQLdb,入库时过滤做不好,直接丢弃了这些特殊名称的数据了。
import json
import MySQLdb
import pymysql
conn = MySQLdb.connect(host='127.0.0.1', user='root',passwd='root',db='personal_information',port=3306,charset = 'utf8')
# 连接数据库
cursor=conn.cursor()
# 创建游标
wenjianbianhao = 1
# 设置 文件名变量
while True:
with open(r'E:\yidaiogongyu\dingdan\HYL'+str(wenjianbianhao)+'.txt','r',encoding='utf-8') as f:
a = f.read()
# 读取之前爬下来的用户数据
b = json.loads(a)
# 因为爬的json数据,所以转为字典
for i in b['data']['list'] : # 遍历有用的数据
sql_insert = 'insert into yidaigongyu_dingdan (order_id,apartment_id,vip_id,trade_code,begin_date,end_date,remarks,customer_name,customer_phone,deposit,count,create_time,update_time,is_custom_price,apartment_name,total_price,pay_price,refund_price,vip_name,vip_phone,sales_id,sales_name,team_id,vip_level) values ("{0}","{1}","{2}","{3}","{4}","{5}","{6}","{7}","{8}","{9}","{10}","{11}","{12}","{13}","{14}","{15}","{16}","{17}","{18}","{19}","{20}","{21}","{22}","{23}")'.format(i['order_id'],i['apartment_id'],i['vip_id'],i['trade_code'],i['begin_date'],i['end_date'],i['remarks'],i['customer_name'],i['customer_phone'],i['deposit'],i['count'],i['create_time'],i['update_time'],i['is_custom_price'],i['apartment_name'],i['total_price'],i['pay_price'],i['refund_price'],i['vip_name'],i['vip_phone'],i['sales_id'],i['sales_name'],i['team_id'],i['vip_level'])
# 拼接SQL语句
# print(sql_insert)
# 查看准备执行的SQL语句
try:
cursor.execute(sql_insert)
except:
print('发送错误',sql_insert)
# 输出错误SQL,大部分是带特殊符号,因为没过滤
f.close()
# 当前文件插入完成后关闭
print('文件:',wenjianbianhao,'插入完成')
# 输出插入完成好的文件
wenjianbianhao += 1
# 加 1 执行下一个文件,因为爬的时候,一些反爬等 所以分了几千个文件保存
if wenjianbianhao == 8109:
break
print('全部执行完成')