import asyncio
import json
import logging

import xlwt
from pymongo import MongoClient

logger = logging.getLogger(__name__)


class Mongodb_Transfer_Excel():
    """Export the documents of one MongoDB collection to an ``.xls`` workbook.

    Each document becomes one worksheet row. Row 0 holds the column headers;
    document ``n`` (1-based, in cursor order) is written to row ``n``.
    """

    # Column layout used when the caller supplies no (or an empty) excel_format.
    DEFAULT_EXCEL_FORMAT = {
        'content': 0,
        'content1': 1,
        'title': 2,
        'weixin_code': 3,
        'weixin_name': 4,
        'type': 5,
        'pubtime': 6,
    }

    # Number of characters kept in 'content'; the remainder goes to 'content1'.
    CONTENT_CHUNK = 2000

    def __init__(self, db_name, table_name, ip='127.0.0.1', port=27017,
                 excel_format=None, mongodb_type=None):
        """Store the connection settings and create an empty workbook.

        :param db_name: database name
        :param table_name: collection name; also used as the sheet name and
            as the stem of the output file (``<table_name>.xls``)
        :param ip: MongoDB host
        :param port: MongoDB port
        :param excel_format: mapping of field name to column index, e.g.
            ``{'content': 0, 'field': 1}``; a falsy value selects
            ``DEFAULT_EXCEL_FORMAT`` when ``create_excel`` runs
        :param mongodb_type: stored on the instance; not read by any method
            of this class
        """
        self.ip = ip
        self.port = port
        self.db_name = db_name
        self.table_name = table_name
        self.excel_format = excel_format
        self.wbk = xlwt.Workbook()
        self.sheet = self.wbk.add_sheet(self.table_name)
        self.mongodb_type = mongodb_type
        # A private loop per instance: run() closes it, so sharing the
        # thread's default loop would break any later instance.
        self.loop = asyncio.new_event_loop()
        # Created lazily by db_conn() and closed by run().
        self._client = None

    def db_conn(self):
        """Return the configured collection, connecting on first use.

        The database and the collection are created by MongoDB on demand if
        they do not exist yet.

        :return: the pymongo collection ``db_name.table_name``
        """
        if self._client is None:
            self._client = MongoClient(self.ip, self.port)
        database = self._client[self.db_name]
        return database[self.table_name]

    def find_data(self):
        """Fetch every document of the collection.

        :return: the cursor returned by ``find()`` with no filter
        """
        return self.db_conn().find()

    def create_excel(self):
        """Write the header row according to ``self.excel_format`` and save.

        A falsy ``self.excel_format`` is replaced by a copy of
        ``DEFAULT_EXCEL_FORMAT`` first.
        """
        if not self.excel_format:
            self.excel_format = dict(self.DEFAULT_EXCEL_FORMAT)
        for field, column in self.excel_format.items():
            self.sheet.write(0, column, field)
        self.save_excel()

    @staticmethod
    def _load_json_if_str(value):
        """Decode ``value`` with ``json.loads`` when it is a string."""
        return json.loads(value) if isinstance(value, str) else value

    def _extract_fields(self, i, row):
        """Flatten one MongoDB document into a ``{field: value}`` dict.

        :param i: worksheet row index for this document
        :param row: the document (a mapping)
        :return: dict holding the row index under ``'cloumn'`` plus whichever
            of the known fields are present in ``row``
        """
        # NOTE: the misspelled key 'cloumn' is kept on purpose; parse_dic()
        # is public and reads the row index under this exact name.
        dic = {'cloumn': i}

        # `or {}` covers a key that is present but holds None.
        user_info = row.get('user') or {}
        for field in ('description', 'screenName'):
            if field in user_info:
                dic[field] = user_info[field]

        if 'result' not in row:
            return dic

        # 'result' (and 'event' below) may be stored as a JSON string.
        result = self._load_json_if_str(row['result']) or {}

        content = result.get('content')
        if content is not None:
            content = content.replace('\n', '')
            dic['content'] = content[:self.CONTENT_CHUNK]
            if len(content) > self.CONTENT_CHUNK:
                # The key must match the 'content1' column of the layout,
                # otherwise the overflow text is silently dropped.
                dic['content1'] = content[self.CONTENT_CHUNK:]

        if 'title' in result:
            dic['title'] = result.get('title')

        event = self._load_json_if_str(result.get('event')) or {}
        for field in ('weixin_code', 'weixin_name', 'type', 'url'):
            if field in event:
                dic[field] = event.get(field)

        if 'pubtime' in result:
            dic['pubtime'] = result.get('pubtime')
        return dic

    async def parse_rows(self, i, row):
        """Extract the fields of one document and write them to the sheet.

        :param i: worksheet row index for this document
        :param row: the document (a mapping)
        """
        dic = self._extract_fields(i, row)
        # Yield to the event loop once; there is nothing to actually wait for.
        await asyncio.sleep(0)
        self.parse_dic(dic)

    def parse_dic(self, dic):
        """Write every field of ``dic`` that has a column in the layout.

        :param dic: field dict as built by ``parse_rows``; the worksheet row
            index is read from ``dic['cloumn']``
        """
        row_index = dic['cloumn']
        for key, value in dic.items():
            if key in self.excel_format:
                self.write_excel(key, value, row_index)

    def write_excel(self, key, value, columns):
        """Write one cell.

        :param key: field name; selects the column via ``self.excel_format``
        :param value: cell value
        :param columns: worksheet ROW index to write to (the parameter name
            is historical and kept for compatibility)
        """
        self.sheet.write(columns, int(self.excel_format.get(key)), value)

    def save_excel(self):
        """Save the workbook as ``<table_name>.xls`` in the working directory."""
        self.wbk.save('{}.xls'.format(self.table_name))

    async def _parse_all(self, rows):
        """Run ``parse_rows`` for every document and log the ones that fail."""
        jobs = [self.parse_rows(i, row) for i, row in enumerate(rows, start=1)]
        # gather() accepts bare coroutines and an empty list, unlike
        # asyncio.wait(). return_exceptions=True keeps one bad document from
        # aborting the whole export.
        outcomes = await asyncio.gather(*jobs, return_exceptions=True)
        for row_index, outcome in enumerate(outcomes, start=1):
            if isinstance(outcome, BaseException):
                logger.error("Skipping row %d: %r", row_index, outcome)

    def run(self):
        """Export the whole collection: header, all rows, then save.

        The event loop and the MongoDB client are closed afterwards, so an
        instance can be run only once.
        """
        self.create_excel()
        try:
            rows = self.find_data()
            self.loop.run_until_complete(self._parse_all(rows))
        finally:
            self.loop.close()
            if self._client is not None:
                self._client.close()
                self._client = None
        self.save_excel()


if __name__ == '__main__':
    # Column layout, e.g. {'content': 0, 'field': 1}; empty selects the default.
    excel_format = {}
    mongodb_type = 'weibo'  # or man_sheng_huo
    obj = Mongodb_Transfer_Excel(db_name='db_name', table_name='table_name',
                                 mongodb_type=mongodb_type,
                                 excel_format=excel_format)
    obj.run()