使用python连接clickhouse并发送邮件
1. 概览
需求:给对应的广告商发送前一天的投放的广告数据
选择:说实话是因为自己java太差,感觉python做这个会简单一点,所以选了它
遇到的问题:
1.要在不影响线上python环境的前提下,安装自己所需要的各种模块
2.使用virtualenv虚拟python环境遇到的timeout的问题
3.模块的选择
4.发送给各个广告商的邮件的正式化(就是发送的模板好看一点)
2. 解决问题
-
最初真的不知道何为自己虚拟一个python环境来搞,还是同事大神告诉的用virtualenv来虚拟出来一个自己的python环境
1) 使用pip 安装 virtualenv
pip install virtualenv
2) 创建指定的目录
mkdir myproject cd myproject
3) 创建一个独立的运行环境
virtualenv myenv
4) 新建的python的环境就在当前目录下的myenv目录,用source进入该环境
source ./myenv/bin/activate
5) 退出虚拟环境
deactivate
-
刚开始遇到这个问题,真的无从下手,还是求助同事,pip换源
临时使用:切换成阿里云的镜像
pip install -i http://mirrors.aliyun.com/pypi/simple/tensorflow
永久使用:linux下,修改~/.pip/pip.conf(没有就创建一个)
[global] timeout = 60000 index-url = https://pypi.tuna.tsinghua.edu.cn/simple
-
使用到的所有的python模块
(发送邮件选取的模块还可以选择smtplib,不选这个的原因是我的虚拟环境找不到对应的包)
datetime 获取时间 os 调用系统命令 sys 系统输入输出 pandas 处理数据成xlsx,并添加表头 xlrd(刚开始我以为读取的是xlsx文件) 读取xlsx文件 clickhouse_driver 连接clickhouse exchangelib 发送邮件的模块 exchangelib.protocol 绕过ssl证书验证 -
代码奉上
#!/usr/bin/env python # coding:utf-8 import datetime import os import sys import pandas as pd import xlrd from clickhouse_driver import Client from exchangelib import Account, FileAttachment, Message, Mailbox, Configuration, Credentials, DELEGATE, HTMLBody from exchangelib.protocol import BaseProtocol, NoVerifyHTTPAdapter BaseProtocol.HTTP_ADAPTER_CLS = NoVerifyHTTPAdapter class ProxyAdData: def __init__(self): reload(sys) sys.setdefaultencoding('utf-8') # 初始化clickhouse连接 self.client = Client(host='127.0.0.1', port=9008, password='password') # 刚开始以为是读取xlsx文件获取对应信息,然后被喷,改为读取txt self.workbook = xlrd.open_workbook('./媒体邮件信息.xlsx', encoding_override='UTF-8').sheet_by_index(0) # 获取前一天的日期 self.date = str(datetime.date.today() + datetime.timedelta(-1)) self.game = {'game':'游戏'} # 对应的广告商 self.channel = {'channel':'渠道'} # 输出xlsx文件的表头 self.columns = ['column1','column2'] self.column_names = ['列名1','列名2'] # 为了保存历史数据 os.popen('mkdir -p ./data/%(dt)s' % {'dt': self.date}) def run(self): for line in open('./proxy.txt', 'r'): game, proxy, email, frequency, channel, ctype, aid, cid_name = line.strip('\n').split('\t') # 拼接sql sql = self.connect_sql(game, str(channel), str(ctype).split('.')[0], str(aid).split('.')[0], str(cid_name).split('.')[0]) print(sql) # 执行sql data = self.exec_sql(sql) if len(data) == 0: continue print(data) # excel_name excel_name = '%(proxy)s_%(channel)s_%(ctype)s.xlsx' % {'proxy': proxy, 'channel': channel, 'ctype': ctype} # 生成excel self.create_xlsx(data, excel_name) # 发送邮件 for pre_email in email.split(','): self.send_email(excel_name, game, channel, pre_email) def connect_sql(self, game, channel, ctype, aid, cid_name): channel_name = channel sql = 'select \'%(channel_name)s\',%(column)s from data01.ad_base where game = %(game)s and clicktime = %(date)s and channel = %(channel)s and ctype = %(ctype)s ' % { 'channel_name': channel_name, 'column': ','.join(self.columns), 'game': "'" + game + "'", 'date': "'" + self.date + "'", 'channel': "'" + self.channel[channel] + "'", 'ctype': "'" + ctype + "'"} if aid != '' or len(aid) != 0: sql += ' and aid = %(aid)s' % {'aid': "'" + aid + "'"} if cid_name != '' or len(cid_name) != 0: sql += ' and cid_name = %(cid_name)s ' % {'cid_name': "'" + cid_name + "'"} return sql def exec_sql(self, sql): return self.client.execute(sql) def create_xlsx(self, data, excel_name): dataf = pd.DataFrame(data) dataf.columns = self.column_names print(dataf) # 生成excel writer = pd.ExcelWriter('./data/' + self.date + '/' + excel_name) dataf.to_excel(writer, sheet_name='ad_data') writer.save() writer.close() def send_email(self, file, game, channel, to_email): credentials = Credentials('邮箱用户名', '邮箱密码') config = Configuration(credentials=credentials, server='邮箱服务器') a = Account(primary_smtp_address='邮箱用户名', access_type=DELEGATE, autodiscover=False, config=config) m = Message( account=a, subject='【%(game)s】【%(channel)s】%(dt)s 广告数据' % {'game': self.game[game], 'channel': channel, 'dt': self.date}, # 为了对外好看一点,所以回复的邮件正文是html的 body=HTMLBody(), to_recipients=[ Mailbox(email_address=to_email), ], # 抄送 cc_recipients=[''] ) with open(os.path.abspath(r"./data/" + self.date + "/" + file), "rb") as f: cont = f.read() attchF = FileAttachment(name=file, content=cont) m.attach(attchF) m.send_and_save() if __name__ == '__main__': pad = ProxyAdData() pad.run()
由衷感谢陪我成长的西米先生,手动笔芯