目录
背景:dataworks 下载只能下载10000条数据,如果表大小超出大小,需要limit 几次,或者导出到数据库,再从数据库中进行下载。 pyodps 可以通过python代码的方式操作表,这样就可以将表数据下载到文件,之后将文件通过邮件的方式发送给自己(这里还可以用来所pyodps数据推送,集成在dataworks上)
邮件推送csv格式文件
from odps import ODPS
import csv
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import smtplib
#将表查询数据写入到当前文件中
fileName = 'dim_merchandise_sku_info.csv'
o=ODPS('access_id','access_key','project_name',endpoint='http://service.cn-hangzhou.maxcompute.aliyun.com/api') # 对应odps配置
tableSchema=o.get_table("dim_merchandise_sku_info").schema
head=list(map(lambda x: x.comment, tableSchema.columns)) #用表注释作为csv的head
data=[]
reader=o.execute_sql(' select * from dim_merchandise_sku_info ').open_reader(tunnel=True, limit=False)
for record in reader:
tmp_value=[]
for name in tableSchema.names:
tmp_value.append(record[name])
data.append(tmp_value)
with open(fileName,"w+",encoding="utf-8",newline='') as f:
csvf = csv.writer(f)
csvf.writerow(head)
csvf.writerows(data)
## 发送邮件
mail_host = 'smtp.163.com'
mail_username = '18612483486@163.com'
mail_password = '1111'
mail_sender = '18612483486@163.com'
mail_receivers = ['xxxxxx'] ##收件人邮箱地址
mail_content="" ##邮件内容
message = MIMEMultipart()
message['Subject'] = 'mail test'
message['From'] = mail_sender
message['To'] = mail_receivers[0]
file_name='aa.csv'
# 构造附件,传送当前目录下的 csv 文件
part = MIMEApplication(open(fileName, 'rb').read())
part.add_header('Content-Disposition', 'attachment', filename=file_name)
message.attach(part)
try:
smtpObj = smtplib.SMTP_SSL(mail_host+':465')
smtpObj.login(mail_username,mail_password)
smtpObj.sendmail(
mail_sender,mail_receivers,message.as_string())
smtpObj.quit()
print('mail send success')
except smtplib.SMTPException as e:
print('mail send error',e)
邮件推送xlsx文件
xlsx 推送和csv有点区别,需要引入第三方包xlsxwriter, 需要有独享资源组,操作流程如下
import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
import openpyxl as op
from odps import ODPS
import xlsxwriter # 导入模块
import re
# 从maxcompute 下载数据
def download_file(table_name,file_name,head_is_comment):
o=ODPS('access_id','access_key','project_name',endpoint='http://service.cn-hangzhou.maxcompute.aliyun.com/api') # 对应odps配置
tableSchema = o.get_table(table_name).schema
head = list(map(lambda x: x.comment, tableSchema.columns)) if head_is_comment else tableSchema.names
data = []
reader = o.execute_sql(' select * from project_name.'+table_name).open_reader(tunnel=True, limit=False)
for record in reader:
tmp_value = []
for name in tableSchema.names:
if isinstance(record[name],str ):
ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')
tmp_value.append(ILLEGAL_CHARACTERS_RE.sub(r'', record[name]))
else:
tmp_value.append(record[name])
data.append(tmp_value)
workbook = xlsxwriter.Workbook(file_name) # 新建excel表
worksheet = workbook.add_worksheet('sheet1')
worksheet.write_row('A1', head)
row = 2
# 迭代数据并逐行写入
for i in range(0, len(data)):
worksheet.write_row('A' + str(row), data[i])
row += 1
workbook.close()
pass
# 根据附件发送邮件附件
def send_email(table_name,file_path,receivers):
mail_host = 'smtp.163.com'
mail_username = '18612483486@163.com'
mail_password = 'xxxx'
mail_sender = '18612483486@163.com'
mail_receivers = receivers ##收件人邮箱地址
message = MIMEMultipart()
message['Subject'] = 'download '+table_name
message['From'] = mail_sender
message['To'] = mail_receivers[0]
file_name = file_path
# 构造附件,传送当前目录下的 csv 文件
part = MIMEApplication(open(file_path, 'rb').read())
part.add_header('Content-Disposition', 'attachment', filename=file_name)
message.attach(part)
try:
smtpObj = smtplib.SMTP_SSL(mail_host + ':465')
smtpObj.login(mail_username, mail_password)
smtpObj.sendmail(
mail_sender, mail_receivers, message.as_string())
smtpObj.quit()
print('mail send success')
except smtplib.SMTPException as e:
print('mail send error', e)
table_name = 'dim_merchandise_sku_info' #表名称
head_is_comment=True #表头是否为备注
receivers= ['1111@111.com'] #接受者
file_name = table_name + '.xlsx'
download_file(table_name, file_name,head_is_comment)
send_email(table_name,file_name,receivers)
运行结果如下:
可以将上述代码做为一个模板,作为资源上传到dataworks,这样就可以通过pyodps调度任务,基于它做数据推送。
tip: 这里文件推送虽然可以避免条数的限制,但是邮件推送,邮件文件大小是有限制的。所以特别大的数据可能还是推送不了,可能需要中间转一下,通过oss,进行存储,之后返回文件的url。