dataworks 下载表数据

目录

邮件推送csv格式文件

邮件推送xlsx文件


背景:dataworks 下载只能下载10000条数据,如果表大小超出大小,需要limit 几次,或者导出到数据库,再从数据库中进行下载。 pyodps 可以通过python代码的方式操作表,这样就可以将表数据下载到文件,之后将文件通过邮件的方式发送给自己(这里还可以用来所pyodps数据推送,集成在dataworks上)

邮件推送csv格式文件

from odps import ODPS
import csv
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import smtplib
#将表查询数据写入到当前文件中
fileName = 'dim_merchandise_sku_info.csv' 

o=ODPS('access_id','access_key','project_name',endpoint='http://service.cn-hangzhou.maxcompute.aliyun.com/api') # 对应odps配置
tableSchema=o.get_table("dim_merchandise_sku_info").schema
head=list(map(lambda x: x.comment, tableSchema.columns)) #用表注释作为csv的head
data=[]
reader=o.execute_sql(' select  * from dim_merchandise_sku_info ').open_reader(tunnel=True, limit=False)
for record in reader:
    tmp_value=[]
    for name in tableSchema.names:
        tmp_value.append(record[name])
    data.append(tmp_value)
with open(fileName,"w+",encoding="utf-8",newline='') as f:
    csvf = csv.writer(f)
    csvf.writerow(head)
    csvf.writerows(data)

## 发送邮件
mail_host = 'smtp.163.com'
mail_username = '18612483486@163.com'
mail_password = '1111'
mail_sender = '18612483486@163.com'
mail_receivers = ['xxxxxx']  ##收件人邮箱地址
mail_content=""        ##邮件内容
message = MIMEMultipart()

message['Subject'] = 'mail test'
message['From'] = mail_sender
message['To'] = mail_receivers[0]
file_name='aa.csv'
        # 构造附件,传送当前目录下的 csv 文件
part = MIMEApplication(open(fileName, 'rb').read())
part.add_header('Content-Disposition', 'attachment', filename=file_name)
message.attach(part)
try:
           smtpObj = smtplib.SMTP_SSL(mail_host+':465')
           smtpObj.login(mail_username,mail_password)
           smtpObj.sendmail(
               mail_sender,mail_receivers,message.as_string())
           smtpObj.quit()
           print('mail send success')
except smtplib.SMTPException as e:
           print('mail send error',e)

邮件推送xlsx文件

 xlsx 推送和csv有点区别,需要引入第三方包xlsxwriter, 需要有独享资源组,操作流程如下

依赖第三方包

import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
import openpyxl as op
from odps import ODPS
import xlsxwriter  # 导入模块
import re


# 从maxcompute 下载数据
def download_file(table_name,file_name,head_is_comment):
    o=ODPS('access_id','access_key','project_name',endpoint='http://service.cn-hangzhou.maxcompute.aliyun.com/api') # 对应odps配置
    tableSchema = o.get_table(table_name).schema
    head = list(map(lambda x: x.comment, tableSchema.columns)) if  head_is_comment else tableSchema.names
    data = []
    reader = o.execute_sql(' select  * from project_name.'+table_name).open_reader(tunnel=True, limit=False)
    for record in reader:
        tmp_value = []

        for name in tableSchema.names:
            if isinstance(record[name],str ):
                ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')
                tmp_value.append(ILLEGAL_CHARACTERS_RE.sub(r'', record[name]))
            else:
                tmp_value.append(record[name])
        data.append(tmp_value)
    workbook = xlsxwriter.Workbook(file_name)  # 新建excel表
    worksheet = workbook.add_worksheet('sheet1')
    worksheet.write_row('A1', head)
    row = 2
    # 迭代数据并逐行写入
    for i in range(0, len(data)):
        worksheet.write_row('A' + str(row), data[i])
        row += 1
    workbook.close()
    pass


# 根据附件发送邮件附件
def send_email(table_name,file_path,receivers):
    mail_host = 'smtp.163.com'
    mail_username = '18612483486@163.com'
    mail_password = 'xxxx'
    mail_sender = '18612483486@163.com'
    mail_receivers = receivers ##收件人邮箱地址
    message = MIMEMultipart()

    message['Subject'] = 'download '+table_name
    message['From'] = mail_sender
    message['To'] = mail_receivers[0]
    file_name = file_path
    # 构造附件,传送当前目录下的 csv 文件
    part = MIMEApplication(open(file_path, 'rb').read())
    part.add_header('Content-Disposition', 'attachment', filename=file_name)
    message.attach(part)
    try:
        smtpObj = smtplib.SMTP_SSL(mail_host + ':465')
        smtpObj.login(mail_username, mail_password)
        smtpObj.sendmail(
            mail_sender, mail_receivers, message.as_string())
        smtpObj.quit()
        print('mail send success')
    except smtplib.SMTPException as e:
        print('mail send error', e)

table_name = 'dim_merchandise_sku_info'   #表名称
head_is_comment=True  #表头是否为备注
receivers= ['1111@111.com']   #接受者
file_name = table_name + '.xlsx'
download_file(table_name, file_name,head_is_comment) 
send_email(table_name,file_name,receivers)

运行结果如下:

可以将上述代码做为一个模板,作为资源上传到dataworks,这样就可以通过pyodps调度任务,基于它做数据推送。

tip: 这里文件推送虽然可以避免条数的限制,但是邮件推送,邮件文件大小是有限制的。所以特别大的数据可能还是推送不了,可能需要中间转一下,通过oss,进行存储,之后返回文件的url。

  • 2
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 6
    评论
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值