参见文章
https://segmentfault.com/a/1190000008318279
0.问题描述
>>> from backend.dataflow.tasks import *
>>> from backend.dataflow.tasks import __get_repository
>>> repository = __get_repository()
INFO tasks 1702 当前使用的ETL资料库类型是:ODI -> ODI
>>> sql = interface_status_sql.get_sql(repository)
>>> etl_connect = get_repository_connect(repository)
>>> db_records = try_etl_connect_query(etl_connect, sql)
>>> db_records = [r for r in db_records]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 1, in <listcomp>
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/records.py", line 122, in __iter__
yield next(self)
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/records.py", line 132, in __next__
nextrow = next(self._rows)
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/records.py", line 278, in <genexpr>
row_gen = (Record(cursor.keys(), row) for row in cursor)
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/engine/result.py", line 946, in __iter__
row = self.fetchone()
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/engine/result.py", line 1276, in fetchone
e, None, None, self.cursor, self.context
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1468, in _handle_dbapi_exception
util.reraise(*exc_info)
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 154, in reraise
raise value
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/engine/result.py", line 1268, in fetchone
row = self._fetchone_impl()
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/engine/result.py", line 1148, in _fetchone_impl
return self.cursor.fetchone()
File "/root/.pyenv/versions/data_assets/lib/python3.6/site-packages/sqlalchemy/dialects/oracle/cx_oracle.py", line 945, in <lambda>
outconverter=lambda value: value.read(),
UnicodeDecodeError: 'gbk' codec can't decode byte 0xaa in position 1293: illegal multibyte sequence
查看代码,发现db_records 是一个迭代器,到编码出错的记录时,直接报错了,而未进入后一个循环,在for循环里面try抓不到错误
for record in db_records:
try:
time_dict[str(record.trans_id)] = record.step_beg
if record.step_status == 1:
err_ids.append(record.trans_id)
msg_dict[str(record.trans_id)] = record.error_message # 报错信息
else:
ok_ids.append(record.trans_id)
record_dict[str(record.trans_id)] = record
except: # 无法抓到UnicodeDecodeError,是因为db_records获取那个迭代值的时候就出错了
pass
try: # 临时的解决方案,在循环之前先跑一次,这样保证没出错的结果还可以使用
db_records_ = [r for r in db_records]
except:
pass
1.使用sqlalchemy的情况
from sqlalchemy import create_engine
engine = create_engine('oracle://demo:123456@192.168.1.202/TEST')
conn = engine.connect()
sql = 'select t.type from TS t'
result = conn.execute(sql)
for row in result:
print(row['type'])
在create_engine函数中追加了encoding参数,将其更改为:
engine = create_engine('oracle://demo:123456@192.168.1.202/TEST',encoding="UTF-8")
而另外1种可用的方式直接在连接路径中指定编码,类似如下:
engine = create_engine('oracle://demo:123456@192.168.1.202/TEST?charset=utf-8')
2.使用records的情况
import records
database_url = "oracle+cx_oracle://{}:{}@{}:{}/?service_name={}".format(
username, password, host, port, db_name
)
# 增加一个参数encoding="UTF-8"即可
connect = records.Database(database_url, encoding="UTF-8")