def get_conn():
try:
conn = pymysql.connect(
host = "***",
port = ***,
user = "***",
password = "***",
db = "***")
except Exception as e:
logger.warning(e)
finally:
return conn
def get_data_from_db(filename):
    """Query columns A,B,C from `tablename` and dump all rows to *filename*.

    Each row is written as a UTF-8 encoded line "A\\tB\\tC\\n".
    Prints the number of rows written when done.

    Args:
        filename: path of the output file (opened in binary mode).
    """
    sql_v = """
    select A,B,C
    from tablename
    """
    # Open exactly one connection; the original called get_conn() twice
    # and leaked the first connection.
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql_v)
            result = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    count = 0
    # Binary mode + explicit encode so Chinese text is written as UTF-8
    # bytes regardless of the platform's default encoding.
    with open(filename, "wb+") as fp:
        for row in result:
            line = '\t'.join(str(val) for val in row) + "\n"  # format: A\tB\tC\n
            fp.write(line.encode('utf8'))
            count += 1
    print("写入完成,共写入%d条数据……" % count)
def process(filename):
    """Print every line of *filename* (read as UTF-8).

    Placeholder for real per-line processing. Iterating the file object
    streams one line at a time (no readlines(), so large files stay cheap),
    and `with` guarantees the handle is closed — the original leaked the
    file object and printed one spurious empty string at EOF.

    Args:
        filename: path of a UTF-8 text file.
    """
    with open(filename, "r", encoding='utf8') as fh:
        for line in fh:
            print(line)
            # TODO: real processing of `line` goes here
if __name__ == '__main__':
    # Export the query results to disk, then read the dump back and process it.
    target_path = './understand/0.txt'
    get_data_from_db(target_path)
    process(target_path)
当时编码问题搞了会儿。以后就按这种写吧。
注意:
含有中文,往文件写的时候:
fp = open(filename, "wb+")
fp.write(line.encode('utf8'))编码了一下。
读的时候:
with open(filename, "r", encoding='utf8') as f:
...
python3的系统编码为utf-8
有时间看下这个吧 https://blog.csdn.net/weixin_29343807/article/details/112240197
新增注意
对于大文件的处理,尤其比如GB级别的日志,遍历操作不要用readlines(),readlines()一次性读入文件中所有行,存储到一个list返回,很耗内存,建议用迭代器。
1、不建议使用下面的遍历方式:
不建议用!
for line in open(filename).readlines():
process(line)
2、建议这样用:
使用文件迭代器遍历文件:
with open(filename) as f:
for line in f:
process(line)
或者流式输入: