# ===================== Reading a text file in chunks =====================
# Example values for keyword arguments of pd.read_csv.

# Read only a limited number of rows.
nrows: int = 10

# Read the file in chunks; read_csv then returns an iterable TextFileReader.
chunksize: int = 4

# Return an iterable object; use df.get_chunk(10) to look at the data.
iterator: bool = True
# ===================== Handling delimited formats ========================
import csv

import pandas as pd
def csv_read_file(file_path, delimiter=',', header=True, lineterminator='\r\n', quotechar='"', skipinitialspace=False, encoding=None):
    r"""Read a delimited text file with the csv module into a DataFrame.

    Meant for irregular, quote-wrapped data such as::

        "a","b","c"
        "1","2","3"
        "1","2","3"

    Values are kept exactly as the csv module parsed them (strings); no
    type conversion is done here.

    :param file_path: path of the file to read
    :param delimiter: field separator, defaults to a comma
    :param header: whether the first row holds the column names, defaults
        to True; when False the columns are labelled 0, 1, 2, ...
    :param lineterminator: line terminator used for write operations,
        defaults to '\r\n'; it is forwarded to csv.reader, which (per the
        csv documentation) ignores it when reading
    :param quotechar: character used to quote fields that contain special
        characters (such as the delimiter), defaults to '"'
    :param skipinitialspace: ignore whitespace right after the delimiter,
        defaults to False
    :param encoding: text encoding passed to open(); None (the default)
        keeps the platform default encoding
    :return df: a DataFrame. It is empty (no columns) when the file has no
        rows, and has the header's columns but no rows when the file holds
        only a header line. Rows of unequal length are truncated to the
        shortest row, and repeated header names collapse into one column,
        because the columns are assembled with zip() into a dict.
    """
    # newline='' hands raw line endings to the csv module so that line
    # breaks embedded in quoted fields survive unchanged.
    with open(file_path, newline='', encoding=encoding) as f:
        rows = list(csv.reader(
            f,
            delimiter=delimiter,
            lineterminator=lineterminator,
            quotechar=quotechar,
            skipinitialspace=skipinitialspace,
        ))

    if not header:
        # Transpose rows into columns keyed by their position.
        return pd.DataFrame(dict(enumerate(zip(*rows))))

    if not rows:
        # Empty file: nothing to use as a header, mirror the header=False result.
        return pd.DataFrame()

    column_names, records = rows[0], rows[1:]
    if not records:
        # Header line only: keep the column names instead of dropping them.
        return pd.DataFrame(columns=column_names)

    # Transpose the data rows and pair every column with its header name.
    return pd.DataFrame(dict(zip(column_names, zip(*records))))
# Example run: load the sample file (its first row is the header) into df.
df = csv_read_file(
    file_path=r'C:\Users\86188\Desktop\python/ex1',
    header=True,
)