16. na_values: scalar, str, list-like, or dict, default None
# The default NaN recognized values are ['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', 'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', ''].
17. keep_default_na: boolean, default True
# If keep_default_na is True, and na_values are specified, na_values is appended to the default NaN values used for parsing.
# If keep_default_na is True, and na_values are not specified, only the default NaN values are used for parsing.
# If keep_default_na is False, and na_values are specified, only the NaN values specified in na_values are used for parsing.
# If keep_default_na is False, and na_values are not specified, no strings will be parsed as NaN.
18. skip_blank_lines: boolean, default True
# 如果为真,则跳过空白行而不是解析为空值
19. parse_dates: boolean or list of ints or names or list of lists or dict, default False.
# If True -> try parsing the index.
# If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a separate date column.
# If [[1, 3]] -> combine columns 1 and 3 and parse as a single date column.
# If {'foo': [1, 3]} -> parse columns 1, 3 as date and call result 'foo'. A fast-path exists for iso8601-formatted dates.
# 由于csv文件中日期和时间被分为了两列,pd.read_csv命令读取文件时,需指定parse_dates = [ ['Date', 'Time'] ],
# 亦即将[ ['Date', 'Time'] ]两列的字符串先合并后解析方可。合并后的新列会以下划线'_'连接原列名命名,
# 本例中列名为'Date_Time'。解析得到的日期格式列会作为DataFrame的第一列,在index_col指定表格中的第几列作为Index时需要小心。
# 如本例中,指定参数index_col = 0,则此时会以新生成的Date_Time列而不是IncidntNum作为Index。因此保险的方法是指定列名,如index_col = 'IncidntNum'。
20. infer_datetime_format: boolean, default False
# 如果该参数为真,且parse_dates指定列时,可以提高处理速度
21. keep_date_col: boolean, default False
# 解析日期后是否保留原始列
22. date_parser: function, default None
# 转换字符串序列或者日期数组,默认使用第三方库dateutil.parser.parser,这个库可以解析所有人类可识别的日期
# Stream the CSV in chunks instead of loading it in one go: only columns 0-4
# and at most the first 20 rows are read, four rows per chunk. Using the
# reader as a context manager closes the underlying file when the block exits.
with pd.read_csv(
    r"D:\6_DataAnalysis\8_Datacleaning\##3Python_elements\wine_data.csv",
    sep=",",
    usecols=[0, 1, 2, 3, 4],
    nrows=20,
    chunksize=4,
) as reader:
    for chunk in reader:
        # Each chunk is printed, followed by a separator line of 30 asterisks.
        print(chunk)
        print("*" * 30)
from io import BytesIO

# Sample CSV text written as UTF-8 bytes (the header plus two German words).
utf8_payload = b"word,length\n" b"Tr\xc3\xa4umen,7\n" b"Gr\xc3\xbc\xc3\x9fe,5"

# Transcode to Latin-1 so the buffer is no longer UTF-8.
csv_text = utf8_payload.decode("utf8")
data = csv_text.encode("latin-1")

# Parse the in-memory bytes, telling pandas which encoding they use.
latin1_buffer = BytesIO(data)
dt16 = pd.read_csv(latin1_buffer, encoding="latin-1")
dt16
word
length
0
Träumen
7
1
Grüße
5
三、DataFrame.to_csv()从Python中导出数据
#参数如下
DataFrame.to_csv(path_or_buf: A string path to the file to write, or a file object. If a file object, it must be opened with newline=''
sep : Field delimiter for the output file(default “,”)
na_rep: A string representation of a missing value (default ‘’)
float_format: Format string for floating point numbers
columns: Columns to write (default None)
header: Whether to write out the column names (default True)
index: whether to write row (index) names (default True)
index_label: Column label(s) for index column(s) if desired. If None (default), and header and index are True, then the index names are used. (A sequence should be given if the DataFrame uses MultiIndex.)
mode : Python write mode, default ‘w’
encoding: a string representing the encoding to use if the contents are non-ASCII, for Python versions prior to 3
line_terminator: Character sequence denoting line end (default os.linesep)
quoting: Set quoting rules as in the csv module (default csv.QUOTE_MINIMAL). Note that if you have set a float_format, then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric
quotechar: Character used to quote fields (default '"')
doublequote: Control quoting of quotechar in fields (default True)
escapechar: Character used to escape sep and quotechar when appropriate (default None)
chunksize: Number of rows to write at a time
date_format: Format string for datetime objects)