import numpy as np
import pandas as pd
import os
# Directory that contains this script; the sample CSV is expected to sit next to it.
project_path = os.path.dirname(os.path.abspath(__file__))
# Build the CSV path once with os.path.join instead of repeating string concatenation.
csv_path = os.path.join(project_path, 'test.csv')

# With a header row: header=0 uses the first line as column names,
# skiprows=0 skips nothing, and nrows=4 limits the read to four data rows.
x = pd.read_csv(csv_path, encoding='gbk', header=0, skiprows=0, nrows=4)
print(x)

# Without a header row: header=None makes pandas number the columns from 0.
data = pd.read_csv(csv_path, encoding='gbk', header=None)
print(data)

# Without a header row, supplying explicit column names instead.
data = pd.read_csv(csv_path, encoding='gbk', header=None,
                   names=['year', '丰田', '通用', '雪佛兰', '红旗'])
print(data)

# Skip specific lines by index: line 0 stays as the header, lines 1-3 are dropped.
# NOTE(review): these two reads pass sep='|' while every other read relies on
# the default comma separator -- confirm which delimiter test.csv really uses.
x = pd.read_csv(csv_path, encoding='gbk', header=0, sep='|', skiprows=range(1, 4))
print(x)
# Same idea, dropping lines 1-9.
x = pd.read_csv(csv_path, encoding='gbk', sep='|', skiprows=range(1, 10))
print(x)

# Use the values of one column as the row index ...
x = pd.read_csv(csv_path, encoding='gbk', header=None,
                names=['A', 'B', 'C', 'D', 'E'], index_col='A')
print(x)
# ... or the values of several columns as a hierarchical (multi-level) row index.
x = pd.read_csv(csv_path, encoding='gbk', header=None,
                names=['A', 'B', 'C', 'D', 'E'], index_col=['A', 'C'])
print(x)

# Column labels: read with the default header handling, then pull out the
# values as a NumPy array and the column names as a plain list.
df = pd.read_csv(csv_path, encoding='gbk')
data = np.array(df)  # the former df.loc[:, :] was a no-op full slice
labels = list(df.columns)
print(labels)
# Input and output files for the missing-value examples (relative to the
# current working directory).
source_csv = 'data3.csv'
output_csv = 'data3out.csv'

# Markers such as NULL, nan and blank fields are normally converted to NaN
# automatically; an empty na_values list adds no extra markers.
x = pd.read_csv(source_csv, na_values=[])

# Treat one additional literal value as NaN in every column.
x = pd.read_csv(source_csv, na_values=['Nan'])

# Treat different literal values as NaN depending on the column.
setNaN = dict(C=['Nan'], D=['b', 'c'])
x = pd.read_csv(source_csv, na_values=setNaN)

# Save the frame to a CSV file with the default options.
x.to_csv(output_csv)

# Save again, this time choosing the NaN representation and dropping both
# the row index and the header row.
x.to_csv(output_csv, na_rep='NaN', header=False, index=False)

# Read the header-less file back, supplying the column names explicitly.
x = pd.read_csv(output_csv, names=['W', 'X', 'Y', 'Z'])
# Read a whitespace-delimited text file. sep is a regular expression, so it is
# written as a raw string: '\s' in an ordinary literal is an invalid escape
# sequence that recent Python versions warn about.
x = pd.read_table('data4.txt', sep=r'\s+')
# Read a tab-delimited text file with NumPy.
x = np.loadtxt('data5.txt', delimiter='\t')
# (Leftover web-page footer, not part of the script: "Like (0) or Share (0)")