0. 数据集随机划分
import numpy as np
import pandas as pd

# Load the dataset; Y is the first column, X is every remaining column.
datamap = pd.read_csv(r"C:\Users\omenuser\Desktop\c2.csv")
X = datamap.iloc[:, 1:]
Y = datamap.iloc[:, :1]
# Random ordering of the row positions 0 .. len(X) - 1
shuffle_indexes = np.random.permutation(len(X))
# Fraction of the rows held out for the test set
test_ratio = 0.2
# Number of rows in the test set
test_size = int(test_ratio * len(X))
# Row positions that make up the test set
test_indexes = shuffle_indexes[:test_size]
# Row positions that make up the training set
train_indexes = shuffle_indexes[test_size:]
# The permutation holds row positions, not index labels, so select with iloc.
# With the default RangeIndex produced by read_csv the result is the same as
# loc, but iloc stays correct if the frame is ever filtered or re-indexed.
x_test = X.iloc[test_indexes]
x_train = X.iloc[train_indexes]
y_test = Y.iloc[test_indexes]
y_train = Y.iloc[train_indexes]
1.csv
1.1 读取每行数据
import csv
# newline='' lets the csv module do its own line-ending handling, which is
# required for quoted fields that contain embedded newlines.
with open(r"C:\Users\omenuser\Desktop\b.csv", newline='') as f:
    reader = csv.reader(f)
    # Read at most the first 835 rows, one row per iteration. zip() stops at
    # the end of the file instead of raising StopIteration on a short file.
    for i, head_row in zip(range(835), reader):
        pass  # head_row is the current row as a list of strings
1.2 读取每列数据
import csv
# newline='' lets the csv module do its own line-ending handling, which is
# required for quoted fields that contain embedded newlines.
with open("D:\\test.csv", newline='') as f:
    reader = csv.reader(f)
    # Load every row of the file; each row is a list of strings.
    rows = list(reader)
    # Show the first row of the file.
    print(rows[0])
1.3 保存数据为csv文件
import pandas as pd
# Wrap DataMap (defined elsewhere) in a DataFrame and write it out as a
# UTF-8 CSV file, leaving out the row-index column.
save_file = pd.DataFrame(data=DataMap)
save_file.to_csv(
    './datamap1/kulun.csv',
    encoding="utf-8",
    index=False,
)
1.4 Pandas 中 iloc 和 loc 方法的用法
iloc[行, 列] 按整数位置进行切片操作;行、列都使用切片(如 iloc[:, :1])时,输出数据类型为 DataFrame
loc[] 通过行索引标签取行数据(如 loc[i] 取索引标签为 i 的那一行)
import pandas as pd
filepath = r'C:\Users\omenuser\Desktop\b.csv'
rawdata = pd.read_csv(filepath)
lat = rawdata.iloc[:, :1]  # first column, kept as a one-column DataFrame
# Visit the first column row by row so each value can be processed on its own.
for i in range(len(lat)):
    # Read the scalar by position. lat.loc[i] would return a one-element
    # Series, and calling float() on a Series is deprecated in recent pandas.
    latdata = float(lat.iloc[i, 0])
    # data processing...
2.txt
2.1 写入数据
desktop_path = r"C:\Users\omenuser\Desktop\a.txt"
# Opened once in write mode (truncating any existing file) and shared by
# write_data. Nothing in this snippet closes the handle; call file.close()
# once all records have been written.
file = open(desktop_path, 'w')


def write_data(lng):
    """Write one line to the shared output file: '{"lng":' followed by lng.

    Args:
        lng: Text placed after the '{"lng":' prefix. It is handed to
            file.write() unchanged, so it must already be a str.
    """
    for piece in ('{"lng":', lng, '\n'):  # prefix, value, line break
        file.write(piece)
3.文件夹
3.1 获取文件夹中文件列表
import os
def getFileList(dir, Filelist, ext=None):
    """
    Collect the paths of all files under a directory, including subdirectories.

    Args:
        dir: Root directory to walk. A path to a single file is also accepted
            and is treated as the only candidate.
        Filelist: List that matching paths are appended to (mutated in place).
        ext: Extension to keep, with or without the leading dot (for example
            'xyz' or '.xyz'). None or '' keeps every file. Matching is
            case-sensitive.

    Returns:
        The same Filelist object that was passed in, after appending.
    """
    if os.path.isfile(dir):
        # Compare the file's real extension. A substring test on the last
        # three characters would accept 'py' for '.npy', accept names with no
        # dot at all, and never match extensions longer than three characters.
        if not ext or os.path.splitext(dir)[1] == '.' + ext.lstrip('.'):
            Filelist.append(dir)
    elif os.path.isdir(dir):
        # Recurse into every entry; files are filtered by the branch above.
        for s in os.listdir(dir):
            getFileList(os.path.join(dir, s), Filelist, ext)
    return Filelist
3.2 批量修改文件后缀
# Rename files with the .xyz suffix to the same path with a .txt suffix.
datalist = getFileList(DataMapPath, [], 'xyz')
print(len(datalist))
for data in datalist:
    ext = os.path.splitext(data)  # ext[0]: path without suffix, ext[1]: suffix
    if ext[1] != '.xyz':
        # Leave anything whose suffix is not exactly '.xyz' untouched.
        continue
    new_name = ext[0] + '.txt'
    os.rename(data, new_name)