os.path.join()(从哪里读数据,数据路径)
os.path.join()函数:连接两个或更多的路径名组件
- 如果各组件名首字母不包含’/’,则函数会自动加上
- 如果有一个组件是一个绝对路径,则在它之前的所有组件均会被舍弃 (第一个字母以”/”开头的参数开始拼接,之前的参数全部丢弃)
- 如果最后一个组件为空,则生成的路径以一个’/’分隔符结尾
import os
# Build the dataset paths component by component; os.path.join inserts the
# platform's path separator between the pieces.
split_dir = os.path.join("..", "..", "data", "rmb_split")
train_dir, valid_dir = (
    os.path.join(split_dir, subset) for subset in ("train", "valid")
)

# Show the three resulting paths, one per line.
for shown_path in (split_dir, train_dir, valid_dir):
    print(shown_path)
..\..\data\rmb_split
..\..\data\rmb_split\train
..\..\data\rmb_split\valid
# Three components; only the last one starts with a separator.
Path1, Path2, Path3 = 'home', 'develop', '/code'

# Plain concatenation glues the pieces together verbatim.
Path10 = Path1 + Path2 + Path3
# os.path.join restarts at an absolute component, so everything before
# '/code' is discarded.
Path20 = os.path.join(Path1, Path2, Path3)

for label, joined in (('Path10 = ', Path10), ('Path20 = ', Path20)):
    print(label, joined)
Path10 = homedevelop/code
Path20 = /code
详细见:https://www.cnblogs.com/an-ning0920/p/10037790.html
os.walk()
- os.walk() 方法用于通过在目录树中游走输出在目录中的文件名,向上或者向下。
- os.walk() 方法是一个简单易用的文件、目录遍历器,可以帮助我们高效的处理文件、目录方面的事情。
os.walk(data_dir)
data_dir-- 是你所要遍历的目录的地址, 返回的是一个三元组(root,dirs,files)。
- root 所指的是当前正在遍历的这个文件夹的本身的地址
- dirs 是一个 list ,内容是该文件夹中所有的目录的名字(不包括子目录)
- files 同样是 list , 内容是该文件夹中所有的文件(不包括子目录)
os.walk() 返回一个生成器(generator),每次迭代产生一个三元组 (root, dirs, files)。
import os
# Walk the tree rooted at the current directory (parents before children)
# and print the path of every sub-directory that is found.
for root, dirs, files in os.walk(".", topdown=True):
    # for name in files:
    #     print(os.path.join(root, name))
    for name in dirs:
        print(os.path.join(root, name))
.\data
.\data\RMB_data
.\data\RMB_data\1
.\data\RMB_data\100
import os
# Walk the tree rooted at the current directory (parents before children)
# and print the path of every file that is found.
for root, dirs, files in os.walk(".", topdown=True):
    for name in files:
        print(os.path.join(root, name))
    # for name in dirs:
    #     print(os.path.join(root, name))
.\split_dir.py
.\data\RMB_data\1\01B68AKT.jpg
.\data\RMB_data\100\013MNV9B.jpg
详细查看:https://www.runoob.com/python/os-walk.html
例子:
import os
import random
dataset_dir = os.path.join("..", "..", "rmbDataset", "data", "RMB_data")
# print(dataset_dir)

# Print the path of every directory found beneath dataset_dir.
for root, dirs, files in os.walk(dataset_dir):
    for name in dirs:
        print(os.path.join(root, name))
        # imgs = os.listdir(os.path.join(root, name))
        # imgs = list(filter(lambda x: x.endswith('.jpg'), imgs))
        # random.shuffle(imgs)
        # img_count = len(imgs)
        # print(imgs)
        # print(img_count)
os.listdir()
概述:
- os.listdir() 方法用于返回指定的文件夹包含的文件或文件夹的名字的列表(不包括文件夹自己的名字)。
- 它不包括 . 和 .. 即使它在文件夹中。
listdir() 方法的使用:
import os, sys
# Directory whose entries will be listed
path = "/var/www/html/"
dirs = os.listdir(path)

# Print every file and sub-directory name (Python 3 print function)
for entry in dirs:
    print(entry)
如果目录下有中文目录,打印时遇到乱码解决方法:
cPath = os.getcwd()
# Python 3: os.getcwd() already returns str, and os.listdir() called with a
# str path returns str names, so the Python 2 unicode(...) decoding step is
# no longer needed for non-ASCII (e.g. Chinese) directory names.
uPath = cPath
for fileName in os.listdir(uPath):
    print(fileName)
详细见:https://www.runoob.com/python/os-listdir.html
例子:
import os
# For every directory below the current one, print its path followed by the
# list of names it contains.
for root, dirs, files in os.walk(".", topdown=True):
    for name in dirs:
        print(os.path.join(root, name))
        imgs = os.listdir(os.path.join(root, name))
        print(imgs)
.\data
['RMB_data']
.\data\RMB_data
['1', '100']
.\data\RMB_data\1
['01B68AKT.jpg', '0RPZ5WDL.jpg']
.\data\RMB_data\100
['013MNV9B.jpg', '0ZA9M8E2.jpg']
import os
# For every directory below the current one, print only the list of names
# it contains.
for root, dirs, files in os.walk(".", topdown=True):
    for name in dirs:
        # print(os.path.join(root, name))
        imgs = os.listdir(os.path.join(root, name))
        print(imgs)
['RMB_data']
['1', '100']
['01B68AKT.jpg', '0RPZ5WDL.jpg']
['013MNV9B.jpg', '0ZA9M8E2.jpg']
import os
import random
# For every directory below the parent directory, shuffle the names it
# contains and print them together with their count.
for root, dirs, files in os.walk("..", topdown=True):
    for name in dirs:
        # print(os.path.join(root, name))
        imgs = os.listdir(os.path.join(root, name))
        # imgs = list(filter(lambda x: x.endswith('.jpg'), imgs))
        random.shuffle(imgs)
        img_count = len(imgs)
        print(imgs)
        print(img_count)
import os
import random
dataset_dir = os.path.join("..", "..", "rmbDataset", "data", "RMB_data")
# print(dataset_dir)

# For every directory below dataset_dir, shuffle its .jpg file names and
# print them together with their count.
for root, dirs, files in os.walk(dataset_dir):
    for name in dirs:
        # print(os.path.join(root, name))
        # Note the use of root: the listing targets the directory currently
        # being visited, so the names printed are the files inside it.
        imgs = [x for x in os.listdir(os.path.join(root, name))
                if x.endswith('.jpg')]
        random.shuffle(imgs)
        img_count = len(imgs)
        print(imgs)
        print(img_count)
import shutil
shutil – Utility functions for copying and archiving files and directory trees.
(用于复制和存档文件和目录树的实用功能。)
shutil.copy(文件1,文件2):拷贝文件和权限都进行copy
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    # When dst is a directory, the copy keeps the source's base name inside it.
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst, follow_symlinks=follow_symlinks)
    copymode(src, dst, follow_symlinks=follow_symlinks)
    return dst
详细:https://www.cnblogs.com/sui776265233/p/9225417.html
https://blog.51cto.com/12965094/2351292
import shutil

# Copy f1.log to f2.log.
shutil.copy('f1.log', 'f2.log')
例子:
# Copy 11.txt to 12.txt within the same folder.
src_file = 'D:/mynode/exercise/rmbDataset/11.txt'
dst_file = 'D:/mynode/exercise/rmbDataset/12.txt'
shutil.copy(src_file, dst_file)
RMB_dataset:
import os
import random
import shutil
def makedir(dir):
    """Create directory *dir*, including missing parents, if it is absent.

    Calling it again for an existing directory is a no-op.

    Raises:
        FileExistsError: if *dir* exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dir, exist_ok=True)
if __name__ == '__main__':
    # First-draft trial: copy ONE shuffled image per directory into train_dir
    # to check the path handling before the full split is written.
    dataset_dir = os.path.join("..", "..", "rmbDataset", "data", "RMB_data")
    # print(dataset_dir)
    rmb_split = os.path.join("..", "..", "rmbDataset", "data", "RMB_split")
    train_dir = os.path.join(rmb_split, "train")
    valid_dir = os.path.join(rmb_split, "valid")
    test_dir = os.path.join(rmb_split, "test")

    # Fractions of the images intended for train / valid / test.
    train_poi = 0.8
    valid_poi = 0.1
    test_poi = 0.1

    for root, dirs, files in os.walk(dataset_dir):
        for name in dirs:
            # List from root/name so the source path below matches the
            # directory that was actually listed.
            class_dir = os.path.join(root, name)
            imgs = [x for x in os.listdir(class_dir) if x.endswith('.jpg')]
            random.shuffle(imgs)
            img_count = len(imgs)
            print(imgs)
            # print(img_count)

            # Cut points into the shuffled list (e.g. 80 and 90 for 100
            # images). The train cut must use train_poi, not test_poi.
            train_num = int(img_count * train_poi)
            valid_num = int(img_count * (train_poi + valid_poi))

            # A directory without .jpg files has nothing to copy.
            if not imgs:
                continue

            # for i in range(img_count):
            # shutil.copy needs the destination directory to exist.
            makedir(train_dir)
            out_dir = os.path.join(train_dir, imgs[0])
            in_dir = os.path.join(class_dir, imgs[0])
            shutil.copy(in_dir, out_dir)
import os
import random
import shutil
def makedir(dir):
    """Create directory *dir*, including missing parents, if it is absent.

    Calling it again for an existing directory is a no-op.

    Raises:
        FileExistsError: if *dir* exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dir, exist_ok=True)
if __name__ == '__main__':
    # Split the .jpg files of every directory under dataset_dir into
    # train / valid / test copies below rmb_split, keeping the directory name.
    dataset_dir = os.path.join("..", "..", "rmbDataset", "data", "RMB_data")
    # print(dataset_dir)
    rmb_split = os.path.join("..", "..", "rmbDataset", "data", "RMB_split")
    train_dir = os.path.join(rmb_split, "train")
    valid_dir = os.path.join(rmb_split, "valid")
    test_dir = os.path.join(rmb_split, "test")

    # Fractions of each directory's images sent to train / valid / test.
    train_poi = 0.8
    valid_poi = 0.1
    test_poi = 0.1  # not read below: test receives whatever remains

    for root, dirs, files in os.walk(dataset_dir):
        for name in dirs:
            # Use root/name for both listing and copying so the source path
            # always matches the directory that was listed.
            class_dir = os.path.join(root, name)
            imgs = [x for x in os.listdir(class_dir) if x.endswith('.jpg')]
            random.shuffle(imgs)
            img_count = len(imgs)

            # Cut points into the shuffled list (e.g. 80 and 90 for 100 images).
            train_num = int(img_count * train_poi)
            valid_num = int(img_count * (train_poi + valid_poi))

            for i, img in enumerate(imgs):
                if i < train_num:
                    out_dir = os.path.join(train_dir, name)
                elif i < valid_num:
                    out_dir = os.path.join(valid_dir, name)
                else:
                    out_dir = os.path.join(test_dir, name)

                # Create the destination directory on first use.
                makedir(out_dir)

                # Copy this image.
                fout_dir = os.path.join(out_dir, img)
                in_dir = os.path.join(class_dir, img)
                shutil.copy(in_dir, fout_dir)

            # One summary line per directory, after all its images are copied.
            print('Class:{}, train:{}, valid:{}, test:{}'.format(
                name, train_num, valid_num - train_num, img_count - valid_num))
Class:1, train:80, valid:10, test:10
Class:100, train:80, valid:10, test:10