路径相关
#导入os
import os
# Get the current working directory (as a bare expression the value is only
# displayed in an interactive session or notebook cell).
os.getcwd()
# Create a new folder inside the current working directory.
# os.path.join uses the separator of the running OS; a hard-coded "\\" only
# works on Windows. exist_ok=True lets the snippet be re-run without raising
# FileExistsError once the folder is there.
path = os.path.join(os.getcwd(), "新建文件夹")
os.makedirs(path, exist_ok=True)
# Build a path relative to the current directory from its components.
file_name = "新建文件夹"
path = os.path.join(".", file_name)
一段简单创建文件夹的程序
一种写法
import os
# Name of the folder to create under the current directory.
file_name = "新建文件夹"
def creat_cur_file(file_name):
    """Create a folder named *file_name* inside the current directory.

    Args:
        file_name: Name of the folder; it is joined onto ``"."``.

    Returns:
        True if the folder was created, False if the path already existed
        (a message is printed in that case).
    """
    path = os.path.join(".", file_name)
    # Attempt the creation and react to the failure instead of calling
    # os.path.exists() first: a check-then-create sequence can race with
    # another process creating the same path in between.
    try:
        os.makedirs(path)
    except FileExistsError:
        print(path + " 目录已存在")
        return False
    return True
# Create the folder; the call returns False when the path already exists.
creat_cur_file(file_name)
从指定网址下载数据
一种写法
import os
import tarfile
from six.moves import urllib
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
# HOUSING_PATH has to exist before it is used to build HOUSING_URL and as a
# default argument below; without it this snippet stops with a NameError.
HOUSING_PATH = "datasets/housing"
HOUSING_URL = DOWNLOAD_ROOT + HOUSING_PATH + "/housing.tgz"


def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and unpack it into *housing_path*.

    Args:
        housing_url: URL of the ``.tgz`` archive to download.
        housing_path: Local folder that receives ``housing.tgz`` and the
            extracted files. It is created if it does not exist.
    """
    # Create the *local folder* (the original passed the URL here by mistake).
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    # Copy the network object behind the URL into a local file.
    urllib.request.urlretrieve(housing_url, tgz_path)
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this with a source you trust.
    # The with-block closes the archive even when extraction fails.
    with tarfile.open(tgz_path) as housing_tgz:
        housing_tgz.extractall(path=housing_path)
fetch_housing_data()  # Call the function to download the data.
下载并显示下载进度
import os
import tarfile
from six.moves import urllib
import sys
# Local dataset folder, remote repository root, and the archive URL built
# from the two.
HOUSING_PATH = "datasets/housing"
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
HOUSING_URL = "".join((DOWNLOAD_ROOT, HOUSING_PATH, "/housing.tgz"))
# Helper for saving figures.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"


def save_fig(fig_id, tight_layout=True):
    """Save a figure as ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``.

    Args:
        fig_id: File name of the figure without the ``.png`` extension.
        tight_layout: When true, ``plt.tight_layout()`` is called before
            saving.
    """
    # NOTE(review): `plt` is not imported in the visible part of this file;
    # presumably it is matplotlib.pyplot -- confirm it is imported elsewhere.
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    # Nothing in this function creates the target folder, and saving into a
    # missing folder fails, so make sure it exists first.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive with a progress line, then unpack it.

    Args:
        housing_url: URL of the ``.tgz`` archive to download.
        housing_path: Local folder that receives ``housing.tgz`` and the
            extracted files. It is created if it does not exist.
    """
    os.makedirs(housing_path, exist_ok=True)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    # basename works with any OS separator; splitting on "\\" only isolates
    # the file name on Windows.
    filename = os.path.basename(tgz_path)

    # Callback passed as the third argument of urllib.request.urlretrieve.
    def _progress(block_num, block_size, total_size):
        """Rewrite a single status line on stdout for every received block."""
        received = block_num * block_size
        if total_size > 0:
            # The last block is usually only partly filled, so cap at 100%.
            percent = min(float(received) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
        else:
            # The total size is not usable (zero or negative), so a
            # percentage cannot be computed; show the byte count instead.
            sys.stdout.write('\r>> Downloading %s %d bytes' % (filename, received))
        sys.stdout.flush()

    urllib.request.urlretrieve(housing_url, tgz_path, _progress)
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this with a source you trust.
    # The with-block closes the archive even when extraction fails.
    with tarfile.open(tgz_path) as housing_tgz:
        housing_tgz.extractall(path=housing_path)
# Download and unpack the dataset using the default URL and folder.
fetch_housing_data()
LoadData
import pandas as pd
import os
# Load the data from the given directory.
def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` located in *housing_path* with pandas.

    Args:
        housing_path: Folder that contains ``housing.csv``.

    Returns:
        Whatever ``pd.read_csv`` returns for that file.
    """
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))
# Load the dataset from the default folder.
data = load_housing_data()
data.head()  # Quick look at the data.
data.info()  # Quick overview of the data's information.