Python导入数据的方法:
1)导入外部数据
2)导入模块内部的数据集
1)导入外部数据——利用pandas
import pandas as pd
# Read the Excel workbook into a DataFrame.
data1 = pd.read_excel(io='abc.xlsx')
# Read the CSV file, decoding its contents as GBK.
data2 = pd.read_csv(filepath_or_buffer='efg.csv', encoding='gbk')
2)导入模块内部的数据集——以statsmodels为例
# 先看看statsmodels里面有什么数据集
import statsmodels.api as sm
from pandas import DataFrame
# Build a catalogue of what sm.datasets exposes: one row per attribute that
# carries a DESCRSHORT string, indexed by the attribute name.
dataDict = {'name': [], 'describe_short': []}
for modstr in dir(sm.datasets):
    # getattr performs the same attribute lookup as the former eval-built
    # expression, without evaluating a constructed string.
    mod = getattr(sm.datasets, modstr)
    try:
        # Only this lookup is expected to fail, so it is the only guarded line.
        description = mod.DESCRSHORT
    except AttributeError as e:
        # The attribute has no DESCRSHORT, so report it and move on.
        print('该模块无 DESCRSHORT 属性\n', e)
        continue
    dataDict['describe_short'].append(description)
    dataDict['name'].append(modstr)
dataDf = DataFrame({'describe_short': dataDict['describe_short']}, index=dataDict['name'])
print(dataDf)
# Load the "cancer" dataset that ships with statsmodels.
cancer_data = sm.datasets.cancer.load_pandas()
df = cancer_data.data
# Print the preview explicitly: a bare expression statement is only echoed in
# an interactive session and shows nothing when the file runs as a script.
print(df.head())