# 房租预测
# 载入库与数据读取
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
# Load the training table and the test table from CSV files located in the
# current working directory.
train_data = pd.read_csv(filepath_or_buffer='train_data.csv')
test_data = pd.read_csv(filepath_or_buffer='test_a.csv')
# 数据的预浏览
# Print the dimensions (rows, columns) of the training and test sets.
print('train:', train_data.shape)
print('test:', test_data.shape)
# Print every column that exists in the training set but not in the test set
# (the difference is expected to be the target variable).
for var in train_data.columns:
    if var not in test_data.columns:
        print(var)
# Print the training set's column names.
print(train_data.columns)
# 查看每个变量的情况
def get_null(df):
for var in df.columns:
if df[var].isnull().sum()>0:
print(var,'缺失值个数ÿ