import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
dataset = pd.read_csv("HR_comma_sep.csv")
x = dataset.iloc[:,:-1].values ##Independent variable
y = dataset.iloc[:,9].values ##Dependent variable
##Encoding the categorical variables
le_x1 = LabelEncoder()
x[:,7] = le_x1.fit_transform(x[:,7])
le_x2 = LabelEncoder()
x[:,8] = le_x1.fit_transform(x[:,8])
ohe = OneHotEncoder(categorical_features = [7,8])
x = ohe.fit_transform(x).toarray()
##splitting the dataset in training and testing data
from sklearn.cross_validation import train_test_split
y = pd.factorize(dataset['left'].values)[0].reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)