Useful Python commands: a summary
Linear algebra
That is, we transform x⃗ by B and then transform the resulting vector by A, much as we would with the nested function f(g(x⃗)).
A.dot(B).dot(x) gives the same result as A.dot(B.dot(x))
def cosine(a, b):
    """Return the cosine of the angle between vectors a and b.

    The value is the dot product of a and b divided by the product of
    their Euclidean lengths (each length is the square root of the
    vector's dot product with itself).

    There is no special handling for zero-length vectors: the
    denominator is then zero.
    """
    cos = np.dot(a, b) / (np.sqrt(np.dot(a, a)) * np.sqrt(np.dot(b, b)))
    return cos
# Unit-length version of a: scale a by the reciprocal of its Euclidean length.
a_norm = (1 / np.sqrt(np.dot(a, a))) * a
OLS Construction
U = x matrix (the predictors, one row per observation)
v = y matrix (the outcome)
# Number of observations (rows of U).
N = U.shape[0]
X = np.column_stack((np.ones(N), U))  # Add a constant (column vector of ones)
# OLS coefficients from the normal equations (X'X) B = X'v.
# Solving the linear system avoids forming an explicit inverse, and B is kept
# at full precision because it is reused for the fitted values further down;
# rounding it here would distort everything derived from it.
B = la.solve(X.T.dot(X), X.T.dot(v))
# Rounded view of the coefficients, for display only.
B.round(2)
# R squared: sum of squares of the centered fitted values divided by the
# sum of squares of the centered observed values.
v_hat = np.dot(X, B)
v_hat_centered = v_hat - np.mean(v_hat)
v_centered = v - np.mean(v)
r_squared = np.dot(v_hat_centered, v_hat_centered) / np.dot(v_centered, v_centered)
r_squared.round(2)
#sigma
# Residuals: observed outcome minus the fitted values X.dot(B).
e = v - X.dot(B)
n = X.shape[0]  # number of rows of X (observations)
p = X.shape[1]  # number of columns of X (coefficients, including the constant)
# Residual variance estimate: residual sum of squares over (n - p).
sigma2 = e.T.dot(e) / (n - p)
# Coefficient covariance matrix: sigma2 times the inverse of X'X.
cov_B = sigma2 * la.inv(X.T.dot(X))
Eigenvector
#Draw out the relevant items: every column of anes whose name contains 'V'.
items = anes[anes.columns[anes.columns.str.contains('V')]]
#I am also going to rescale the variables just so they have the same mean and variance
#this makes the different thermometer metrics comparable.
#Each column is centered on its own mean and divided by its own standard deviation.
items = items.apply(lambda col: (col - col.mean()) / col.std(), axis=0)
#Generate a correlation matrix
p = items.corr()
#Decompose
#p is a correlation matrix and therefore symmetric, so eigh is used; it
#returns real eigenvalues in ascending order.
evals, evecs = la.eigh(p)
#Reorder so the largest eigenvalue comes first; the code that follows picks
#the leading columns of evecs and relies on this ordering.
order = np.argsort(evals)[::-1]
evals = evals[order]
evecs = evecs[:, order]
#Share of the total variance carried by each eigenvalue.
variance_explained = evals / sum(evals)
print('Proportion of Variation Explained')
for i, val in enumerate(variance_explained):
    print(f'''
Eigenvalue {i+1} accounts for {round(val*100,2)}% of the variance
''')
#select the first 6 eigenvectors (the leading columns of evecs)
n_components = 6
weights = evecs[:, :n_components]
#values of the subsetted data
X = items.values
#reducing data by weighting the data with our chosen eigenvectors
reduced_data = X.dot(weights)
#Convert data to a pandas data frame.
#The row labels of items are carried over so that a later column-wise
#concat with anes lines rows up by label rather than by a fresh 0..N-1 index.
reduced_data = pd.DataFrame(
    reduced_data,
    index=items.index,
    columns=[f"comp_{i+1}" for i in range(n_components)],
)
#Bind onto original data: identifier columns side by side with the components,
#then drop every row that has a missing value.
D = pd.concat([anes[['respondent_id', 'party_id']], reduced_data], axis=1).dropna()
#View the head of the data
D.head()
# Pairwise plots of the six components, colored by party_id.
g = sns.pairplot(
    D[['comp_1', 'comp_2', 'comp_3', 'comp_4', 'comp_5', 'comp_6', 'party_id']],
    hue="party_id",
    height=4,
)
# One row of three panels: a boxplot of each of the first three components
# against party_id.
f, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, component in zip(axes, ['comp_1', 'comp_2', 'comp_3']):
    sns.boxplot(y=component, x='party_id', data=D, ax=ax)