### ---1--- numpy 初步例子
# coding: utf-8
# # Numpy Introduction
# ## numpy arrays
import numpy as np;
# A 1-D integer array: show its contents, shape and element type.
arr = np.array([1, 3, 4, 5, 6])
print(arr)
print(arr.shape)
print(arr.dtype)
# To see which dtype NumPy picks for mixed ints and strings, try:
#   arr = np.array([1, 'st', 'er', 3]); print(arr.dtype)
print(np.sum(arr))

# ### Creating arrays
arr = np.array([[1, 2, 3], [2, 4, 6], [8, 8, 8]])
print(arr.shape)

# Display every constructor's result: reassigning `arr` without printing
# would hide the zeros, ones and identity arrays entirely.
arr = np.zeros((2, 4))
print(arr)
arr = np.ones((2, 4))
print(arr)
arr = np.identity(3)
print(arr)
arr = np.random.randn(3, 4)
print(arr)
from io import BytesIO;
# Parse CSV text held in an in-memory byte buffer instead of a file on disk.
b = BytesIO(b"2,23,33\n32,42,63.4\n35,77,12")
arr = np.genfromtxt(b, delimiter=",")
print(arr)
print(arr[1])  # second row of the parsed table

# Twelve consecutive integers arranged as a 2x2x3 array.
arr = np.arange(12).reshape(2, 2, 3)
print(arr)

# 1-D slicing: open-ended tail, bounded window, and a negative stop.
arr = np.arange(10)
print(arr[5:])
print(arr[5:8])
print(arr[:-5])

# Slicing the first axis with 1:2 keeps that axis (length 1).
arr = np.arange(12).reshape(2, 2, 3)
print(arr[1:2])

# For a 3-D array, `...` stands in for the two leading full slices.
arr = np.arange(27).reshape(3, 3, 3)
print(arr[:, :, 2])
print(arr[..., 2])

# Integer-array indexing pairs row and column indices element by element:
# it picks (0, 1), (1, 0) and (2, 0).
arr = np.arange(9).reshape(3, 3)
print(arr[[0, 1, 2], [1, 0, 0]])
### ---2--- numpy例子继续
# Boolean masks: a label array alongside a (5, 3) block of random samples.
cities = np.array(["delhi", "banglaore", "mumbai", "chennai", "bhopal"])
city_data = np.random.randn(5, 3)
print(city_data)

# Pick the row whose label equals "delhi".
is_delhi = cities == "delhi"
print(city_data[is_delhi])

# Pick every strictly positive entry, then zero those entries in place.
positive = city_data > 0
print(city_data[positive])
city_data[positive] = 0
print(city_data)

# A scalar operand is applied to every element.
arr = np.arange(15).reshape(3, 5)
print(arr + 5)
print(arr * 2)

# A (5, 1) column is broadcast across the three columns of a (5, 3) array.
arr1 = np.arange(15).reshape(5, 3)
arr2 = np.arange(5).reshape(5, 1)
print(arr2 + arr1)

# np.modf returns the fractional and integral parts as two arrays.
arr1 = np.random.randn(5, 3)
print(arr1)
print(np.modf(arr1))
### ---3--- numpy 解线性方程组
# Matrix product of two 3x3 integer matrices.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
B = np.array([[9, 8, 7], [6, 5, 4], [1, 2, 3]])
print(A.dot(B))
print("\n\n")

# Transpose and singular value decomposition of a 3x5 matrix.
A = np.arange(15).reshape(3, 5)
print(A.T)
# A bare expression displays nothing when run as a script, so the
# decomposition has to be printed explicitly to be seen.
print(np.linalg.svd(A))

# Solve the linear system a @ x = b.
a = np.array([[7, 5, -3], [3, -5, 2], [5, 3, -7]])
b = np.array([16, -8, 0])
x = np.linalg.solve(a, b)
print(x)
# Verify the solution by substituting it back; print the verdict so the
# check is actually visible.
print(np.allclose(np.dot(a, x), b))
### ---4--- pandas
import pandas as pd;
# Build a DataFrame from a list of records (its print is left disabled).
d = [
    {'city': 'Delhi', "data": 1000},
    {'city': 'Banglaore', "data": 2000},
    {'city': 'Mumbai', "data": 1000},
]
df = pd.DataFrame(d)
# print(df)

# Load the CSV from the current working directory and preview the top rows.
city_data = pd.read_csv(filepath_or_buffer='outVec-2021-12-07-0900.csv')
print(city_data.head(n=5))
# Other views to try: city_data.tail(), city_data[:7], city_data.iloc[:5, :4]

series_es = city_data  # second name for the same DataFrame object, not a copy

# Every row, first four columns.
print(city_data.iloc[:, :4])

# NOTE(review): the column label is four tab characters followed by "0" --
# presumably the CSV header is tab-padded; confirm against the file.
above_three = city_data['\t\t\t\t0'] > 3
print(city_data[above_three])

# Same row filter, restricted to the columns whose label ends with "1".
# (Swap '1' for '0' to inspect the other column family; city_data.columns
# lists all available labels.)
ends_with_one = city_data.columns[pd.Series(city_data.columns).str.endswith('1')]
print(city_data.loc[above_three, ends_with_one])
### ---5--- scikit-learn
from sklearn import datasets;
# Load the diabetes regression dataset and preview its first ten samples.
diabetes = datasets.load_diabetes()
X = diabetes.data[:10]
# Slice the targets with the same bound as the features so X and y stay
# row-aligned (one target per previewed sample).
y = diabetes.target[:10]
print(X)
print(y)
# Labels for the ten feature columns (scikit-learn also exposes them as
# diabetes.feature_names).
feature_names = ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
### ---6--- scikit-learn
# ## Scikit example regression
from sklearn import datasets;
from sklearn.linear_model import Lasso;
from sklearn import linear_model, datasets;
from sklearn.model_selection import GridSearchCV;
# Split the diabetes data: the first 310 samples train, the rest test.
diabetes = datasets.load_diabetes()
X_train, X_test = diabetes.data[:310], diabetes.data[310:]
y_train, y_test = diabetes.target[:310], diabetes.target[310:]

lasso = Lasso(random_state=0)
# 30 candidate alpha values, log-spaced between 10**-4 and 10**-0.5.
alphas = np.logspace(-4, -0.5, 30)
scores = []
scores_std = []

# Cross-validated search over alpha, then report the winning configuration.
estimator = GridSearchCV(lasso, param_grid={"alpha": alphas})
estimator.fit(X_train, y_train)
print(estimator.best_score_)
print(estimator.best_estimator_)

# Compare predictions on the held-out samples with their true targets.
print(estimator.predict(X_test))
print(y_test)
# np.logspace(-6, 6, 13) yields 13 points, one per power of ten from 1e-06
# through 1e+06. A pasted interactive-session transcript (">>>" prompts plus
# echoed output) is not valid Python in a script, so the call is made
# directly here and the expected output is kept as a comment:
#   array([1.e-06, 1.e-05, 1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01,
#          1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06])
print(np.logspace(-6, 6, 13))