python_对异常值进行处理_丢弃_转化
import pandas as pd
# Toy housing data set; the fourth row (116 bathrooms, 48000 square feet)
# is the extreme observation used throughout this example.
houses = pd.DataFrame(
    {
        'Price': [534433, 392333, 293222, 4322032],
        'Bathrooms': [2, 3.5, 2, 116],
        'Square_Feet': [1500, 2500, 1500, 48000],
    }
)
# Option 1 - drop: show only the rows with fewer than 20 bathrooms.
# This is a filtered view for display; `houses` itself is not modified.
houses.loc[houses['Bathrooms'] < 20]
Price Bathrooms Square_Feet
0 534433 2.0 1500
1 392333 3.5 2500
2 293222 2.0 1500
import numpy as np
# Option 2 - flag: keep every row and add a 0/1 indicator column instead.
# A row is marked 1 whenever its bathroom count is NOT below 20.
not_below_limit = ~(houses["Bathrooms"] < 20)
houses["Outlier"] = np.where(not_below_limit, 1, 0)
houses
Price Bathrooms Square_Feet Outlier
0 534433 2.0 1500 0
1 392333 3.5 2500 0
2 293222 2.0 1500 0
3 4322032 116.0 48000 1
# Option 3 - transform: store the natural log of the square footage in a
# new column, which shrinks the gap between the extreme row and the rest.
# np.log is applied to the whole column in one vectorized call rather than
# element by element in a Python-level loop; the result keeps the same index.
houses["Log_Of_Square_Feet"] = np.log(houses["Square_Feet"])
houses
Price Bathrooms Square_Feet Outlier Log_Of_Square_Feet
0 534433 2.0 1500 0 7.313220
1 392333 3.5 2500 0 7.824046
2 293222 2.0 1500 0 7.313220
3 4322032 116.0 48000 1 10.778956