import numpy as np
import pandas as pd
df = pd.read_csv("F:/data/Drugs.csv",index_col = ['State','COUNTY']).sort_index()
result = pd.pivot_table(df,index=['State','COUNTY','SubstanceName'],columns='YYYY',values='DrugReports',fill_value='-').reset_index().rename_axis(columns={'YYYY':''})
result.head()
| State | COUNTY | SubstanceName | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 |
---|
0 | KY | ADAIR | Buprenorphine | - | 3 | 5 | 4 | 27 | 5 | 7 | 10 |
---|
1 | KY | ADAIR | Codeine | - | - | 1 | - | - | - | - | 1 |
---|
2 | KY | ADAIR | Fentanyl | - | - | 1 | - | - | - | - | - |
---|
3 | KY | ADAIR | Heroin | - | - | 1 | 2 | - | 1 | - | 2 |
---|
4 | KY | ADAIR | Hydrocodone | 6 | 9 | 10 | 10 | 9 | 7 | 11 | 3 |
---|
result_melted = result.melt(id_vars=result.columns[:3],value_vars=result.columns[-8:],var_name='YYYY',value_name='DrugReports').query('DrugReports != "-"')
result2 = result_melted.sort_values(by=['State','COUNTY','YYYY','SubstanceName']).reset_index().drop(columns='index')
cols = list(result2.columns)
a, b = cols.index('SubstanceName'), cols.index('YYYY')
cols[b], cols[a] = cols[a], cols[b]
result2 = result2[cols].astype({'DrugReports':'int','YYYY':'int'})
result2.head()
result2.head()
| State | COUNTY | YYYY | SubstanceName | DrugReports |
---|
0 | KY | ADAIR | 2010 | Hydrocodone | 6 |
---|
1 | KY | ADAIR | 2010 | Methadone | 1 |
---|
2 | KY | ADAIR | 2011 | Buprenorphine | 3 |
---|
3 | KY | ADAIR | 2011 | Hydrocodone | 9 |
---|
4 | KY | ADAIR | 2011 | Morphine | 2 |
---|
df_tidy = df.reset_index().sort_values(by=result2.columns[:4].tolist()).reset_index().drop(columns='index')
df_tidy.equals(result2)
False
df = pd.read_csv('F:data/Earthquake.csv')
df = df.sort_values(by=df.columns.tolist()[:3]).sort_index(axis=1).reset_index().drop(columns='index')
result = pd.pivot_table(df,index=['日期','时间','维度','经度'],columns='方向',values=['烈度','深度','距离'],fill_value='-').stack(level=0).rename_axis(index={None:'地震参数'})
result.head(6)
| | | | 方向 | east | north | north_east | north_west | south | south_east | south_west | west |
---|
日期 | 时间 | 维度 | 经度 | 地震参数 | | | | | | | | |
---|
1912.08.09 | 12:29:00 AM | 40.6 | 27.2 | 深度 | - | - | - | - | - | 16 | - | - |
---|
烈度 | - | - | - | - | - | 6.7 | - | - |
---|
距离 | - | - | - | - | - | 4.3 | - | - |
---|
1912.08.10 | 12:23:00 AM | 40.6 | 27.1 | 深度 | - | - | - | - | - | - | 15 | - |
---|
烈度 | - | - | - | - | - | - | 6 | - |
---|
距离 | - | - | - | - | - | - | 2 | - |
---|
df_result = result.unstack().stack(0)[(~(result.unstack().stack(0)=='-')).any(1)].reset_index()
df_result.columns.name=None
df_result = df_result.sort_index(axis=1).astype({'深度':'float64','烈度':'float64','距离':'float64'})
df_result.head()
| 方向 | 日期 | 时间 | 深度 | 烈度 | 经度 | 维度 | 距离 |
---|
0 | south_east | 1912.08.09 | 12:29:00 AM | 16.0 | 6.7 | 27.2 | 40.6 | 4.3 |
---|
1 | south_west | 1912.08.10 | 12:23:00 AM | 15.0 | 6.0 | 27.1 | 40.6 | 2.0 |
---|
2 | south_west | 1912.08.10 | 12:30:00 AM | 15.0 | 5.2 | 27.1 | 40.6 | 2.0 |
---|
3 | south_east | 1912.08.11 | 12:19:04 AM | 30.0 | 4.9 | 27.2 | 40.6 | 4.3 |
---|
4 | south_west | 1912.08.11 | 12:20:00 AM | 15.0 | 4.5 | 27.1 | 40.6 | 2.0 |
---|
df_result.astype({'深度':'float64','烈度':'float64','距离':'float64'},copy=False).dtypes
方向 object
日期 object
时间 object
深度 float64
烈度 float64
经度 float64
维度 float64
距离 float64
dtype: object
df.equals(df_result)
True