pandas具体用法相关内容(一)
#%% md
## 1、pandas加载文件
#%%
import pandas
# Load the CSV file; the relative path is resolved against the current working directory.
food_info = pandas.read_csv("food_info.csv")
# Show the type of the object returned by read_csv.
print(type(food_info))
#%%
# Data type of every column. This is a bare expression, so it is only displayed
# when the cell is run interactively (it prints nothing in a plain script run).
food_info.dtypes
#%% md
## 2、pandas获取数据内容
#%%
# Quick overview of the loaded data: default head(), the first three rows,
# the column labels and the shape, each separated by a divider line.
first_rows = food_info.head()
print(
    first_rows,  # rows returned by head() with its default size
    "-------------------------------------",
    food_info.head(3),  # first 3 rows only
    "**************************************",
    food_info.columns,  # labels of all columns
    "...............................维度:",
    food_info.shape,  # dimensions of the data
    sep="\n",
)
#%%
# pandas uses zero-indexing.
# Series object representing the row at index 0.
print (food_info.loc[0])  # fetch the first row
# Series object representing the seventh row.
# Bare expression: only displayed when the cell is run interactively.
food_info.loc[6]  # fetch the 7th row
# Will throw an error: "KeyError: 'the label [8620] is not in the [index]'"
#food_info.loc[8620]
# The object dtype is equivalent to a string in Python.
#%% md
## 3、pandas数据类型种类
#%%
#object - For string values
#int - For integer values
#float - For float values
#datetime - For time values
#bool - For Boolean values
#print(food_info.dtypes)
#%%
# Slice by label to get the rows at indexes 3, 4, 5 and 6, then print a divider.
print(food_info.loc[3:6], "-----------------------------", sep="\n")

# Rows at indexes 2, 5 and 10 can be requested with a list of labels.
# Either of the two approaches below works.
# Method 1: keep the labels in a named list.
two_five_ten = [2, 5, 10]
print(food_info.loc[two_five_ten], "******************************", sep="\n")

# Method 2: pass the list literal directly.
# Bare expression: only displayed when the cell is run interactively.
food_info.loc[[2, 5, 10]]
#%% md
## 4、获取列数据
#%%
# Series object representing the "NDB_No" column.
ndb_col = food_info["NDB_No"]  # select a single column by its name
print(ndb_col)
# Alternatively, you can access a column by passing in a string variable.
#col_name = "NDB_No"
#ndb_col = food_info[col_name]
#%%
# Indexing with a list of column names selects several columns at once.
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]  # select multiple columns
print (zinc_copper)
# Skipping the assignment.
#zinc_copper = food_info[["Zinc_(mg)", "Copper_(mg)"]]
#%%
# Find the columns whose name ends with "(g)" and preview them.
# Get the column labels as a plain Python list so they can be filtered.
col_names = food_info.columns.tolist()
print(col_names)
# Keep only the names that end with "(g)".
gram_columns = [name for name in col_names if name.endswith("(g)")]
# Select those columns and show their first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))
#%% md
#%%
# NOTE(review): this cell repeats the earlier "NDB_No" column-selection cell
# verbatim; it looks like an accidental duplicate - confirm before removing.
# Series object representing the "NDB_No" column.
ndb_col = food_info["NDB_No"]  # select a single column by its name
print(ndb_col)
# Alternatively, you can access a column by passing in a string variable.
#col_name = "NDB_No"
#ndb_col = food_info[col_name]
#%%
# NOTE(review): this cell repeats the earlier zinc/copper column-selection cell
# verbatim; it looks like an accidental duplicate - confirm before removing.
# Indexing with a list of column names selects several columns at once.
columns = ["Zinc_(mg)", "Copper_(mg)"]
zinc_copper = food_info[columns]  # select multiple columns
print (zinc_copper)
# Skipping the assignment.
#zinc_copper = food_info[["Zinc_(mg)", "Copper_(mg)"]]
#%%
# NOTE(review): this cell repeats the earlier "(g)" column-filter cell;
# it looks like an accidental duplicate - confirm before removing.
# Find the columns whose name ends with "(g)" and preview them.
# Get the column labels as a plain Python list so they can be filtered.
col_names = food_info.columns.tolist()
print(col_names)
# Keep only the names that end with "(g)".
gram_columns = [name for name in col_names if name.endswith("(g)")]
# Select those columns and show their first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))