import pandas
# Load the CSV file (path is relative to the working directory) into a DataFrame.
food_info = pandas.read_csv("food_info.csv")

# Show the container type; expected output: <class 'pandas.core.frame.DataFrame'>
print(type(food_info))

# Show the data type of every column.
print(food_info.dtypes)
# Take the first or last few rows
# head() with no argument returns the first five rows.
first_rows = food_info.head(n=5)
print(first_rows)

# An explicit count limits the preview: first three rows, then last three rows.
print(food_info.head(n=3))
print(food_info.tail(n=3))

# Labels of all columns.
print(food_info.columns)

# Dimensions of the table as reported by the .shape attribute.
print(food_info.shape)
# Select one row, or several rows, by index label
# Row selection with .loc. The examples below rely on pandas' zero-based
# default row labels.

# A single label gives back a Series representing the row at index 0.
print(food_info.loc[0])

# A label slice includes BOTH endpoints: rows 3, 4, 5 and 6 are all selected.
# NOTE: the result of this bare expression (and of the two below) is neither
# printed nor stored, so it produces no output when run as a script.
food_info.loc[3:6]

# A list of labels selects exactly those rows (2, 5 and 10) as a DataFrame.
# Method 1: keep the labels in a named list.
two_five_ten = [2, 5, 10]
food_info.loc[two_five_ten]

# Method 2: write the list inline; the selection is the same as above.
food_info.loc[[2, 5, 10]]
# Select specific columns by name
# Names of the columns to pull out of the table.
columns = ["Zinc_(mg)", "Copper_(mg)"]

# Keep every row, but only the listed columns.
zinc_copper = food_info.loc[:, columns]
print(zinc_copper)
# Collect the column names into a list
# Turn the column labels into a plain Python list.
col_names = food_info.columns.tolist()
print(col_names)

# Keep only the labels that end with "(g)" (presumably the columns measured
# in grams, going by the variable name).
gram_columns = [c for c in col_names if c.endswith("(g)")]

# Restrict the table to those columns and preview the first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))
# Arithmetic between columns, and creating a new column
# Multiplying two columns works element by element, one product per row.
water_energy = food_info["Water_(g)"] * food_info["Energ_Kcal"]

# Dividing a column by a scalar applies the division to every element;
# here the "Iron_(mg)" values are scaled by 1/1000 (milligrams -> grams).
iron_grams = food_info["Iron_(mg)"] / 1000

# Assigning to a label that does not exist yet creates a new column,
# named "Iron_(g)", filled with the values of iron_grams.
food_info["Iron_(g)"] = iron_grams
# Sort by a column's values; sorting in place is optional, and so is ascending vs. descending order
# sort_values orders the rows by the given column. Left to its defaults it
# sorts ascending and returns a new DataFrame; inplace=True instead reorders
# the existing DataFrame itself.
food_info.sort_values(by="Sodium_(mg)", ascending=True, inplace=True)
print(food_info["Sodium_(mg)"])

# Same column again, now in descending order (largest values first).
food_info.sort_values(by="Sodium_(mg)", ascending=False, inplace=True)
print(food_info["Sodium_(mg)"])