import pandas as pd
import numpy as np
# Clean the raw northbound ("北向") CSV and write a tidied copy.

# Load the file, using the second CSV column (position 1) as the row index.
data = pd.read_csv("北向.csv", index_col=1)

# Print a summary of the frame. ``info`` is a method, so it must be called;
# the bare attribute access used previously did nothing.
data.info()

# Drop every row that contains at least one missing value.
data = data.dropna()

# Drop the "index" column. The original ``del data["index"]`` statement had
# been swallowed into a comment, so the column was never removed.
# ``errors="ignore"`` keeps the script working if the column is absent.
data = data.drop(columns=["index"], errors="ignore")

# Remove fully duplicated rows (the first occurrence is kept).
data = data.drop_duplicates()

# Sort the rows by index label in ascending order.
data = data.sort_index()

# Save the cleaned data.
data.to_csv("北向副1.csv")
import numpy as np
import pandas as pd
# Load only the country and additive-count columns of the food dataset and
# discard every row in which either of the two values is missing.
origin_data = pd.read_csv(
    "FoodFacts.csv",
    usecols=["countries_en", "additives_n"],
).dropna()
def get_country(data):
    """Return the distinct single-country names in *data*, in first-seen order.

    Entries containing a comma list several countries at once and are
    skipped, so only values naming exactly one country are returned.
    Missing values are ignored.

    Args:
        data: A pandas Series of strings (e.g. the ``countries_en`` column).

    Returns:
        A list of unique country-name strings, ordered by first appearance.
    """
    names = data.dropna()
    # The original computed this filter but then looped over the unfiltered
    # series, so comma-separated multi-country strings leaked into the result.
    single_country = names[~names.str.contains(",", regex=False)]
    # drop_duplicates keeps the first occurrence, preserving encounter order
    # without the quadratic "is it already in the list" scan.
    return single_country.drop_duplicates().tolist()
# Summarise the additive counts for every country returned by get_country.
country_list = get_country(origin_data["countries_en"])

# For each country, take the mean of ``additives_n`` over all rows whose
# ``countries_en`` value contains that country's name (case-insensitive).
# ``regex=False`` makes the name match literally, so any regex
# metacharacters in a country name cannot change what is matched.
# NOTE(review): this is a substring match, so a name that is contained in
# another name also selects the longer name's rows - confirm that is intended.
additives_n_data = [
    origin_data.loc[
        origin_data["countries_en"].str.contains(
            country, case=False, regex=False
        ),
        "additives_n",
    ].mean()
    for country in country_list
]

# Build the result table and save it.
df = pd.DataFrame({"country": country_list, "num": additives_n_data})
df.to_csv("各过添加剂使用量")