这里写自定义目录标题
欢迎使用Markdown编辑器
你好!这是你第一次使用 Markdown 编辑器所展示的欢迎页。如果你想学习如何使用 Markdown 编辑器,可以仔细阅读这篇文章,了解一下 Markdown 的基本语法知识。
代码片
去博客设置页面,选择一款你喜欢的代码片高亮样式,下面展示同样高亮的代码片。
# A highlighted block
#
# For every (name, DataFrame) pair in DF_UNION_MAPPING: resolve the unique
# key column(s) and variable types, report basic cardinality statistics,
# de-duplicate on the unique index, shuffle with a fixed seed, and pass the
# result to es.entity_from_dataframe under the dataset's name.
for dataset, df_tmp in DF_UNION_MAPPING.items():
    print("saving dataset {0}".format(dataset))

    # The lookup tables are keyed by the dataset name minus its first two
    # characters.
    # NOTE(review): this assumes every dataset name carries a two-character
    # prefix (like 'F_'), including the non-'F_' ones -- confirm.
    dataset_token = dataset[2:]
    if dataset.startswith('F_'):
        unique_keys = DATASET_PARSER_TOOLS.DATASET_UNIQUE_KEYS_MAPPING[dataset_token]
        variable_types = DatasetParserTools.adaptive_extend_ft_types(
            DATASET_PARSER_TOOLS.DATASET_FT_TYPES_MAPPING[dataset_token],
            list(df_tmp.columns),
        )
    else:
        # Non-'F_' datasets have exactly one unique-id column.
        unique_keys = [DICT_TABLE_UNIQUE_ID_MAPPING[dataset_token]]
        variable_types = ADDITIONAL_FT_TYPES_MAPPING[dataset_token]

    print("### {0}'s shape is ###\n{1}".format(dataset, df_tmp.shape))
    for key in unique_keys:
        print("{0} <- count(distinct({1}))".format(df_tmp[key].nunique(), key))

    if len(unique_keys) > 1:
        # Several key columns: build one composite column to act as the index.
        composite_index_name, composite_index = DatasetParserTools.build_composite_index(
            df_tmp, unique_keys
        )
        print("{0} <- count(distinct({1}))".format(composite_index.nunique(), composite_index_name))
        # This adds the column to the object held in DF_UNION_MAPPING as well.
        df_tmp[composite_index_name] = composite_index
        unique_index = composite_index_name
    else:
        unique_index = unique_keys[0]

    # Keep only the last row per unique index value. The in-place drop also
    # affects the DataFrame object stored in DF_UNION_MAPPING.
    df_tmp.drop_duplicates(subset=unique_index, keep='last', inplace=True)

    # Shuffle all rows reproducibly (fixed seed) and renumber the row index.
    # From here on df_tmp is a new DataFrame, not the one in the mapping.
    df_tmp = df_tmp.sample(frac=1, random_state=139).reset_index(drop=True)

    es = es.entity_from_dataframe(
        entity_id=dataset,
        dataframe=df_tmp,
        variable_types=variable_types,
        index=unique_index,
    )