# MySQL connection for the add_intcls table (trademark image records).
# NOTE(review): host/database are blank and credentials are hardcoded —
# presumably filled in before running; move to env vars / config.
eng = pymysql.connect(host='', user='root', password='mysql', database='')
# Second MySQL connection, used for the graphics_elements_two lookup table.
eng_gul = pymysql.connect(host='', user='root', password='mysql', database='')
# Target Elasticsearch host (blank here — TODO confirm before running).
es_host_target = ""
es_target = Elasticsearch([es_host_target], http_auth=('name', 'pswd'), port=9200)
# ES index name queried by find_in_es / find_two_lv_in_es below.
indexName = 'tp'
# The following are some interface helpers (self-contained)
def find_two(index):
    """Fetch up to 5000 random REGNO values whose IMAGETYPE starts with *index*.

    Args:
        index: image-type category prefix; rows match when
            ``IMAGETYPE LIKE '<index>.%'``.

    Returns:
        pandas.DataFrame with a single ``REGNO`` column.
    """
    # Parameterized query: the original concatenated *index* into the SQL
    # string, which is vulnerable to SQL injection and breaks on quotes.
    sql_find_two = (
        "SELECT REGNO FROM add_intcls "
        "WHERE IMAGETYPE LIKE %s ORDER BY RAND() LIMIT 5000"
    )
    # The literal '%' in the LIKE pattern is safe inside a bound parameter.
    return pd.read_sql(sql_find_two, eng, params=[index + ".%"])
def find_in_es(nums, indexName):
    """Exact-match lookup of *nums* against regis_number in the given ES index.

    Returns the raw Elasticsearch search response (up to 300 hits, with only
    the regis_number and imageType_second fields in each _source).
    """
    query_body = {
        "query": {"term": {"regis_number.keyword": {"value": nums}}},
        "size": 300,
        "_source": ["regis_number", "imageType_second"],
    }
    return es_target.search(body=query_body, index=indexName)
def find_the_two_index(tow_lv_dict, two_lv):
    """Map a second-level category code to its numeric index.

    Args:
        tow_lv_dict: mapping of category code -> numeric index.
        two_lv: category code to look up.

    Returns:
        The mapped index, or 0 when *two_lv* is unknown (or maps to None).
    """
    # Single lookup — the original called .get() twice for a found key.
    res = tow_lv_dict.get(two_lv)
    return 0 if res is None else res
# Find the regno in ES and collect the distinct second-level indices.
def find_two_lv_in_es(tow_dict, reg):
    """Return the category index of every ES hit for registration number *reg*.

    Args:
        tow_dict: mapping of second-level category code -> numeric index.
        reg: registration number to search for.

    Returns:
        list[int] — one index per hit (0 for unknown categories); order
        follows the ES hit order.
    """
    res = find_in_es(reg, indexName)
    hits = res['hits']['hits']
    # Iterate hits directly instead of indexing by range(len(...)).
    return [
        find_the_two_index(tow_dict, hit['_source']['imageType_second'])
        for hit in hits
    ]
# ---- The processing below gathers 5000 images per category
# sql_two = 'SELECT ZLBM,ID FROM graphics_elements_two'
# res_two = pd.read_sql(sql_two, eng_gul)
#
# res_two_ls = res_two.values.tolist()
#
# json_file = 'short.json'
# with open(json_file, 'r', encoding='utf8')as fp:
# ls_json_data = json.load(fp)
#
# toal_ls = []
# for ind_two, id in tqdm(res_two_ls):
# if ind_two not in ls_json_data:
# res = find_two(ind_two)
# res['two'] = id
# ls_res = res.values.tolist()
# toal_ls.extend(ls_res)
# else:
# print('{} is in json'.format(ind_two))
#
#
# dd_csv = DataFrame(toal_ls)
# csv_file = '144_5000.csv'
# dd_csv.to_csv(csv_file, index=False)
# # --get the dict of two lv
# sql_two = 'SELECT ZLBM,ID FROM graphics_elements_two'
# res_two = pd.read_sql(sql_two, eng_gul)
#
# res_two_ls = res_two.values.tolist()
# tow_dict = {}
# for ind_two, id in res_two_ls:
# tow_dict[ind_two] = id
#
# # ---- The processing below arranges the label data into a fixed format
# csv_file = '144_5000.csv'
# df_csv = pd.read_csv(csv_file)
# ls = ['REGNO', 'index']
# for i in range(144):
# ls.append(i)
# df_csv.columns = ['REGNO', 'index']
# df_res = df_csv.reindex(columns=ls, fill_value=0)
# ls_res = df_res.values.tolist()
#
# for i in tqdm(range(len(ls_res))):
# index = ls_res[i][1]
# REGNO = ls_res[i][0]
# res_ls = find_two_lv_in_es(tow_dict, REGNO)
# for ind in res_ls:
# ls_res[i][1+ind] = 1
#
# dd_frame = DataFrame(ls_res)
# dd_frame.columns = ls
# csv_file_s = '144_group_5000.csv'
# dd_frame.to_csv(csv_file_s)
# -- Convert the CSV to TXT. Required format: no header row, no index
# column; each line is "<REGNO>.jpg<TAB><comma-joined label vector>".
csv_file = 'S_144_group_5000.csv'
txt_file = 'S_144_group_5000.txt'
df = pd.read_csv(csv_file)
df_ls = df.values.tolist()
print('start------------')
with open(txt_file, 'w', encoding='utf-8') as f:
    for row in tqdm(df_ls):
        # row[0] is the registration number -> image file name;
        # row[1:] holds the label columns. Reuse the already-materialized
        # list instead of re-indexing the DataFrame with df.iloc per row.
        label_str = ','.join('%s' % val for val in row[1:])
        f.write(row[0] + '.jpg' + '\t' + label_str + '\n')
print('finished------------')