笔记整理,包括 MySQL 查询接口和 ES(Elasticsearch)的查询调用,还有文本处理等

# MySQL connection that find_two reads from. Host and database are blank here.
# NOTE(review): credentials are hard-coded; presumably placeholders -- confirm
# before running against a real server.
eng = pymysql.connect(host='', user='root', password='mysql', database='')
# Second MySQL connection; in this file it is only referenced by the
# commented-out steps that query graphics_elements_two.
eng_gul = pymysql.connect(host='', user='root', password='mysql', database='')
# Elasticsearch host (blank here) and the client that find_in_es searches with.
es_host_target = ""
es_target = Elasticsearch([es_host_target], http_auth=('name', 'pswd'), port=9200)
# Index name that find_two_lv_in_es passes to find_in_es.
indexName = 'tp'

以下是一些接口,按需自取

def find_two(index):
    """Return up to 5000 randomly ordered REGNO rows for one IMAGETYPE prefix.

    Queries the ``add_intcls`` table through the module-level ``eng``
    connection for rows whose ``IMAGETYPE`` starts with ``index`` followed by
    a dot.

    Args:
        index: Prefix to match; it is converted with ``str()`` before use.

    Returns:
        The DataFrame produced by ``pd.read_sql`` (a single REGNO column).
    """
    # The pattern is passed as a bound parameter instead of being concatenated
    # into the SQL text, so quotes in ``index`` can no longer break or inject
    # into the statement. LIKE wildcards inside ``index`` still act as
    # wildcards, exactly as before.
    sql_find_two = (
        "SELECT REGNO from add_intcls "
        "WHERE IMAGETYPE LIKE %s  ORDER BY RAND() LIMIT 5000"
    )
    like_pattern = str(index) + ".%"
    return pd.read_sql(sql_find_two, eng, params=(like_pattern,))

def find_in_es(nums, indexName):
    """Run a term query on ``regis_number.keyword`` against ``indexName``.

    Args:
        nums: Value the ``regis_number.keyword`` field must equal.
        indexName: Name of the index to search.

    Returns:
        Whatever ``es_target.search`` returns for the request. At most 300
        hits are requested, restricted to the ``regis_number`` and
        ``imageType_second`` source fields.
    """
    term_clause = {"regis_number.keyword": {"value": nums}}
    request = {
        "query": {"term": term_clause},
        "size": 300,
        "_source": ["regis_number", "imageType_second"],
    }
    response = es_target.search(body=request, index=indexName)
    return response

def find_the_two_index(tow_lv_dict, two_lv):
    """Look up ``two_lv`` in ``tow_lv_dict``, falling back to 0.

    Args:
        tow_lv_dict: Mapping to search.
        two_lv: Key to look up.

    Returns:
        The mapped value, or 0 when the key is absent or maps to ``None``.
    """
    mapped = tow_lv_dict.get(two_lv)
    return 0 if mapped is None else mapped

# Find the regno in ES and map each hit's second-level category to its index
def find_two_lv_in_es(tow_dict, reg):
    """Map every ES hit for ``reg`` to its entry in ``tow_dict``.

    Searches the module-level ``indexName`` index via ``find_in_es`` and, for
    each hit, resolves its ``imageType_second`` source field with
    ``find_the_two_index``.

    Args:
        tow_dict: Mapping passed through to ``find_the_two_index``.
        reg: Value to search for with ``find_in_es``.

    Returns:
        A list with one resolved value per hit, in hit order.
    """
    hits = find_in_es(reg, indexName)['hits']['hits']
    return [
        find_the_two_index(tow_dict, hit['_source']['imageType_second'])
        for hit in hits
    ]

# ---- 以下处理是为了获取每个类别5000张

   # sql_two = 'SELECT ZLBM,ID FROM graphics_elements_two'
    # res_two = pd.read_sql(sql_two, eng_gul)
    #
    # res_two_ls = res_two.values.tolist()
    #
    # json_file = 'short.json'
    # with open(json_file, 'r', encoding='utf8')as fp:
    #     ls_json_data = json.load(fp)
    #
    # toal_ls = []
    # for ind_two, id in tqdm(res_two_ls):
    #     if ind_two not in ls_json_data:
    #         res = find_two(ind_two)
    #         res['two'] = id
    #         ls_res = res.values.tolist()
    #         toal_ls.extend(ls_res)
    #     else:
    #         print('{} is in json'.format(ind_two))
    #
    #
    # dd_csv = DataFrame(toal_ls)
    # csv_file = '144_5000.csv'
    # dd_csv.to_csv(csv_file, index=False)
 # #  --get the dict of two lv
    # sql_two = 'SELECT ZLBM,ID FROM graphics_elements_two'
    # res_two = pd.read_sql(sql_two, eng_gul)
    #
    # res_two_ls = res_two.values.tolist()
    # tow_dict = {}
    # for ind_two, id in res_two_ls:
    #     tow_dict[ind_two] = id
    #
    # #  ----  以下处理是为了整理标签数据到固定格式
    # csv_file = '144_5000.csv'
    # df_csv = pd.read_csv(csv_file)
    # ls = ['REGNO', 'index']
    # for i in range(144):
    #     ls.append(i)
    # df_csv.columns = ['REGNO', 'index']
    # df_res = df_csv.reindex(columns=ls, fill_value=0)
    # ls_res = df_res.values.tolist()
    #
    # for i in tqdm(range(len(ls_res))):
    #     index = ls_res[i][1]
    #     REGNO = ls_res[i][0]
    #     res_ls = find_two_lv_in_es(tow_dict, REGNO)
    #     for ind in res_ls:
    #         ls_res[i][1+ind] = 1
    #
    # dd_frame = DataFrame(ls_res)
    # dd_frame.columns = ls
    # csv_file_s = '144_group_5000.csv'
    # dd_frame.to_csv(csv_file_s)
 # -- 处理csv文件  要求格式:没有表头,没有index, 类别的地方需要用空格代替
    # Convert the label CSV into a text file with no header and no index:
    # one line per row, formatted as "<first column>.jpg<TAB>v1,v2,...".
    csv_file = 'S_144_group_5000.csv'
    txt_file = 'S_144_group_5000.txt'
    df = pd.read_csv(csv_file)
    df_ls = df.values.tolist()
    print('start------------')
    with open(txt_file, 'w', encoding='utf-8') as f:
        # Iterate the already-materialised rows directly instead of looking
        # each row up again through df.iloc, which is slow per row.
        for row in tqdm(df_ls):
            # Column 0 is the image name stem; the remaining columns are
            # joined with commas after converting each value to a string.
            labels = ','.join(str(value) for value in row[1:])
            f.write(row[0] + '.jpg' + '\t' + labels + '\n')
    print('finished------------')
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值