# Read a list from Redis, classify its entries, and write the matches to another Redis list.

# -*- coding:utf-8 -*-
import redis
import json
import os
import sys
import jieba
import time

# Set the project root path so that related code files can be imported later.
root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(root_path)

# Shared Redis connection pool and client; demo() reads and writes through `r`.
# NOTE(review): host, port and password look like redacted placeholders
# (`port=xxxx` is a bare name, not a number, and raises NameError as written) —
# fill in real values before running.
pool = redis.ConnectionPool(host='47.xxx.xxx.xx', port=xxxx, decode_responses=True, password='password')
r = redis.StrictRedis(connection_pool=pool)
# Timestamp taken at import time; not read anywhere in the visible code.
start_time = time.time()
# Industry classification codes for the pharmaceutical industry chain.
# demo() tests a company's digits-only industryCode for membership in this list.
medicine = ['0171', '0179', '0251', '0252', '0392', '0399', '0411', '0412', '0421', '0422', '3543', '3544', '2780',
            '7340',
            '7512', '7520', '7530', '7540', '7590', '2710', '2720', '2730', '2740', '2761', '2762', '2770', '3581',
            '3582', '3583', '3584',
            '3585', '3586', '3587', '3589', '1491', '1492', '2666', '2683', '2665', '2915', '5151', '5152', '5154',
            '5126', '5134', '5142', '5193',
            '5251', '5252', '5254', '5255', '5225', '5234', '5236', '5242', '5211', '5212', '5219', '5292', '7115',
            '7121', '7122', '5960',
            '5990', '6432', '6434', '8411', '8412', '8413', '8414', '8415', '8425', '8432', '8433', '8416', '8515',
            '8512', '8513', '8522',
            '8434', '8492', '8499', '7244', '7451', '8435', '8436', '8491', '8992', '8930', '8053', '8514', '8521',
            '8010', '8090']
# Keywords for the pharmaceutical industry chain.
# demo() counts how many of these occur in a company's business scope.
med_words = ['医药', '药材', '中药材', '制药', '药用', '医学', '医疗', '中药', '药品', '医用', '消毒', '口腔', '康复', '外科', '内科', '西药', '营养',
             '诊断', '兽医', '护理', '医疗器械', '医药用品', '保健', '健康', '防护', '口腔', '保健品', '生育', '工商', '医院', '临床', '中西医', '疗养',
             '精神', '急救', '疾病', '制药', '供血', '养生', '妇幼', '残疾', '残疾人', '老年人', '急救', '体检', '防治', '疫苗', '注射', '计生', '药业',
             '胶囊剂', '片剂', '特殊输液', '麻醉类', '手术', '抗肿瘤', '医药中间体', '生物医药', '口腔义齿', '内窥镜', '医护人员', '中药饮片', '药制剂',
             '抗生素', '防护用品', '消毒产品', '健康技术', '医疗卫生', '诊所']

# Industry classification codes for the modern finance industry chain.
# demo() tests a company's digits-only industryCode for membership in this list.
financial = ['6610', '6650', '6750', '6870', '6940', '6513', '7271', '6550', '7295', '6621', '6622', '6623', '6624',
             '6631',
             '6632', '6633', '6634', '6635', '6636', '6637', '6639', '6640', '6711', '6712', '6720', '6731', '6732',
             '6739',
             '6741', '6749', '6760', '6790', '6811', '6812', '6813', '6814', '6820', '6830', '6840', '6851', '6852',
             '6853',
             '6860', '6890', '6991', '6999', '6911', '6919', '6920', '6930', '6950']
# Keywords for the modern finance industry chain.
# demo() counts how many of these occur in a company's business scope.
fin_words = ['银行', '证券', '保险', '期货', '金融', '信用', '货币', '资金', '融资', '贷款', '借贷', '理财', '基金', '投资', '资本', '信托', '资产', '财产',
             '控股', '律师', '支付', '人民币', '存款', '长期贷款', '票据', '金融债券', '外汇', '理财产品', '中国保监会', '款项']

# Industry classification codes for the science and technology services
# industry chain. demo() tests a company's digits-only industryCode for
# membership in this list.
# NOTE(review): the run '6621' ... '6599' appears twice in this literal; the
# repetition does not affect membership tests.
science = ['4014', '4015', '3581', '6311', '6312', '6319', '6321', '6322', '6331', '6550', '7310', '7320', '7330',
           '7340',
           '7350', '6621', '6622', '6631', '6635', '6640', '6760', '6820', '6853', '6860', '6890', '6911', '6930',
           '6950',
           '6410', '6440', '6450', '6490', '6513', '6531', '6532', '6540', '6560', '6520', '6599', '7483', '7484',
           '7491',
           '7492', '6621', '6622', '6631', '6635', '6640', '6760', '6820', '6853', '6860', '6890', '6911', '6930',
           '6950',
           '6410', '6440', '6450', '6490', '6513', '6531', '6532', '6540', '6560', '6520', '6599', '7259', '7251',
           '7281',
           '6429', '6431', '6432', '6433', '7520', '7231', '7232', '7239']
# Keywords for the science and technology services industry chain.
# demo() counts how many of these occur in a company's business scope.
sci_words = ['科技', '电信', '试验机', '传输', '卫星', '研发', '科学', '新能源', '遥感', '测绘', '集成', '电路', '互联网', '分析仪器', '恒温', '干燥箱',
             '离心机', '培养箱', '科学实验', '科学研究', '新材料']

# Industry classification codes for the high-end equipment manufacturing
# industry chain. demo() tests a company's digits-only industryCode for
# membership in this list.
equipment = ['7320', '6531', '6550', '7491', '3444', '3445', '3446', '3451', '3453', '3484', '3489', '3499', '3813',
             '3821',
             '3421', '3422', '3423', '3424', '3425', '3429', '4011', '4014', '4015', '4016', '4019', '4029', '3744',
             '3812',
             '3921', '3922', '3412', '3453', '3716', '3811', '3821', '3891', '3899', '2641', '3532', '3575', '3592',
             '3791',
             '3792', '3811', '3824', '3921', '3491', '3492', '3493', '3964', '3511', '3512', '3513', '3515', '3516',
             '3521',
             '3522', '3523', '3524', '3531', '3542', '3551', '3569', '3571', '3599', '3360', '3399', '3432', '3433',
             '3434',
             '3439', '3499', '3531', '3532', '3572', '3579', '3741', '3749', '3742', '3743', '3921', '3962', '4023',
             '3711',
             '3712', '3714', '3716', '3720', '3424', '3737', '3513', '7452', '7520', '7251', '7259', '7281', '7289',
             '5179',
             '5164', '7224', '7112', '7119', '7296', '4320', '4330', '4360', '4343', '6331', '6339', '6571', '7441',
             '4341',
             '5331', '5333', '4831', '4832', '7231', '7439', '7481', '7484']
# Keywords for the high-end equipment manufacturing industry chain.
# demo() counts how many of these occur in a company's business scope.
equ_words = ['高端', '装备', '智能', '航空', '通信', '轨道', '交通', '铁路', '海洋', '潜水', '水下', '救捞', '机器人', '石油', '钻探', '冶金', '起重机',
             '航空器',
             '飞机', '航天', '高铁', '交通', '深海', '遥感', '直升机', '航空航天', '民航飞行器', '航空装备', '机场', '航站楼', '消防车', '船用', '航空运输',
             '航空器材',
             '高温合金', '液压', '精密', '精密机械', '有轨电车', '涂料', '航标', '船坞', '海上']

# Industry classification codes for the new-generation information technology
# industry chain. demo() tests a company's digits-only industryCode for
# membership in this list.
information = ['3562', '3563', '3569', '3831', '3832', '3971', '3972', '3974', '3975', '3976', '3979', '3981', '3982',
               '3983',
               '3984', '3989', '3912', '3913', '3919', '3921', '3932', '3931', '3911', '3914', '3919', '3922', '4023',
               '3940',
               '3990', '3915', '4028', '2651', '2669', '2921', '3042', '3051', '3073', '3091', '3841', '3842', '3849',
               '3985',
               '3973', '6311', '6312', '6319', '6410', '6440', '6513', '6531', '6532', '6560', '6431', '6450', '6433',
               '6434',
               '6490', '6450', '6421', '6429', '6550', '5176', '5211', '5273', '5274', '5292', '7281', '7289', '6540',
               '8121',
               '4390', '4910', '8122']
# Keywords checked by demo() against a company's business scope when its code
# is in `information`.
inf_words = ['信息', '元器件', '半导体', '电子', '电缆', '光纤', '元件', '电路', '电阻', '电容', '雷达', '锂电子', '锂电池', '锂电', '镍氢',
             '电池', '通讯', '二极管', '电位器', '电感器', '光电', '晶体管', '液晶显示', 'IC', '硬盘', 'CPU', '导航', '制导', '雷达', '工控机', '石墨']

# Industry classification codes for the artificial intelligence industry chain.
# demo() tests a company's digits-only industryCode for membership in this list.
artificial = ['7320', '6519', '7491', '6450', '6550', '6520', '3562', '3973', '3824', '3979', '4023', '3581', '3563',
              '3922',
              '3976', '3983', '4013', '4021', '4022', '4023', '4024', '4025', '4026', '4027', '4028', '4029', '3484',
              '3489', '6511', '6512', '6513', '3964', '3492', '3963', '3969', '3491',
              '3961', '3969', '3990', '3483', '3499', '3511', '3531', '3551', '3561', '3423', '3434', '3439', '3446',
              '3464',
              '3572', '3823', '3874', '3913', '3919', '3921', '3922', '3962', '4014', '4029', '3914', '3915', '6531',
              '6431',
              '6433', '6434', '6490', '6532', '0519', '6319', '6540', '4320', '4330', '4415', '4416', '4420', '4910',
              '8132',
              '6631', '5179', '5136', '5137', '5154', '5171', '5172', '5175', '5177', '5193', '5282', '5283', '5292',
              '5211',
              '5273', '5279', '5254', '7281']
# Keywords for the artificial intelligence industry chain.
# demo() counts how many of these occur in a company's business scope.
art_words = ['人工智能', '算法', '智能化', '云平台', '芯片', 'AI', '可编程', '单晶硅', '机器人', '自动控制系统', '智能', '大数据', '无人机', '计算机',
             '物联网', '互联网']

# Industry classification codes for the cultural and creative industry chain.
# demo() tests a company's digits-only industryCode for membership in this list.
culture = ['7212', '7211', '7221', '9051', '9053', '9059', '8070', '5181', '7298', '3542', '3474', '3931', '3932',
           '3933',
           '3934', '3939', '3471', '3953', '3963', '3472', '3473', '3873', '2461', '2462', '2469', '2221', '2222',
           '2642',
           '2644', '2664', '8730', '8770', '8710', '8720', '8740', '8810', '8870', '8890', '2431', '2432', '2433',
           '2434',
           '2435', '2436', '2437', '2438', '2439', '3075', '3076', '2411', '2412', '2414', '2459', '2672', '3951',
           '3952',
           '3961', '3969', '2421', '2422', '2423', '2429', '8750', '8760', '8820', '8393', '8399', '8831', '8832',
           '8840',
           '8850', '9011', '9012', '9013', '9019', '9090', '9030', '5622', '8621', '8623', '8624', '8625', '8626',
           '8629',
           '7520', '7284', '6572', '6422', '6319', '6579', '6432', '2311', '2312', '2319', '2320', '2330', '8060',
           '7251',
           '7259', '7484', '7491', '7492', '7485', '8610', '8622', '6421', '6429', '5143', '5144', '5145', '5243',
           '5244',
           '7124', '7125', '5164', '5245', '5246', '5183', '5184', '5175', '5178', '5248', '5147', '5247', '5141',
           '5241',
           '5137', '5271', '5149', '5249', '7121', '7123']
# Keywords for the cultural and creative industry chain.
# demo() counts how many of these occur in a company's business scope.
cul_words = ['文化经纪', '体育赛事', '旅游', '文化', '艺术交流', '体育表演', '影视文化', '雕塑', '字画', '工艺礼品', '广播', '电影院线', '影城', '收藏品',
             '舞台艺术', '珠宝', '玉器', '陶瓷', '刺绣', '电子乐器', '乐器', '影片', '音像制品', '艺术', '印刷', '喷绘', '广告设计', '新闻']

# Industry classification codes for the international commerce and trade
# industry chain. demo() tests a company's digits-only industryCode for
# membership in this list.
commerce = ['7010', '7020', '6513', '6550', '7211', '7212', '7213', '7221', '7222', '7223', '7224', '7231', '7232',
            '7241',
            '7242', '7243', '7244', '7245', '7246', '7251', '7259', '7262', '7264', '7271', '7272', '7281', '7282',
            '7283',
            '7284', '7289', '7291', '7293', '7294', '7296', '7297', '7298', '5181', '5182', '5183', '5184', '5189',
            '5171',
            '5172', '5173', '5174', '5175', '5176', '5177', '5178', '5179', '5161', '5162', '5163', '5164', '5165',
            '5151',
            '5152', '5153', '5154', '5141', '5142', '5143', '5144', '5145', '5146', '5147', '5131', '5132', '5133',
            '5134',
            '5135', '5136', '5137', '5138', '5121', '5122', '5123', '5124', '5125', '5126', '5127', '5211', '5212',
            '5213',
            '5221', '5222', '5223', '5224', '5225', '5226', '5231', '5232', '5233', '5234', '5235', '5236', '5237',
            '5238',
            '5241', '5242', '5243', '5244', '5245', '5246', '5247', '5248', '5251', '5252', '5253', '5254', '5255',
            '5261',
            '5262', '5263', '5264', '5265', '5271', '5272', '5273', '5274', '5281', '5282', '5283', '5284', '5285',
            '5286',
            '5287', '5292', '5293', '5294', '5521', '5522', '5523', '5532', '5320', '5432', '5521', '5522', '5523',
            '5612', '5930','5810','5821', '7451', '7452', '7454', '7455', '7459']
# Keywords checked by demo() against a company's business scope when its code
# is in `commerce`.
comm_words = ['房地产', '物业', '法律', '供应链', '房屋', '物业', '住房', '餐饮服务', '保健服务', '家政服务', '健康咨询服务', '政府部门购物中心', '零售',
              '商务', '贸易', '销售', '日用百货', '零配件', '电子产品', '批发', '卫生用品', '灶具', '厨具', '餐具', '批发', '灯具', '食用油', '肉', '禽',
              '蛋', '水产品', '调味品', '酒', '汽车', '零配件', '建筑装潢材料', '金属材料', '化工原料', '日用品', '五金', '化妆品', '销售', '电线电缆', '茶叶',
              '纺织品', '旧车零售', '音响设备', '仓储', '配送', '货物运输', '冷藏车', '运输']

# Industry classification codes that are matched directly: demo() pushes a
# company whose code is in this list without consulting any keyword list.
industry = ['8366', '8341', '8342', '8391', '8392', '8336']


def demo(min_keyword_hits=0):
    """Classify companies stored in Redis and push the matches to a result list.

    Every element of the Redis list ``industry_code:mhCompany2:`` is evaluated
    into a Python object whose item ``[1]`` is a dict with at least the keys
    ``industryCode`` and ``scope``. The non-digit characters are stripped from
    ``industryCode`` and the remaining code is looked up, in a fixed order, in
    the industry-chain code lists defined at module level. The first chain
    whose list contains the code decides the outcome; later chains are not
    consulted. A code found only in ``industry`` is pushed without any keyword
    check. A code found in no list is reported on stdout.

    Args:
        min_keyword_hits: Minimum number of the matched chain's keywords that
            must occur in the company's ``scope`` for the company to be
            pushed. The default ``0`` reproduces the original behaviour, whose
            ``a >= 0`` test was always true, so every code match is pushed.
            Pass ``1`` to require at least one keyword hit.

    Side effects:
        Prints the normalised code and the company dict for every item, sets
        ``result`` to ``"1"`` on each company dict, and pushes the matching
        companies onto ``industry_code:zhang:`` with LPUSHX.
    """
    # eval() below resolves a bare ``null`` token inside stored items through
    # this local name, so it must stay defined (and be named exactly ``null``)
    # in this function's scope.
    null = " "

    # (codes, keywords) pairs in match-priority order. ``None`` as keywords
    # marks the direct-match list, which needs no keyword check.
    chains = (
        (medicine, med_words),
        (financial, fin_words),
        (science, sci_words),
        (equipment, equ_words),
        (information, inf_words),
        (artificial, art_words),
        (culture, cul_words),
        (commerce, comm_words),
        (industry, None),
    )

    for raw_item in r.lrange("industry_code:mhCompany2:", 0, -1):
        # SECURITY: eval() executes whatever expression is stored in Redis.
        # Only run this against a trusted instance. Switching to
        # json.loads / ast.literal_eval needs the stored format confirmed
        # first, so the call is flagged here rather than replaced.
        record = eval(raw_item)
        company = record[1]
        coding = ''.join(filter(str.isdigit, company['industryCode']))
        print(coding)
        business = company["scope"]
        company["result"] = "1"
        print(company)

        for codes, keywords in chains:
            if coding not in codes:
                continue
            if keywords is None:
                qualifies = True
            else:
                hits = sum(1 for word in keywords if word in business)
                qualifies = hits >= min_keyword_hits
            if qualifies:
                # redis-py 3.0+ rejects dict values with DataError; str() is
                # the conversion redis-py 2.x applied implicitly, so the
                # stored representation is unchanged.
                # NOTE(review): LPUSHX is a no-op when the target list does
                # not exist yet — confirm that is intended rather than LPUSH.
                r.lpushx("industry_code:zhang:", str(company))
            break
        else:
            print("不属于任何一个行业")

if __name__ == '__main__':
    # The company's industry classification code
    # coding = '0171'
    # # The company's business scope
    # business = '恒瑞医药'
    demo()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值