批量从mongodb里解json

12 篇文章 0 订阅
4 篇文章 0 订阅
create or replace function get_json_by_python(origin_file text,layers int,schema_n text,table_n text)
returns void 
language plpython3u
as $$
import re
import json as js
import ast
#plpy.execute("truncate public.json_result;")
def get_json_nums_of_layers(origin_json):
    jsonstr=str(origin_json)
    pos1=list()#用于存放
    plpy.notice('判断函数运行了!')
    for m in re.finditer('{', jsonstr):
        pos1.append(m.start())
    pos2=jsonstr.find('}')
    for i in range(len(pos1)):
        if pos1[i]>pos2:
            return i
        else:
            return len(pos1)
 
def get_json(origin_json,nums_layer_toget):
    s=''#s是用于放字段名的
    v=''#v用于放记录的
    if(nums_layer_toget>get_json_nums_of_layers(origin_json)):
        plpy.execute("raise notice '所要解析的层数超过该json拥有的层数!请修正'")
    else:
        origin_json= ast.literal_eval(origin_json)
        get_json_operation('list',origin_json, nums_layer_toget,s,v)
                           
def get_json_operation(key_name,jsonstr,nums_layer_toget,s,v):
    while(nums_layer_toget>=0):
        if nums_layer_toget==0:#遍历到底层了
            key_name=str(key_name)+str(nums_layer_toget)
            s=s+str(key_name)+","
            plpy.notice('s是这样的',s)
            col=s.split(',')
            col.remove('')
            for col_n in col:
                plan=plpy.prepare("select tyc_column_exists_in_table($1,$2,$3)",["text","text","text"])
                plpy.execute(plan,[col_n,schema_n,table_n])
            jsonstr=str(jsonstr).replace("'", "''")
            v=v+"'"+str(jsonstr)+"',"
            result_txt=s+'<ly>'+v
            plan = plpy.prepare("insert into public.json_result (command) values($1)", ["text"])
            plpy.execute(plan,[result_txt])
            return 
        else:
            m=list(jsonstr.keys())
            for key in jsonstr:
                if isinstance(jsonstr[key], list) and nums_layer_toget>=1:
                    key_name=key
                    print('该层列表有',len(jsonstr[key]),'个元素') 
                    j=0
                    cd=len(jsonstr[key])
                    temp_json=jsonstr[key]
                    if j<=cd-1:                 
                        jsonstr=jsonstr[key][j]
                        get_json_operation(key_name,jsonstr, nums_layer_toget-1,s,v)
                        j=j+1
                        if j==cd:#已经遍历完了                     
                            return  
                        else:
                            while(j<cd):
                                jsonstr=temp_json[j]
                                get_json_operation(key_name,jsonstr, nums_layer_toget-1,s,v)
                                j=j+1 
                            return               
                if isinstance(jsonstr[key], dict) and nums_layer_toget>=1:
                    key_name='dict'+str(key)+str(nums_layer_toget)
                    jsonstr=jsonstr[key]
                    get_json_operation(key_name,jsonstr, nums_layer_toget-1,s,v)
                    s='请删除我'
                    v='请删除我'
                else:
                    jsonstr[key]=str(jsonstr[key]).replace("'","''")
                    s=s+str(key)+","
                    v=v+"'"+str(jsonstr[key])+"',"
            if '请删除我' not in s:
                plpy.notice('s是这样的',s)
                col=s.split(',')
                col.remove('')
                for col_n in col:
                    plan=plpy.prepare("select tyc_column_exists_in_table($1,$2,$3)",["text","text","text"])
                    plpy.execute(plan,[col_n,schema_n,table_n])
                result_txt=s+'<ly>'+v
                plan = plpy.prepare("insert into public.json_result (command) values($1)", ["text"])
                plpy.execute(plan,[result_txt])
            return#遍历到没有list,dict的时候直接返回上一层调用
if __name__ == '__main__':
    get_json(origin_file,layers)
$$;


create or replace function operate_mid_txt()
returns void 
language plpgsql
as $$
declare curs refcursor;
		s text;
		m int;
		i int =1;
begin
raise notice '1';
open curs for select command from public.json_result;
select count(*) into m from public.json_result;
while i<=m loop
	fetch curs into s;
--	raise notice '%',s;
	s=cast(s as text);
--	raise notice '2!';
	s=replace(s,',<ly>',')values(');
--	raise notice '3!';
	s='insert into table_name('||s;
--	raise notice '4!';
	s=btrim(s,',');
	s=s||')';	
	update public.json_result set command=s where current of curs;
    i=i+1;
end loop;
close curs;
end;
$$;
-----判断字段是否存在
drop function if exists public.tyc_column_exists_in_table;
CREATE OR REPLACE FUNCTION public.tyc_column_exists_in_table(v_column varchar,v_table_schema varchar,v_table_name varchar)
RETURNS varchar LANGUAGE plpgsql AS $function$
declare
	v_record record;
	v_flag int;
begin
	--select * into v_record from foreign_data.config_tyc_connection_parameter where api_code=v_api_code;
	select count(1) into v_flag from information_schema."columns" where table_name=v_table_name
	and table_schema=v_table_schema	and column_name=v_column;
	if v_flag=0 then 
		raise notice '% not exists' ,v_column;
		execute format('alter table %I.%I add column %I text',v_table_schema,v_table_name,v_column);
		raise notice '创建列%成功',v_column;
	else 
		raise notice '% is exists' ,v_column;
	end if;
return v_flag;
end;
$function$
;

CREATE OR REPLACE FUNCTION public.tyc_query_data_from_mongodb
(v_host varchar,v_port int,v_db varchar,v_collection varchar,v_api_code varchar,layer_num int) 
RETURNS varchar LANGUAGE plpython3u AS $function$   
import pymongo
import re 
plpy.execute("truncate public.json_result;")
client = pymongo.MongoClient(host=v_host, port=v_port)
db = client[v_db]
collection = db[v_collection]
records = 0
plpy.notice('开始处理 ' +str(v_api_code)+' 数据...')
for item in collection.find({},{'_id':0},no_cursor_timeout = True):
	plpy.notice('处理的json'+str(item))
#	pattern = re.compile(r'(?<=").*?(?=")')
#	searchObj=re.search(pattern, item)
#	if searchObj is not None:
#		m=searchObj.group().replace("'",'单引号')
#		item=re.sub(pattern,m,item)
#		item=item.replace("'",'"')
#		item=item.replace('单引号',"'")
#	else:
#		item=item.replace("'",'"')
#	plpy.notice('item的类型',type(item))
	plan = plpy.prepare("select get_json_by_python($1,$2,$3,$4)", ["text","int","text","text"])
	plpy.execute(plan,[item,layer_num,'public','test1111'])
	records=records+1
	plpy.notice('已执行记录数: '+str(records))
plpy.execute("select operate_mid_txt();")
plpy.notice('完成处理 ' +str(v_api_code)+' 数据!')
return records
$function$
;

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值