mysql复杂的多表查询优化的案例

简介

这几天要搬运老项目中的报表查询语句,并且是从oracle数据库里搬过来,一路上总结了一些优化经验,写下来给大家参考一下

原查询语句

这个也不算最初的查询,最初的查询是oracle的语句,先得翻译成mysql的语句。这里简单讲一下翻译的步骤:
1.所有带有(+),转成mysql的时候变成外连接
2.所有的在mysql中有区别的函数要转换
3.Start with 树形结构查询的语句在mysql中没有对应的查询方式,最后是手动先进行这种关联查询后,再把查询的结果放置在大的sql语句当中。
这是转换后的结果,这条语句是要放在Groovy脚本里面运行的,所以${orgId} 和 ? 是参数信息,大家可以不用理会。

SELECT
  '' id,
  '' rpt_baseinfo_id,
  '' rpt_no,
  '' rpt_name,
  '' rpt_cyc,
  '' rpt_ym,
  '' org_no,
  cons_name,
  cons_no,
  tg_name,
  org_name,
  resource_req_name,
  install_name,
  started_time,
  last_state_time,
  mobile,
  mad_no,
  meter_type,
  latest_chk_date,
  rcvble_amt,
  electype,
  volte_code,
  metercap,
  app_run_cap,
  app_type_code,
  '' remark1,
  '' remark2,
  '' remark3,
  '' stand1,
  '' stand2,
  '' stand3
FROM
  (
    SELECT
      cons.cons_name cons_name,
      a.cons_no cons_no,
      g.tg_name tg_name,
      o.name org_name,
      u.user_name resource_req_name,
      u.user_name install_name,
      date_format(ah.START_TIME_, '%Y-%m-%d') started_time,
      date_format(ah.END_TIME_, '%Y-%m-%d') last_state_time,
      t.mobile mobile,
      d.made_no mad_no,
      p.name meter_type,
      date_format(d.latest_chk_date, '%Y-%m-%d') latest_chk_date,
      0 rcvble_amt,
      p1.name electype,
      p2.name volte_code,
      p3.name metercap,
      a.app_run_cap app_run_cap,
      p4.name app_type_code
    FROM
      (act_hi_taskinst ah,
      mp_info c,
      cons_info cons,
      tg_info g,
      org_info o,
      contact_info t,
      meter_mp_rela rela,
      meter_info d)
    INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
    INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
    LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
    LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
    LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
    LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
    LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
    LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code 
    WHERE
      a.cons_id = c.cons_id
      AND a.cust_id = t.cust_id
      AND c.id = rela.mp_id
      AND rela.meter_id = d.id
      AND cons.org_no = o.id
      AND a.main_app_id IS NULL
      AND c.cons_id = cons.id
      AND c.tg_id = g.id
      AND c.mp_level = 1
      AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
      AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
      AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
      AND a.ps_org_no in (${orgId})
    UNION ALL
    SELECT
      cons.cons_name cons_name,
      a.cons_no cons_no,
      g.tg_name tg_name,
      o.name org_name,
      u.user_name resource_req_name,
      u.user_name install_name,
      date_format(ah.START_TIME_, '%Y-%m-%d') started_time,
      date_format(ah.END_TIME_, '%Y-%m-%d') last_state_time,
      t.mobile mobile,
      d.made_no mad_no,
      p.name meter_type,
      date_format(d.latest_chk_date, '%Y-%m-%d') latest_chk_date,
      0 rcvble_amt,
      p1.name electype,
      p2.name volte_code,
      p3.name metercap,
      a.app_run_cap app_run_cap,
      p4.name app_type_code
    FROM
      (act_hi_taskinst ah,
      batch_app_arc batch,
      mp_info c,
      cons_info cons,
      tg_info g,
      org_info o,
      contact_info t,
      meter_mp_rela rela,
      meter_info d)
    INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
    INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
    LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
    LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
    LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
    LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
    LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
    LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code  
    WHERE
      a.id = batch.app_id
      AND a.cons_id = c.cons_id
      AND a.cust_id = t.cust_id
      AND c.id = rela.mp_id
      AND rela.meter_id = d.id
      AND cons.org_no = o.id
      AND a.main_app_id IS NULL
      AND c.cons_id = cons.id
      AND c.tg_id = g.id
      AND c.mp_level = 1
      AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
      AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
      AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
      AND a.ps_org_no in (${orgId})
  ) t
ORDER BY
  org_name,
  cons_no,
  last_state_time;

分析

优化前,先分析一下结构,这个结构还是比较清晰的。

SELECT * FROM(
	SELECT * FROM xxx a
	UNION ALL
	SELECT * FROM xxx b
)

先简单的理解为上面这种结构,我们主要优化的对象就是里面的两个由UNION ALL 连接的查询语句。由于这两个查询的处理方式是类似的,所以,我这里只展示其中一个的优化方式。
先把它摘出来。

SELECT
 cons.cons_name cons_name,
  a.cons_no cons_no,
  g.tg_name tg_name,
  o.name org_name,
  u.user_name resource_req_name,
  u.user_name install_name,
  date_format(ah.START_TIME_, '%Y-%m-%d') started_time,
  date_format(ah.END_TIME_, '%Y-%m-%d') last_state_time,
  t.mobile mobile,
  d.made_no mad_no,
  p.name meter_type,
  date_format(d.latest_chk_date, '%Y-%m-%d') latest_chk_date,
  0 rcvble_amt,
  p1.name electype,
  p2.name volte_code,
  p3.name metercap,
  a.app_run_cap app_run_cap,
  p4.name app_type_code
FROM
  (act_hi_taskinst ah,
  mp_info c,
  cons_info cons,
  tg_info g,
  org_info o,
  contact_info t,
  meter_mp_rela rela,
  meter_info d)
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code 
WHERE
  a.cons_id = c.cons_id
  AND a.cust_id = t.cust_id
  AND c.id = rela.mp_id
  AND rela.meter_id = d.id
  AND cons.org_no = o.id
  AND a.main_app_id IS NULL
  AND c.cons_id = cons.id
  AND c.tg_id = g.id
  AND c.mp_level = 1
  AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
  AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
  AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
  AND a.ps_org_no in (${orgId})

这里我讲一下优化思路:

  1. 要把所有在 WHERE 中出现的语句带着相关的表,变成 INNER JOIN,因为直接内连接会直接进行笛卡尔积相乘,sql语句的执行顺序是,先执行 JOIN 再执行 where 条件,但是上面这个sql中,有好几张表的数据已经达到30万条了,绝对不能这样用这样的内连接直接放在一起,这对于运行效率有很大影响。
  2. 所有 INNER JOIN 的语句要放在 LEFT JOIN 以前,因为当你使用 LEFT JOIN 的时候,一般是要查询某条记录的额外的信息,如果 INNER JOIN 在前的话,那就能过滤掉很多记录,但如果 LEFT JOIN 在前的话相当于要为一些后面需要被过滤掉的记录查询额外信息,这是没必要的。
  3. 能筛选掉数据的条件要尽早使用。

开始优化

刚开始转换的时候很头大,因为内关联这么多张表,筛选条件一大堆。但后来找到了些窍门:从直接内连接的表和 WHERE 后面的条件下手。如果直接内连接的表 A 在 WHERE 条件中相关联的筛选条件全是常量,或者至于其他一张表有关联,那么表 A 就可以立马转换为INNER JOIN
例如下面的 tg_info,org_info,contact_info,meter_info 这几张表,都只在 Where 条件里找到一条筛选条件

SELECT * -- 先不考虑查询的字段
FROM
  (act_hi_taskinst ah,
  mp_info c,
  cons_info cons,
  tg_info g, -- 一条筛选条件
  org_info o, -- 一条筛选条件
  contact_info t, -- 一条筛选条件
  meter_mp_rela rela,
  meter_info d) -- 一条筛选条件
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code 
WHERE
  a.cons_id = c.cons_id
  AND a.cust_id = t.cust_id -- 需要关注的筛选条件
  AND c.id = rela.mp_id
  AND rela.meter_id = d.id -- 需要关注的筛选条件
  AND cons.org_no = o.id -- 需要关注的筛选条件
  AND a.main_app_id IS NULL
  AND c.cons_id = cons.id
  AND c.tg_id = g.id  -- 需要关注的筛选条件
  AND c.mp_level = 1
  AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
  AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
  AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
  AND a.ps_org_no in (${orgId})

在变成 INNER JOIN 语句前,如果筛选条件里又出现了这几个表之间的关联筛选,需要注意INNER JOIN 的顺序。 不过这四张表没有相互之间联系,所以可以直接转换。当然还遵循一个原则,小表在前大表在后,我先查询了这几张表的数据量然后再根据数据量把 INNER JOIN 进行合理的排序

SELECT * -- 先不考虑查询的字段
FROM
  (act_hi_taskinst ah,
  mp_info c,
  cons_info cons, -- 一条筛选条件 
  meter_mp_rela rela) -- 一条筛选条件
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN tg_info g ON c.tg_id = g.id
INNER JOIN org_info o ON cons.org_no = o.id
INNER JOIN contact_info t ON a.cust_id = t.cust_id
INNER JOIN meter_info d ON rela.meter_id = d.id
LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code 
WHERE
  a.cons_id = c.cons_id
  AND c.id = rela.mp_id -- 需要关注的筛选条件
  AND a.main_app_id IS NULL
  AND c.cons_id = cons.id -- 需要关注的筛选条件
  AND c.mp_level = 1
  AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
  AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
  AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
  AND a.ps_org_no in (${orgId})

一口气干掉4个表,很舒服。然后再分析剩余的条件里面有没有能调整的,此时发现 cons_info 和 meter_mp_rela 又可以转换了。不过这里要注意和其他INNER JOIN 之间的顺序。例如 INNER JOIN org_info o ON cons.org_no = o.id 语句用到了 cons_info 表,所以 cons_info 表的 INNER JOIN 要在 org_info 表以前

SELECT * -- 先不考虑查询的字段
FROM
  (act_hi_taskinst ah,
  mp_info c) -- 可以INNER JOIN
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN tg_info g ON c.tg_id = g.id
INNER JOIN cons_info cons ON c.cons_id = cons.id
INNER JOIN org_info o ON cons.org_no = o.id
INNER JOIN contact_info t ON a.cust_id = t.cust_id
INNER JOIN meter_mp_rela rela ON c.id = rela.mp_id
INNER JOIN meter_info d ON rela.meter_id = d.id
LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code 
WHERE
  a.cons_id = c.cons_id -- 需要关注的条件
  AND a.main_app_id IS NULL
  AND c.mp_level = 1 -- 需要关注的条件
  AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
  AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
  AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
  AND a.ps_org_no in (${orgId})

转换 mp_info 表,注意:它要放在 app_arc 表后,放在 tg_info,cons_info,meter_mp_rela 等表的前面

SELECT * -- 先不考虑查询的字段
FROM act_hi_taskinst ah
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN mp_info c ON a.cons_id = c.cons_id AND c.mp_level = 1
INNER JOIN tg_info g ON c.tg_id = g.id
INNER JOIN cons_info cons ON c.cons_id = cons.id
INNER JOIN org_info o ON cons.org_no = o.id
INNER JOIN contact_info t ON a.cust_id = t.cust_id
INNER JOIN meter_mp_rela rela ON c.id = rela.mp_id
INNER JOIN meter_info d ON rela.meter_id = d.id
LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code 
WHERE
  AND a.main_app_id IS NULL
  AND ah.TASK_DEF_KEY_ = 'installInfoInput' 
  AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
  AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
  AND a.ps_org_no in (${orgId})

最终,根据能筛选数据的条件要尽早使用的原则,把其他筛选条件加入到 JOIN 当中

SELECT * -- 先不考虑查询的字段
FROM act_hi_taskinst ah
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ 
	AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
	AND ah.TASK_DEF_KEY_ = 'installInfoInput'
	AND ah.END_TIME_ >= date_format(?, '%Y-%m-%d')
  	AND ah.END_TIME_ < DATE_ADD(?, INTERVAL 1 MONTH)
INNER JOIN app_arc a ON ahp.BUSINESS_KEY_  = a.id
	AND a.main_app_id IS NULL
	AND a.ps_org_no in (${orgId})
INNER JOIN procinst ahp ON ahp.ID_ =  ah.PROC_INST_ID_ AND (ahp.DELETE_REASON_ <> 'manager' OR ahp.DELETE_REASON_ IS NULL)
INNER JOIN mp_info c ON a.cons_id = c.cons_id AND c.mp_level = 1
INNER JOIN tg_info g ON c.tg_id = g.id
INNER JOIN cons_info cons ON c.cons_id = cons.id
INNER JOIN org_info o ON cons.org_no = o.id
INNER JOIN contact_info t ON a.cust_id = t.cust_id
INNER JOIN meter_mp_rela rela ON c.id = rela.mp_id
INNER JOIN meter_info d ON rela.meter_id = d.id
LEFT JOIN user_info u ON  ah.ASSIGNEE_=u.login_name
LEFT JOIN code_msg p ON p.code_type = 'meterModelNo' AND p.value = d.model_code
LEFT JOIN code_msg p1 ON p1.code_type = 'elecTypeCode' AND p1.value = a.elec_type_code
LEFT JOIN code_msg p2 ON p2.code_type = 'psVoltCode' AND p2.value = a.volt_code
LEFT JOIN code_msg p3 ON p3.code_type = 'meterRcSort' AND p3.value = d.rated_current 
LEFT JOIN code_msg p4 ON p4.code_type = 'busiTypeCode' AND p4.value = a.app_type_code

至此,所有WHERE 条件都被干掉了,全部转换为 JOIN 的条件。
接下来的优化方向:

  1. 根据表内数据量大小,把INNER JOIN 进行排序,小表在前大表在后,但不要忘记INNER JOIN 涉及其他表时候的顺序要求。
  2. 根据筛选条件添加合适的索引。值得注意的是 ah.END_TIME_ 是时间,如果在这个字段上加普通索引的话,这里一般不会走索引必须强制要求它走索引,即在 FROM act_hi_taskinst ah 后面加上 FORCE INDEX (ACT_IDX_HI_TASK_INST_PROCINST,act_hi_taskinst_END_TIME_index),让它走自己制定的索引,我这里制定了两个。

至此,优化完毕了,优化效果很明显,原来跑5分钟都没跑完,现在几秒钟就结束了

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值