How to optimise a query containing joins and subqueries

狂风中的少年 提交于 2020-01-05 08:51:50

问题


I have inherited the following query and DB structure and I want to optimise it as it is slow. It contains joins and subqueries which I've read isn't a good plan. I've tried various ways to improve it but am getting stuck/lost.

If it is good as it is then fine but if there are suggestions for improving it I would be immensely grateful...

The query draws data from various tables to produce a report on how many clickthroughs to a supplier's website and telephone number 'reveals' there have been for a supplier, and how many emails have been sent to that supplier.

The WHERE clause uses 1=1 as conditions are sometimes added to filter the report down by region, county, and supplier's business type.

The code is copied from the MySQL slow query log, so all the $variables are already interpolated. The structure of the tables is output from a MySQL dump.

 

The query:

-- Per-business activity report for April 2014: one output row per business
-- (GROUP BY Business.id) with six counts, each computed by a correlated
-- sub query that is evaluated against the current Business / U row.
SELECT Business.*, 
       -- Messages created in the date range where the joined user's id
       -- appears in either from_to or user_id.
       ( SELECT Count(Message.id) FROM messages as Message 
         WHERE (U.id = Message.from_to OR U.id = Message.user_id)  
           AND Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
       ) as message_no, 
       -- Distinct user_id values among the same messages, restricted to rows
       -- that have no parent message or are their own parent.
       ( SELECT Count(DISTINCT(MessageUnique.user_id)) FROM messages as MessageUnique 
         WHERE (U.id = MessageUnique.from_to OR U.id = MessageUnique.user_id) 
           AND (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)  
           AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
       ) as message_unique_no, 
       -- Number of business_counties rows for this business (no date filter).
       ( SELECT Count(*) FROM business_counties as bc2 
         WHERE Business.id = bc2.business_id ) as county_no, 
       -- Clickthrough rows for this business created in the date range.
       ( SELECT Count(click.id) FROM business_clickthroughs as click 
         WHERE Business.id = click.business_id  
           AND click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
       ) as clicks, 
       -- Number of business_regions rows for this business (no date filter).
       ( SELECT Count(*) FROM business_regions as br2 
         WHERE Business.id = br2.business_id ) as region_no, 
       -- Reveal rows for this business created in the date range. The inner
       -- alias reveal_no is not visible outside; the column is named reveals_no.
       ( SELECT count(BusinessReveal.id) as reveal_no FROM business_reveals as BusinessReveal
         WHERE 1=1  
           AND BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59' 
           AND BusinessReveal.business_id = Business.id
       ) as reveals_no 
FROM businesses as Business 
-- U supplies the user id that the two message sub queries correlate on.
LEFT JOIN users as U ON Business.id = U.business_id  
-- No columns are selected from bc, bt or br; presumably they are joined so the
-- optional county / business-type / region filters can reference them.
LEFT JOIN business_counties as bc ON Business.id = bc.business_id 
LEFT JOIN businesses_business_types as bt ON Business.id = bt.business_id 
LEFT JOIN business_regions as br ON Business.id = br.business_id 
-- 1=1 is a placeholder so that optional filter conditions can be appended with AND.
WHERE 1=1  
-- Collapses the row multiplication caused by the joins above back to one row per business.
Group By Business.id;

 

The table structures:

/*
 Navicat MySQL Data Transfer

 Source Server         : _Localhost
 Source Server Type    : MySQL
 Source Server Version : 50530
 Target Server Type    : MySQL
 Target Server Version : 50530
 File Encoding         : utf-8
*/


-- ----------------------------
--  Table structure for `business_clickthroughs`
-- ----------------------------
-- Rows counted as `clicks` by the report query, which filters on
-- business_id plus a `created` date range. Only business_id is indexed
-- (`bid`); there is no index that includes `created`.
DROP TABLE IF EXISTS `business_clickthroughs`;
CREATE TABLE `business_clickthroughs` (
  `id` bigint(12) unsigned NOT NULL AUTO_INCREMENT,
  `business_id` int(8) unsigned NOT NULL,
  `registered_user` tinyint(1) unsigned DEFAULT '0',
  `created` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `bid` (`business_id`)
) ENGINE=InnoDB AUTO_INCREMENT=29357 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;

-- ----------------------------
--  Table structure for `business_counties`
-- ----------------------------
-- Link rows between a business and a county. The report query both joins
-- this table and counts its rows per business_id (`county_no`); the `bcid`
-- index on business_id serves both lookups.
DROP TABLE IF EXISTS `business_counties`;
CREATE TABLE `business_counties` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `county_id` int(11) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `bcid` (`business_id`)
) ENGINE=MyISAM AUTO_INCREMENT=20124 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;

-- ----------------------------
--  Table structure for `business_regions`
-- ----------------------------
-- Link rows between a business and a region. The report query joins this
-- table and counts its rows per business_id (`region_no`), but the only
-- index is the primary key on `id`: there is no index on business_id, and
-- the EXPLAIN output shows a full scan (type ALL) for both br and br2.
DROP TABLE IF EXISTS `business_regions`;
CREATE TABLE `business_regions` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `region_id` int(11) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=2719 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;

-- ----------------------------
--  Table structure for `business_reveals`
-- ----------------------------
-- Rows counted as `reveals_no` by the report query, which filters on
-- business_id plus a `created` date range. The `bid` index covers
-- business_id only, not `created`.
DROP TABLE IF EXISTS `business_reveals`;
CREATE TABLE `business_reveals` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `customer_id` int(11) DEFAULT NULL,
  `created` datetime NOT NULL,
  `modified` datetime NOT NULL,
  PRIMARY KEY (`id`),
  KEY `bid` (`business_id`)
) ENGINE=InnoDB AUTO_INCREMENT=3172 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT;

-- ----------------------------
--  Table structure for `businesses_business_types`
-- ----------------------------
-- Link rows between a business and a business type. The report query joins
-- this table on business_id (indexed by `bid`) but selects no columns from it.
DROP TABLE IF EXISTS `businesses_business_types`;
CREATE TABLE `businesses_business_types` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `business_id` int(11) NOT NULL,
  `business_type_id` int(11) NOT NULL,
  `level` int(2) NOT NULL DEFAULT '2',
  PRIMARY KEY (`id`),
  KEY `bid` (`business_id`) COMMENT '(null)'
) ENGINE=MyISAM AUTO_INCREMENT=4484 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;

-- ----------------------------
--  Table structure for `messages`
-- ----------------------------
-- Source of `message_no` and `message_unique_no` in the report query, which
-- matches users.id against either from_to or user_id and filters on a
-- `created` date range.
-- Points relevant to that query:
--   * from_to is VARCHAR(500) (indexed on its first 255 characters), while
--     the value it is compared with, users.id, is INT.
--   * `created` has no index.
--   * The EXPLAIN output shows a full scan (type ALL) for both message
--     sub queries even though fromto / uid / pmid are listed as possible keys.
DROP TABLE IF EXISTS `messages`;
CREATE TABLE `messages` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `subject` varchar(500) DEFAULT NULL,
  `message` text,
  `user_id` int(11) DEFAULT NULL,
  `message_folder_id` int(11) DEFAULT NULL,
  `parent_message_id` int(11) DEFAULT NULL,
  `status` int(11) DEFAULT NULL,
  `direction` int(11) DEFAULT NULL,
  `from_to` varchar(500) DEFAULT NULL,
  `attachment` varchar(500) DEFAULT NULL,
  `created` datetime DEFAULT NULL,
  `modified` datetime DEFAULT NULL,
  `guest_sender` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`),
  KEY `fromto` (`from_to`(255)),
  KEY `uid` (`user_id`),
  KEY `pmid` (`parent_message_id`)
) ENGINE=InnoDB AUTO_INCREMENT=4582 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;

-- ----------------------------
--  Table structure for `users`
-- ----------------------------
-- User accounts. The report query joins this table on business_id to obtain
-- the user id used by the message sub queries. business_id is nullable and
-- the only index is the primary key on `id`, so that join has no index to
-- use; the EXPLAIN output shows a full scan (type ALL) of U.
DROP TABLE IF EXISTS `users`;
CREATE TABLE `users` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `login` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `password` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `name` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `email` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `title` varchar(20) COLLATE latin1_general_ci NOT NULL,
  `firstname` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `lastname` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `active` tinyint(1) NOT NULL DEFAULT '0',
  `first_visit` tinyint(1) NOT NULL DEFAULT '1',
  `signature` text COLLATE latin1_general_ci,
  `type` varchar(45) COLLATE latin1_general_ci DEFAULT 'customer',
  `business_id` int(11) DEFAULT NULL,
  `admin_monitor` tinyint(1) NOT NULL DEFAULT '0',
  `partner_name` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
  `postcode` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
  `venue_postcode` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
  `wedding_date` datetime DEFAULT NULL,
  `phone` varchar(255) COLLATE latin1_general_ci NOT NULL,
  `register_date` datetime DEFAULT NULL,
  `event` text COLLATE latin1_general_ci,
  `mailing_list` tinyint(1) NOT NULL DEFAULT '0',
  `created` datetime NOT NULL,
  `modified` datetime NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=2854 DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci ROW_FORMAT=DYNAMIC;

 

The Explain plan.

id  select_type         table           type    possible_keys       key     key_len     ref             rows    Extra

1   PRIMARY             Business        ALL     -                   -       -           -               444     Using temporary; Using filesort
1   PRIMARY             U               ALL     -                   -       -           -               2658    -
1   PRIMARY             bc              ref     bcid                bcid    4           Business.id     7       Using index
1   PRIMARY             bt              ref     bid                 bid     4           Business.id     9       Using index
1   PRIMARY             br              ALL     -                   -       -           -               440     -
7   DEPENDENT SUBQUERY  BusinessReveal  ref     bid                 bid     4           func            5       Using where
6   DEPENDENT SUBQUERY  br2             ALL     -                   -       -           -               440     Using where
5   DEPENDENT SUBQUERY  click           ref     bid                 bid     4           func            22      Using where
4   DEPENDENT SUBQUERY  bc2             ref     bcid                bcid    4           func            7       Using index
3   DEPENDENT SUBQUERY  MessageUnique   ALL     fromto,uid,pmid     -       -           -               4958    Using where
2   DEPENDENT SUBQUERY  Message         ALL     fromto,uid          -       -           -               4958    Using where

回答1:


Your query has 6 correlated sub queries, and in total is returning 444 rows. Each of those correlated sub queries is effectively being executed for each returned row. Hence your single query is resulting in just under 3000 queries.

Personally I prefer to avoid them, using a large join or joining against sub queries instead. However, it depends on the number of rows returned.

Further, you are also LEFT JOINing directly to tables that the sub queries already read, which will generate a lot of duplicate rows that the GROUP BY then collapses. As you take nothing directly from most of those tables, and the GROUP BY is on what appears to be a unique key, those joins seem irrelevant.

If you keep the correlated sub queries:-

SELECT Count(Message.id) FROM messages as Message 
WHERE (U.id = Message.from_to OR U.id = Message.user_id)  
AND Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'

There is no useful index on this table for this sub query. As you are checking 2 different columns for the U.id there is not much that can be done there, but an index on created would help. It might be better to duplicate this sub query, once checking from_to and once checking user_id, and add the results together, as you could then have an index on the relevant id field and the date.

Also, you are doing a count on a value which appears to be the unique key, so it should never be null and COUNT(*) would give the same result.

SELECT Count(DISTINCT(MessageUnique.user_id)) FROM messages as MessageUnique 
WHERE (U.id = MessageUnique.from_to OR U.id = MessageUnique.user_id) 
AND (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)  
AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'

Same problem as the previous sub query.

SELECT Count(*) FROM business_counties as bc2 
WHERE Business.id = bc2.business_id

This has a key on business_id and should be OK

SELECT Count(click.id) FROM business_clickthroughs as click 
WHERE Business.id = click.business_id  
AND click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'

While indexed on business id there is no index that covers both business id and the created date, which would probably help here.

SELECT Count(*) FROM business_regions as br2 
WHERE Business.id = br2.business_id

This requires an index on business_id on the business_regions table.

SELECT count(BusinessReveal.id) as reveal_no FROM business_reveals as BusinessReveal
WHERE 1=1  
AND BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59' 
AND BusinessReveal.business_id = Business.id

Here the key doesn't cover the created date, just the business id.

If you want to try doing joins against sub queries (which can be more efficient, despite MySQL being poor at joining onto sub queries) then something like this (not tested):-

-- Report query rewritten to LEFT JOIN pre-aggregated derived tables instead
-- of evaluating six correlated sub queries for every business row.
--
-- Each derived table is aggregated once and joined on its grouping key. A
-- business (or user) with no matching rows gets NULL from the LEFT JOIN, so
-- every count is wrapped in COALESCE(..., 0) to match the 0 that the
-- correlated COUNT() sub queries returned (and to stop NULL + n = NULL).
--
-- Derived tables must be self-contained: they cannot reference Business, so
-- they group by their own business_id / user columns.
--
-- NOTE(review): a message whose from_to AND user_id both match the same user
-- is counted in both mess_1 and mess_2, whereas the original OR condition
-- counted it once. message_unique_no is likewise the sum of two separate
-- distinct counts. Confirm this is acceptable for the report.
-- NOTE(review): there is no GROUP BY on the outer query, so a business with
-- more than one row in users produces one output row per user.
-- NOTE(review): messages.from_to is VARCHAR and users.id is INT, so the joins
-- on from_to rely on implicit type conversion.
SELECT Business.*,
       COALESCE(mess_1.mess_count, 0) + COALESCE(mess_2.mess_count, 0) AS message_no,
       COALESCE(mess_3.mess_count, 0) + COALESCE(mess_4.mess_count, 0) AS message_unique_no,
       COALESCE(business1.county_no, 0) AS county_no,
       COALESCE(click1.clicks, 0) AS clicks,
       COALESCE(business_regions.region_no, 0) AS region_no,
       COALESCE(business_reveals1.reveals_no, 0) AS reveals_no
FROM businesses as Business
LEFT JOIN users as U ON Business.id = U.business_id
-- Messages in the date range, keyed by from_to.
LEFT OUTER JOIN
(
    SELECT Message.from_to, Count(Message.id) AS mess_count
    FROM messages as Message
    WHERE Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
    GROUP BY Message.from_to
) AS mess_1
ON U.id = mess_1.from_to
-- Messages in the date range, keyed by user_id.
LEFT OUTER JOIN
(
    SELECT Message.user_id, Count(Message.id) AS mess_count
    FROM messages as Message
    WHERE Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
    GROUP BY Message.user_id
) AS mess_2
ON U.id = mess_2.user_id
-- Distinct senders of top-level messages in the date range, keyed by from_to.
LEFT OUTER JOIN
(
    SELECT MessageUnique.from_to, Count(DISTINCT(MessageUnique.user_id)) AS mess_count
    FROM messages as MessageUnique
    WHERE (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)
    AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
    GROUP BY MessageUnique.from_to
) AS mess_3
ON U.id = mess_3.from_to
-- Same filter keyed by user_id; this derived table exposes user_id, so the
-- join must use that column (it has no from_to column).
LEFT OUTER JOIN
(
    SELECT MessageUnique.user_id, Count(DISTINCT(MessageUnique.user_id)) AS mess_count
    FROM messages as MessageUnique
    WHERE (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)
    AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
    GROUP BY MessageUnique.user_id
) AS mess_4
ON U.id = mess_4.user_id
-- County links per business.
LEFT OUTER JOIN
(
    SELECT bc2.business_id, Count(*) AS county_no
    FROM business_counties as bc2
    GROUP BY bc2.business_id
) as business1
ON Business.id = business1.business_id
-- Clickthroughs per business in the date range.
LEFT OUTER JOIN
(
    SELECT click.business_id, Count(click.id) AS clicks
    FROM business_clickthroughs as click
    WHERE click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
    GROUP BY click.business_id
) as click1
ON Business.id = click1.business_id
-- Region links per business.
LEFT OUTER JOIN
(
    SELECT br2.business_id, Count(*) AS region_no
    FROM business_regions as br2
    GROUP BY br2.business_id
) as business_regions
ON Business.id = business_regions.business_id
-- Reveals per business in the date range.
LEFT OUTER JOIN
(
    SELECT BusinessReveal.business_id, count(BusinessReveal.id) as reveals_no
    FROM business_reveals as BusinessReveal
    WHERE BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
    GROUP BY BusinessReveal.business_id
) as business_reveals1
ON business_reveals1.business_id = Business.id;


来源:https://stackoverflow.com/questions/24312832/how-to-optimise-a-query-containing-joins-and-subqueries

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!