问题
I have inherited the following query and DB structure and I want to optimise it as it is slow. It contains joins and subqueries which I've read isn't a good plan. I've tried various ways to improve it but am getting stuck/lost.
If it is good as it is then fine but if there are suggestions for improving it I would be immensely grateful...
The query draws data from various tables to produce a report, per supplier, on how many clickthroughs to the supplier's website there have been, how many telephone number 'reveals' there have been, and how many emails have been sent to the supplier.
The WHERE clause uses 1=1 as conditions are sometimes added to filter the report down by region, county, and supplier's business type.
The code is copied from mysql_slow log to interpolate all the $variables. The structure of the tables is output from a mysql dump.
The query:
-- Report: one row per business (see the GROUP BY at the end) with activity counts
-- for the period 2014-04-01 .. 2014-04-30.
-- Each count is a correlated subquery in the SELECT list; the EXPLAIN output lists
-- all six as DEPENDENT SUBQUERY.
SELECT Business.*,
-- message_no: messages created in the period whose from_to or user_id matches the joined user.
-- messages.from_to is varchar(500) while users.id is int, so U.id = Message.from_to mixes types.
( SELECT Count(Message.id) FROM messages as Message
WHERE (U.id = Message.from_to OR U.id = Message.user_id)
AND Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
) as message_no,
-- message_unique_no: distinct user_id values among the same messages, restricted to
-- rows that have no parent message or that are their own parent.
( SELECT Count(DISTINCT(MessageUnique.user_id)) FROM messages as MessageUnique
WHERE (U.id = MessageUnique.from_to OR U.id = MessageUnique.user_id)
AND (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)
AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
) as message_unique_no,
-- county_no: number of business_counties rows for the business (no date filter).
( SELECT Count(*) FROM business_counties as bc2
WHERE Business.id = bc2.business_id ) as county_no,
-- clicks: clickthrough rows for the business created in the period.
( SELECT Count(click.id) FROM business_clickthroughs as click
WHERE Business.id = click.business_id
AND click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
) as clicks,
-- region_no: number of business_regions rows for the business (no date filter).
( SELECT Count(*) FROM business_regions as br2
WHERE Business.id = br2.business_id ) as region_no,
-- reveals_no: reveal rows for the business created in the period.
-- The inner alias reveal_no is unused; the outer alias reveals_no names the column.
( SELECT count(BusinessReveal.id) as reveal_no FROM business_reveals as BusinessReveal
WHERE 1=1
AND BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
AND BusinessReveal.business_id = Business.id
) as reveals_no
FROM businesses as Business
-- U supplies the user id that the two message subqueries correlate on.
LEFT JOIN users as U ON Business.id = U.business_id
-- No column of bc, bt or br is selected. As posted they only multiply rows, which the
-- GROUP BY then collapses; they exist so that optional region / county / business type
-- conditions can be appended to the WHERE clause.
LEFT JOIN business_counties as bc ON Business.id = bc.business_id
LEFT JOIN businesses_business_types as bt ON Business.id = bt.business_id
LEFT JOIN business_regions as br ON Business.id = br.business_id
-- 1=1 is a placeholder so that generated filters can be appended with AND.
WHERE 1=1
-- Collapses the rows multiplied by the joins back to one row per Business.id.
Group By Business.id;
The table structures:
/*
Navicat MySQL Data Transfer
Source Server : _Localhost
Source Server Type : MySQL
Source Server Version : 50530
Target Server Type : MySQL
Target Server Version : 50530
File Encoding : utf-8
*/
-- ----------------------------
-- Table structure for `business_clickthroughs`
-- ----------------------------
-- Rows of this table are counted per business_id within a `created` range by the report query.
DROP TABLE IF EXISTS `business_clickthroughs`;
CREATE TABLE `business_clickthroughs` (
`id` bigint(12) unsigned NOT NULL AUTO_INCREMENT,
`business_id` int(8) unsigned NOT NULL,
`registered_user` tinyint(1) unsigned DEFAULT '0',
`created` datetime NOT NULL,
PRIMARY KEY (`id`),
-- Only business_id is indexed; `created`, which the report filters on, is in no index.
KEY `bid` (`business_id`)
) ENGINE=InnoDB AUTO_INCREMENT=29357 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;
-- ----------------------------
-- Table structure for `business_counties`
-- ----------------------------
-- Links a business to a county (business_id, county_id); the report counts rows per business_id.
DROP TABLE IF EXISTS `business_counties`;
CREATE TABLE `business_counties` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`business_id` int(11) NOT NULL,
`county_id` int(11) NOT NULL,
PRIMARY KEY (`id`),
-- The EXPLAIN output shows both bc and bc2 resolved through this index ("Using index").
KEY `bcid` (`business_id`)
) ENGINE=MyISAM AUTO_INCREMENT=20124 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;
-- ----------------------------
-- Table structure for `business_regions`
-- ----------------------------
-- Links a business to a region (business_id, region_id); the report counts rows per business_id.
DROP TABLE IF EXISTS `business_regions`;
CREATE TABLE `business_regions` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`business_id` int(11) NOT NULL,
`region_id` int(11) NOT NULL,
-- NOTE: there is no index on business_id, unlike business_counties. The EXPLAIN output
-- shows a full scan (type ALL, 440 rows) for both the br join and the br2 subquery.
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=2719 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;
-- ----------------------------
-- Table structure for `business_reveals`
-- ----------------------------
-- Rows of this table are counted per business_id within a `created` range by the report query.
DROP TABLE IF EXISTS `business_reveals`;
CREATE TABLE `business_reveals` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`business_id` int(11) NOT NULL,
`customer_id` int(11) DEFAULT NULL,
`created` datetime NOT NULL,
`modified` datetime NOT NULL,
PRIMARY KEY (`id`),
-- Only business_id is indexed; `created`, which the report filters on, is in no index.
KEY `bid` (`business_id`)
) ENGINE=InnoDB AUTO_INCREMENT=3172 DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT;
-- ----------------------------
-- Table structure for `businesses_business_types`
-- ----------------------------
-- Links a business to a business type (business_id, business_type_id) with a `level` value.
-- The report joins it as bt but selects none of its columns.
DROP TABLE IF EXISTS `businesses_business_types`;
CREATE TABLE `businesses_business_types` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`business_id` int(11) NOT NULL,
`business_type_id` int(11) NOT NULL,
`level` int(2) NOT NULL DEFAULT '2',
PRIMARY KEY (`id`),
-- The EXPLAIN output shows the bt join resolved through this index ("Using index").
KEY `bid` (`business_id`) COMMENT '(null)'
) ENGINE=MyISAM AUTO_INCREMENT=4484 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=FIXED;
-- ----------------------------
-- Table structure for `messages`
-- ----------------------------
-- Messages table. The report matches users.id against both from_to and user_id and
-- filters on `created`.
DROP TABLE IF EXISTS `messages`;
CREATE TABLE `messages` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`subject` varchar(500) DEFAULT NULL,
`message` text,
`user_id` int(11) DEFAULT NULL,
`message_folder_id` int(11) DEFAULT NULL,
`parent_message_id` int(11) DEFAULT NULL,
`status` int(11) DEFAULT NULL,
`direction` int(11) DEFAULT NULL,
-- from_to is a string column, yet the report compares it with the integer users.id.
`from_to` varchar(500) DEFAULT NULL,
`attachment` varchar(500) DEFAULT NULL,
-- `created` is filtered by the report but is not part of any index below.
`created` datetime DEFAULT NULL,
`modified` datetime DEFAULT NULL,
`guest_sender` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
-- Prefix index on the first 255 characters of from_to.
-- The EXPLAIN output lists fromto and uid as possible keys for both message subqueries
-- but uses neither (type ALL, 4958 rows each).
KEY `fromto` (`from_to`(255)),
KEY `uid` (`user_id`),
KEY `pmid` (`parent_message_id`)
) ENGINE=InnoDB AUTO_INCREMENT=4582 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT;
-- ----------------------------
-- Table structure for `users`
-- ----------------------------
-- Users table. The report joins it to businesses on users.business_id and uses users.id
-- to find the user's messages.
DROP TABLE IF EXISTS `users`;
CREATE TABLE `users` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`login` varchar(255) COLLATE latin1_general_ci NOT NULL,
`password` varchar(255) COLLATE latin1_general_ci NOT NULL,
`name` varchar(255) COLLATE latin1_general_ci NOT NULL,
`email` varchar(255) COLLATE latin1_general_ci NOT NULL,
`title` varchar(20) COLLATE latin1_general_ci NOT NULL,
`firstname` varchar(255) COLLATE latin1_general_ci NOT NULL,
`lastname` varchar(255) COLLATE latin1_general_ci NOT NULL,
`active` tinyint(1) NOT NULL DEFAULT '0',
`first_visit` tinyint(1) NOT NULL DEFAULT '1',
`signature` text COLLATE latin1_general_ci,
`type` varchar(45) COLLATE latin1_general_ci DEFAULT 'customer',
-- Join column used by the report; nullable and not indexed.
`business_id` int(11) DEFAULT NULL,
`admin_monitor` tinyint(1) NOT NULL DEFAULT '0',
`partner_name` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
`postcode` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
`venue_postcode` varchar(255) COLLATE latin1_general_ci DEFAULT NULL,
`wedding_date` datetime DEFAULT NULL,
`phone` varchar(255) COLLATE latin1_general_ci NOT NULL,
`register_date` datetime DEFAULT NULL,
`event` text COLLATE latin1_general_ci,
`mailing_list` tinyint(1) NOT NULL DEFAULT '0',
`created` datetime NOT NULL,
`modified` datetime NOT NULL,
-- The primary key is the only index. The EXPLAIN output shows a full scan of this
-- table (type ALL, 2658 rows) for the join on business_id.
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=2854 DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci ROW_FORMAT=DYNAMIC;
The Explain plan.
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY Business ALL - - - - 444 Using temporary; Using filesort
1 PRIMARY U ALL - - - - 2658 -
1 PRIMARY bc ref bcid bcid 4 Business.id 7 Using index
1 PRIMARY bt ref bid bid 4 Business.id 9 Using index
1 PRIMARY br ALL - - - - 440 -
7 DEPENDENT SUBQUERY BusinessReveal ref bid bid 4 func 5 Using where
6 DEPENDENT SUBQUERY br2 ALL - - - - 440 Using where
5 DEPENDENT SUBQUERY click ref bid bid 4 func 22 Using where
4 DEPENDENT SUBQUERY bc2 ref bcid bcid 4 func 7 Using index
3 DEPENDENT SUBQUERY MessageUnique ALL fromto,uid,pmid - - - 4958 Using where
2 DEPENDENT SUBQUERY Message ALL fromto,uid - - - 4958 Using where
回答1:
Your query has 6 correlated sub queries, and in total is returning 444 rows. Each of those correlated sub queries is effectively being executed for each returned row. Hence your single query is resulting in just under 3000 queries.
Personally I prefer to avoid them, using a large join or joining against sub queries instead. However, which approach is faster depends on the number of rows returned.
Further, the main query also LEFT JOINs directly to tables that the sub queries already read, which generates a lot of duplicate rows that the GROUP BY then collapses. As you select nothing directly from most of those tables, and the GROUP BY is on what appears to be a unique key, those joins seem irrelevant.
If you keep the correlated sub queries:-
-- Sub query for message_no, correlated on U.id from the outer query.
SELECT Count(Message.id) FROM messages as Message
WHERE (U.id = Message.from_to OR U.id = Message.user_id)
AND Message.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
There is no useful index on this table for this sub query. As you are checking 2 different columns for the U.id there is not much that can be done there, but an index on created would help. It might be better to duplicate this sub query, once checking from_to and once checking user_id, and adding the results together. As you could then have an index on the relevant id field and the date.
Also, you are doing a count on a column which appears to be the unique key, so it should never be null; COUNT(*) would give the same result.
-- Sub query for message_unique_no, correlated on U.id from the outer query.
SELECT Count(DISTINCT(MessageUnique.user_id)) FROM messages as MessageUnique
WHERE (U.id = MessageUnique.from_to OR U.id = MessageUnique.user_id)
AND (MessageUnique.parent_message_id is null OR MessageUnique.parent_message_id = MessageUnique.id)
AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
Same problem as the previous sub query.
-- Sub query for county_no, correlated on Business.id from the outer query.
SELECT Count(*) FROM business_counties as bc2
WHERE Business.id = bc2.business_id
This has a key on business_id and should be OK
-- Sub query for clicks, correlated on Business.id from the outer query.
SELECT Count(click.id) FROM business_clickthroughs as click
WHERE Business.id = click.business_id
AND click.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
While indexed on business id there is no index that covers both business id and the created date, which would probably help here.
-- Sub query for region_no, correlated on Business.id from the outer query.
SELECT Count(*) FROM business_regions as br2
WHERE Business.id = br2.business_id
This requires an index on business_id on the business regions table
-- Sub query for reveals_no, correlated on Business.id from the outer query.
SELECT count(BusinessReveal.id) as reveal_no FROM business_reveals as BusinessReveal
WHERE 1=1
AND BusinessReveal.created BETWEEN '2014-04-01 00:00:00' and '2014-04-30 23:59:59'
AND BusinessReveal.business_id = Business.id
Here the key doesn't cover the created date, just the business id.
If you want to try doing joins against sub queries (which can be more efficient, despite MySQL being poor at joining onto sub queries) then something like this (not tested):-
-- Activity report per business for 2014-04-01 .. 2014-04-30, built by joining
-- pre-aggregated derived tables instead of running a correlated sub query per
-- output row. Each derived table is computed once and grouped by its join key.
--
-- Every count is wrapped in COALESCE(..., 0): an unmatched LEFT JOIN yields NULL
-- and NULL + n is NULL, whereas the correlated COUNT() sub queries return 0.
--
-- NOTE(review): a message whose from_to and user_id both match the same user is
-- counted in both halves of message_no / message_unique_no, while the OR-based
-- sub queries count it once. Confirm whether such rows can exist.
-- NOTE(review): there is no GROUP BY here, so a business with several rows in
-- users produces one output row per user. Confirm users.business_id is unique
-- per business, or aggregate per Business.id.
SELECT
    Business.*,
    COALESCE(mess_1.mess_count, 0) + COALESCE(mess_2.mess_count, 0) AS message_no,
    COALESCE(mess_3.mess_count, 0) + COALESCE(mess_4.mess_count, 0) AS message_unique_no,
    COALESCE(business1.county_no, 0) AS county_no,
    COALESCE(click1.clicks, 0) AS clicks,
    COALESCE(business_regions1.region_no, 0) AS region_no,
    COALESCE(business_reveals1.reveals_no, 0) AS reveals_no
FROM businesses AS Business
LEFT JOIN users AS U
    ON Business.id = U.business_id
-- Messages in the period, keyed by from_to.
-- from_to is varchar(500); joining it to the integer U.id relies on MySQL's
-- implicit string/number comparison, exactly as the OR-based sub queries do.
LEFT JOIN
(
    SELECT Message.from_to, COUNT(Message.id) AS mess_count
    FROM messages AS Message
    WHERE Message.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY Message.from_to
) AS mess_1
    ON U.id = mess_1.from_to
-- Messages in the period, keyed by user_id.
LEFT JOIN
(
    SELECT Message.user_id, COUNT(Message.id) AS mess_count
    FROM messages AS Message
    WHERE Message.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY Message.user_id
) AS mess_2
    ON U.id = mess_2.user_id
-- Distinct user_id values of thread-starting messages (no parent, or own parent), keyed by from_to.
LEFT JOIN
(
    SELECT MessageUnique.from_to, COUNT(DISTINCT MessageUnique.user_id) AS mess_count
    FROM messages AS MessageUnique
    WHERE (MessageUnique.parent_message_id IS NULL OR MessageUnique.parent_message_id = MessageUnique.id)
    AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY MessageUnique.from_to
) AS mess_3
    ON U.id = mess_3.from_to
-- Same filter keyed by user_id. Grouping by the counted column means the count is
-- at most 1 per user: it records whether the user has any such message.
LEFT JOIN
(
    SELECT MessageUnique.user_id, COUNT(DISTINCT MessageUnique.user_id) AS mess_count
    FROM messages AS MessageUnique
    WHERE (MessageUnique.parent_message_id IS NULL OR MessageUnique.parent_message_id = MessageUnique.id)
    AND MessageUnique.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY MessageUnique.user_id
) AS mess_4
    -- mess_4 exposes user_id (it has no from_to column), so join on that.
    ON U.id = mess_4.user_id
-- Counties per business. The grouping column must come from bc2: the outer
-- Business alias is not visible inside a derived table.
LEFT JOIN
(
    SELECT bc2.business_id, COUNT(*) AS county_no
    FROM business_counties AS bc2
    GROUP BY bc2.business_id
) AS business1
    ON Business.id = business1.business_id
-- Clickthroughs per business in the period.
LEFT JOIN
(
    SELECT click.business_id, COUNT(click.id) AS clicks
    FROM business_clickthroughs AS click
    WHERE click.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY click.business_id
) AS click1
    ON Business.id = click1.business_id
-- Regions per business. There is no correlated WHERE here: a derived table cannot
-- reference Business.id, and the ON clause below already does the matching.
-- The alias is business_regions1 so that it does not shadow the real table name.
LEFT JOIN
(
    SELECT br2.business_id, COUNT(*) AS region_no
    FROM business_regions AS br2
    GROUP BY br2.business_id
) AS business_regions1
    ON Business.id = business_regions1.business_id
-- Reveals per business in the period. The column alias must be reveals_no to
-- match the name referenced in the SELECT list.
LEFT JOIN
(
    SELECT BusinessReveal.business_id, COUNT(BusinessReveal.id) AS reveals_no
    FROM business_reveals AS BusinessReveal
    WHERE BusinessReveal.created BETWEEN '2014-04-01 00:00:00' AND '2014-04-30 23:59:59'
    GROUP BY BusinessReveal.business_id
) AS business_reveals1
    ON business_reveals1.business_id = Business.id;
来源:https://stackoverflow.com/questions/24312832/how-to-optimise-a-query-containing-joins-and-subqueries