Simple way to calculate median with MySQL

后端 未结 30 1560
北荒
北荒 2020-11-22 04:20

What's the simplest (and hopefully not too slow) way to calculate the median with MySQL? I've used AVG(x) for finding the mean, but I'm having a hard time fi…

30条回答
  •  野趣味
    野趣味 (楼主)
    2020-11-22 04:36

    A comment on this page in the MySQL documentation has the following suggestion:

    -- (mostly) High Performance scaling MEDIAN function per group
    -- Median defined in http://en.wikipedia.org/wiki/Median
    --
    -- by Peter Hlavac
    -- 06.11.2008
    --
    -- Example Table:
    
    -- Recreate the example table from scratch: each row is one value (val)
    -- belonging to one group (id).
    DROP TABLE IF EXISTS table_median;
    CREATE TABLE table_median (
        id  INTEGER(11),
        val INTEGER(11)
    );
    COMMIT;
    
    
    -- Sample data: ids 1, 2, 3 and 4 receive 7, 1, 2 and 4 values respectively,
    -- so odd, single-element and even group sizes are all represented.
    INSERT INTO table_median
        (id, val)
    VALUES
        (1, 7),
        (1, 4),
        (1, 5),
        (1, 1),
        (1, 8),
        (1, 3),
        (1, 6),
        (2, 4),
        (3, 5),
        (3, 2),
        (4, 5),
        (4, 12),
        (4, 1),
        (4, 7);
    
    
    
    -- Calculating the MEDIAN of val for every id.
    -- @a is the running row counter used below; it is reset to 0 before the query.
    SELECT @a := 0;
    SELECT
        id,
        AVG(val) AS MEDIAN
    FROM (
        -- Keep only the rows sitting in the middle of each id's sorted values.
        SELECT
            id,
            val
        FROM (
            -- Number the rows of every id from 1 to c.
            -- NOTE(review): the numbering is only correct if the rows are
            -- processed in the (id, val) order requested by the innermost
            -- ORDER BY, and if the select list is evaluated left to right
            -- (n reads the @a value just assigned by shifted_n) -- confirm
            -- this holds on the target MySQL version.
            SELECT
                -- counter modulo the group size: 1, 2, ..., c-1, 0; it is back
                -- at 0 after the last row of an id, ready for the next id
                @a := (@a + 1) MOD sorted_rows.c AS shifted_n,
                -- turn the wrapped-around 0 back into c to get a 1-based position
                IF(@a MOD sorted_rows.c = 0, sorted_rows.c, @a) AS n,
                sorted_rows.id,
                sorted_rows.val,
                -- the number of elements for every id
                sorted_rows.c
            FROM (
                -- Every source row, tagged with the row count of its id.
                SELECT
                    src.id,
                    val,
                    c
                FROM table_median AS src
                INNER JOIN (
                    SELECT
                        id,
                        COUNT(1) AS c
                    FROM table_median
                    GROUP BY id
                ) AS group_size
                    ON (group_size.id = src.id)
                ORDER BY
                    src.id,
                    val
            ) AS sorted_rows
        ) AS numbered
        WHERE
            CASE
                -- even number of elements: take the lower and the upper
                -- median; the outer AVG() then averages the two
                WHEN c MOD 2 = 0 THEN (n = c DIV 2 OR n = (c DIV 2) + 1)
                -- odd number of elements: take the one in the middle
                -- (for a single element, c DIV 2 + 1 is 1)
                ELSE n = c DIV 2 + 1
            END
    ) AS middle_rows
    GROUP BY
        id;
    
    -- Explanation:
    -- The Statement creates a helper table like
    --
    -- n id val count
    -- ----------------
    -- 1, 1, 1, 7
    -- 2, 1, 3, 7
    -- 3, 1, 4, 7
    -- 4, 1, 5, 7
    -- 5, 1, 6, 7
    -- 6, 1, 7, 7
    -- 7, 1, 8, 7
    --
    -- 1, 2, 4, 1
    
    -- 1, 3, 2, 2
    -- 2, 3, 5, 2
    --
    -- 1, 4, 1, 4
    -- 2, 4, 5, 4
    -- 3, 4, 7, 4
    -- 4, 4, 12, 4
    
    
    -- From there we can select the n-th element at position count DIV 2 + 1
    -- (odd count), or average the elements at positions count DIV 2 and
    -- count DIV 2 + 1 (even count).
    

提交回复
热议问题