How to repair a corrupted MPTT tree (nested set) in the database using SQL?

前端 未结 4 598
我在风中等你
我在风中等你 2020-12-23 22:12

I have an MPTT tree of over 100,000 records stored in MySQL using lft, rght and parent_id columns. Now the left/right values became co

相关标签:
4条回答
  • 2020-12-23 22:44

    Using SQL Server, following script seems to be working for me.

    Output testscript

    category_id name                 parent      lft         rgt         lftcalc     rgtcalc
    ----------- -------------------- ----------- ----------- ----------- ----------- -----------
    1           ELECTRONICS          NULL        1           20          1           20
    2           TELEVISIONS          1           2           9           2           9
    3           TUBE                 2           3           4           3           4
    4           LCD                  2           5           6           5           6
    5           PLASMA               2           7           8           7           8
    6           PORTABLE ELECTRONICS 1           10          19          10          19
    7           MP3 PLAYERS          6           11          14          11          14
    8           FLASH                7           12          13          12          13
    9           CD PLAYERS           6           15          16          15          16
    10          2 WAY RADIOS         6           17          18          17          18
    

    Script

    SET NOCOUNT ON
    GO
    
    DECLARE @nested_category TABLE (
     category_id INT PRIMARY KEY,
     name VARCHAR(20) NOT NULL,
     parent INT,
     lft INT,
     rgt INT
    );
    
    DECLARE @current_Category_ID INTEGER
    DECLARE @current_parent INTEGER
    DECLARE @SafeGuard INTEGER
    DECLARE @myLeft INTEGER
    SET @SafeGuard = 100
    
    INSERT INTO @nested_category 
    SELECT           1,'ELECTRONICS',NULL,NULL,NULL
    UNION ALL SELECT 2,'TELEVISIONS',1,NULL,NULL
    UNION ALL SELECT 3,'TUBE',2,NULL,NULL
    UNION ALL SELECT 4,'LCD',2,NULL,NULL
    UNION ALL SELECT 5,'PLASMA',2,NULL,NULL
    UNION ALL SELECT 6,'PORTABLE ELECTRONICS',1,NULL,NULL
    UNION ALL SELECT 7,'MP3 PLAYERS',6,NULL,NULL
    UNION ALL SELECT 8,'FLASH',7,NULL,NULL
    UNION ALL SELECT 9,'CD PLAYERS',6,NULL,NULL
    UNION ALL SELECT 10,'2 WAY RADIOS',6,NULL,NULL
    
    /* Initialize */
    UPDATE  @nested_category 
    SET     lft = 1
            , rgt = 2
    WHERE   parent IS NULL
    
    UPDATE  @nested_category 
    SET     lft = NULL
            , rgt = NULL
    WHERE   parent IS NOT NULL
    
    WHILE EXISTS (SELECT * FROM @nested_category WHERE lft IS NULL) AND @SafeGuard > 0
    BEGIN
      SELECT  @current_Category_ID = MAX(nc.category_id)
      FROM    @nested_category nc
              INNER JOIN @nested_category nc2 ON nc2.category_id = nc.parent
      WHERE   nc.lft IS NULL
              AND nc2.lft IS NOT NULL
    
      SELECT  @current_parent = parent
      FROM    @nested_category
      WHERE   category_id = @current_category_id
    
      SELECT  @myLeft = lft
      FROM    @nested_category
      WHERE   category_id = @current_parent
    
      UPDATE @nested_category SET rgt = rgt + 2 WHERE rgt > @myLeft;
      UPDATE @nested_category SET lft = lft + 2 WHERE lft > @myLeft;
      UPDATE @nested_category SET lft = @myLeft + 1, rgt = @myLeft + 2 WHERE category_id = @current_category_id
    
      SET @SafeGuard = @SafeGuard - 1
    END
    
    SELECT * FROM @nested_category ORDER BY category_id
    
    SELECT  COUNT(node.name), node.name, MIN(node.lft)
    FROM    @nested_category AS node,
            @nested_category AS parent
    WHERE   node.lft BETWEEN parent.lft AND parent.rgt
    GROUP BY 
            node.name
    ORDER BY
            3, 1
    

    Testscript ##

    SET NOCOUNT ON
    GO
    
    DECLARE @nested_category TABLE (
     category_id INT PRIMARY KEY,
     name VARCHAR(20) NOT NULL,
     parent INT,
     lft INT,
     rgt INT, 
     lftcalc INT,
     rgtcalc INT
    );
    
    INSERT INTO @nested_category 
    SELECT           1,'ELECTRONICS',NULL,1,20,NULL,NULL
    UNION ALL SELECT 2,'TELEVISIONS',1,2,9,NULL,NULL
    UNION ALL SELECT 3,'TUBE',2,3,4,NULL,NULL
    UNION ALL SELECT 4,'LCD',2,5,6,NULL,NULL
    UNION ALL SELECT 5,'PLASMA',2,7,8,NULL,NULL
    UNION ALL SELECT 6,'PORTABLE ELECTRONICS',1,10,19,NULL,NULL
    UNION ALL SELECT 7,'MP3 PLAYERS',6,11,14,NULL,NULL
    UNION ALL SELECT 8,'FLASH',7,12,13,NULL,NULL
    UNION ALL SELECT 9,'CD PLAYERS',6,15,16,NULL,NULL
    UNION ALL SELECT 10,'2 WAY RADIOS',6,17,18,NULL,NULL
    
    /* Initialize */
    UPDATE  @nested_category 
    SET     lftcalc = 1
            , rgtcalc = 2
    WHERE   parent IS NULL
    
    DECLARE @current_Category_ID INTEGER
    DECLARE @current_parent INTEGER
    DECLARE @SafeGuard INTEGER
    DECLARE @myRight INTEGER
    DECLARE @myLeft INTEGER
    SET @SafeGuard = 100
    WHILE EXISTS (SELECT * FROM @nested_category WHERE lftcalc IS NULL) AND @SafeGuard > 0
    BEGIN
      SELECT  @current_Category_ID = MAX(nc.category_id)
      FROM    @nested_category nc
              INNER JOIN @nested_category nc2 ON nc2.category_id = nc.parent
      WHERE   nc.lftcalc IS NULL
              AND nc2.lftcalc IS NOT NULL
    
      SELECT  @current_parent = parent
      FROM    @nested_category
      WHERE   category_id = @current_category_id
    
      SELECT  @myLeft = lftcalc
      FROM    @nested_category
      WHERE   category_id = @current_parent
    
      UPDATE @nested_category SET rgtcalc = rgtcalc + 2 WHERE rgtcalc > @myLeft;
      UPDATE @nested_category SET lftcalc = lftcalc + 2 WHERE lftcalc > @myLeft;
      UPDATE @nested_category SET lftcalc = @myLeft + 1, rgtcalc = @myLeft + 2 WHERE category_id = @current_category_id
    
      SELECT * FROM @nested_category WHERE category_id = @current_parent
      SELECT * FROM @nested_category ORDER BY category_id
      SET @SafeGuard = @SafeGuard - 1
    END
    
    SELECT * FROM @nested_category ORDER BY category_id
    
    SELECT  COUNT(node.name), node.name, MIN(node.lft)
    FROM    @nested_category AS node,
            @nested_category AS parent
    WHERE   node.lft BETWEEN parent.lft AND parent.rgt
    GROUP BY 
            node.name
    ORDER BY
            3, 1
    
    0 讨论(0)
  • 2020-12-23 22:52

    Here's what I have adapted from @Lieven's answer, incorporating feedback from here for better performance:

    DROP PROCEDURE IF EXISTS tree_recover;
    
    DELIMITER //
    
    CREATE PROCEDURE tree_recover ()
    MODIFIES SQL DATA
    BEGIN
    
        DECLARE currentId, currentParentId  CHAR(36);
        DECLARE currentLeft                 INT;
        DECLARE startId                     INT DEFAULT 1;
    
        # Determines the max size for MEMORY tables.
        SET max_heap_table_size = 1024 * 1024 * 512;
    
        START TRANSACTION;
    
        # Temporary MEMORY table to do all the heavy lifting in,
        # otherwise performance is simply abysmal.
        CREATE TABLE `tmp_tree` (
            `id`        char(36) NOT NULL DEFAULT '',
            `parent_id` char(36)          DEFAULT NULL,
            `lft`       int(11)  unsigned DEFAULT NULL,
            `rght`      int(11)  unsigned DEFAULT NULL,
            PRIMARY KEY      (`id`),
            INDEX USING HASH (`parent_id`),
            INDEX USING HASH (`lft`),
            INDEX USING HASH (`rght`)
        ) ENGINE = MEMORY
        SELECT `id`,
               `parent_id`,
               `lft`,
               `rght`
        FROM   `tree`;
    
        # Leveling the playing field.
        UPDATE  `tmp_tree`
        SET     `lft`  = NULL,
                `rght` = NULL;
    
        # Establishing starting numbers for all root elements.
        WHILE EXISTS (SELECT * FROM `tmp_tree` WHERE `parent_id` IS NULL AND `lft` IS NULL AND `rght` IS NULL LIMIT 1) DO
    
            UPDATE `tmp_tree`
            SET    `lft`  = startId,
                   `rght` = startId + 1
            WHERE  `parent_id` IS NULL
              AND  `lft`       IS NULL
              AND  `rght`      IS NULL
            LIMIT  1;
    
            SET startId = startId + 2;
    
        END WHILE;
    
        # Switching the indexes for the lft/rght columns to B-Trees to speed up the next section, which uses range queries.
        DROP INDEX `lft`  ON `tmp_tree`;
        DROP INDEX `rght` ON `tmp_tree`;
        CREATE INDEX `lft`  USING BTREE ON `tmp_tree` (`lft`);
        CREATE INDEX `rght` USING BTREE ON `tmp_tree` (`rght`);
    
        # Numbering all child elements
        WHILE EXISTS (SELECT * FROM `tmp_tree` WHERE `lft` IS NULL LIMIT 1) DO
    
            # Picking an unprocessed element which has a processed parent.
            SELECT     `tmp_tree`.`id`
              INTO     currentId
            FROM       `tmp_tree`
            INNER JOIN `tmp_tree` AS `parents`
                    ON `tmp_tree`.`parent_id` = `parents`.`id`
            WHERE      `tmp_tree`.`lft` IS NULL
              AND      `parents`.`lft`  IS NOT NULL
            LIMIT      1;
    
            # Finding the element's parent.
            SELECT  `parent_id`
              INTO  currentParentId
            FROM    `tmp_tree`
            WHERE   `id` = currentId;
    
            # Finding the parent's lft value.
            SELECT  `lft`
              INTO  currentLeft
            FROM    `tmp_tree`
            WHERE   `id` = currentParentId;
    
            # Shifting all elements to the right of the current element 2 to the right.
            UPDATE `tmp_tree`
            SET    `rght` = `rght` + 2
            WHERE  `rght` > currentLeft;
    
            UPDATE `tmp_tree`
            SET    `lft` = `lft` + 2
            WHERE  `lft` > currentLeft;
    
            # Setting lft and rght values for current element.
            UPDATE `tmp_tree`
            SET    `lft`  = currentLeft + 1,
                   `rght` = currentLeft + 2
            WHERE  `id`   = currentId;
    
        END WHILE;
    
        # Writing calculated values back to physical table.
        UPDATE `tree`, `tmp_tree`
        SET    `tree`.`lft`  = `tmp_tree`.`lft`,
               `tree`.`rght` = `tmp_tree`.`rght`
        WHERE  `tree`.`id`   = `tmp_tree`.`id`;
    
        COMMIT;
    
        DROP TABLE `tmp_tree`;
    
    END//
    
    DELIMITER ;
    

    Worked well with some test data, but it's still running on my 100,000 records tree, so I can't give any final judgement yet. The naïve script working directly on the physical table has abysmal performance, running for at least hours, more likely days. Switching to a temporary MEMORY table brought this time down to about an hour, choosing the right indexes cut it down to 10 mins.

    0 讨论(0)
  • 2020-12-23 22:56

    You are rescue me!!! I use mixed tree model, so when the day is coming, my tree (30000+) was corrupted. I learn from both of your tech, but is not recovery, just fully rebuild with lost all of sorting and reverse tree... I think, need to keep in mind older cat_left.... Just for possible integrity. So, it maybe looks like...

    DROP PROCEDURE IF EXISTS tree_recover;
    
    DELIMITER |
    
    CREATE PROCEDURE tree_recover ()
    MODIFIES SQL DATA
    BEGIN
    
        DECLARE currentId, currentParentId  CHAR(36);
        DECLARE currentLeft                 INT;
        DECLARE startId                     INT DEFAULT 1;
    
        # Determines the max size for MEMORY tables.
        SET max_heap_table_size = 1024 * 1024 * 512;
    
        START TRANSACTION;
    
        # Temporary MEMORY table to do all the heavy lifting in,
        # otherwise performance is simply abysmal.
        DROP TABLE IF EXISTS `tmp_cat`;
        CREATE TABLE `tmp_cat` (
            `cat_id`        char(36) NOT NULL DEFAULT '',
            `cat_parent` char(36)          DEFAULT NULL,
            `cat_left`       int(11)  unsigned DEFAULT NULL,
            `cat_right`      int(11)  unsigned DEFAULT NULL,
            `cat_left_old`   int(11)  unsigned DEFAULT NULL,
            PRIMARY KEY      (`cat_id`),
            INDEX USING HASH (`cat_parent`),
            INDEX USING HASH (`cat_left`),
            INDEX USING HASH (`cat_right`),
        INDEX USING HASH (`cat_left_old`)
        ) ENGINE = MEMORY
        SELECT `cat_id`,
               `cat_parent`,
               `cat_left`,
               `cat_right`,
           `cat_left` as cat_left_old
        FROM   `catalog`;
    
        # Leveling the playing field.
        UPDATE  `tmp_cat`
        SET     `cat_left`  = NULL,
                `cat_right` = NULL;
    
        # Establishing starting numbers for all root elements.
        WHILE EXISTS (SELECT * FROM `tmp_cat` WHERE `cat_parent` IS NULL AND `cat_left` IS NULL AND `cat_right` IS NULL ORDER BY cat_left_old LIMIT 1) DO
    
            UPDATE `tmp_cat`
            SET    `cat_left`  = startId,
                   `cat_right` = startId + 1
            WHERE  `cat_parent` IS NULL
              AND  `cat_left`       IS NULL
              AND  `cat_right`      IS NULL
            LIMIT  1;
    
            SET startId = startId + 2;
    
        END WHILE;
    
        # Switching the indexes for the cat_left/rght columns to B-Trees to speed up the next section, which uses range queries.
        DROP INDEX `cat_left`  ON `tmp_cat`;
        DROP INDEX `cat_right` ON `tmp_cat`;
        DROP INDEX `cat_left_old` ON `tmp_cat`;
    
        CREATE INDEX `cat_left`  USING BTREE ON `tmp_cat` (`cat_left`);
        CREATE INDEX `cat_right` USING BTREE ON `tmp_cat` (`cat_right`);
        CREATE INDEX `cat_left_old` USING BTREE ON `tmp_cat` (`cat_left_old`);
    
        # Numbering all child elements
        WHILE EXISTS (SELECT * FROM `tmp_cat` WHERE `cat_left` IS NULL ORDER BY cat_left_old LIMIT 1) DO
    
            # Picking an unprocessed element which has a processed parent.
            SELECT     `tmp_cat`.`cat_id`
              INTO     currentId
            FROM       `tmp_cat`
            INNER JOIN `tmp_cat` AS `parents`
                    ON `tmp_cat`.`cat_parent` = `parents`.`cat_id`
            WHERE      `tmp_cat`.`cat_left` IS NULL
              AND      `parents`.`cat_left`  IS NOT NULL
        ORDER BY `tmp_cat`.cat_left_old DESC
            LIMIT      1;
    
            # Finding the element's parent.
            SELECT  `cat_parent`
              INTO  currentParentId
            FROM    `tmp_cat`
            WHERE   `cat_id` = currentId;
    
            # Finding the parent's cat_left value.
            SELECT  `cat_left`
              INTO  currentLeft
            FROM    `tmp_cat`
            WHERE   `cat_id` = currentParentId;
    
            # Shifting all elements to the right of the current element 2 to the right.
            UPDATE `tmp_cat`
            SET    `cat_right` = `cat_right` + 2
            WHERE  `cat_right` > currentLeft;
    
            UPDATE `tmp_cat`
            SET    `cat_left` = `cat_left` + 2
            WHERE  `cat_left` > currentLeft;
    
            # Setting cat_left and rght values for current element.
            UPDATE `tmp_cat`
            SET    `cat_left`  = currentLeft + 1,
                   `cat_right` = currentLeft + 2
            WHERE  `cat_id`   = currentId;
    
        END WHILE;
    
        # Writing calculated values back to physical table.
        UPDATE `catalog`, `tmp_cat`
        SET    `catalog`.`cat_left`  = `tmp_cat`.`cat_left`,
               `catalog`.`cat_right` = `tmp_cat`.`cat_right`
        WHERE  `catalog`.`cat_id`   = `tmp_cat`.`cat_id`;
    
        COMMIT;
    
        DROP TABLE IF EXISTS `tmp_cat`;
    
    END|
    
    0 讨论(0)
  • 2020-12-23 22:58

    In all solutions provided, I was getting a problem where MySQL would prompt that it would be Running query for hours but nothing would happen.

    I then realized that if I set the lft and rght values to 1 and 2 in the first record of the tmp_tree table (the one with parent_id = 0), everything worked fine. Maybe the procedure needs updating to do this automatically.

    0 讨论(0)
提交回复
热议问题