How to reset the sequence for IDs on PostgreSQL tables

后端 未结 9 930
北荒
北荒 2020-12-05 02:37

I recently imported a lot of data from an old database into a new Postgresql database as the basis for models in a new Django site.

I used the IDs from the old datab

9条回答
  •  自闭症患者
    2020-12-05 03:07

    Here is a more-or-less completely dynamic solution I just implemented in a management command that has no restriction as to the name of the Primary Key you are attempting to reset as it gathers it based on the connection params you have in settings.

    The only sequencing I could not reset included PKs that are not integers, which is apparent in the PK for django.contrib.sessions, but again I have never run into sequencing errors with that so I doubt it is an issue.

    Here is the command, run using python manage.py reset_sequences (obviously as long as your file/command is named reset_sequences.py)

    import psycopg2
    from django.conf import settings
    from django.core.management.base import BaseCommand
    from django.db import connections
    
    
    def dictfetchall(cursor):
        """Return all rows from a cursor as a dict"""
        columns = [col[0] for col in cursor.description]
        return [
            dict(zip(columns, row))
            for row in cursor.fetchall()
        ]
    
    
    class Command(BaseCommand):
        help = "Resets sequencing errors in Postgres which normally occur due to importing/restoring a DB"
    
        def handle(self, *args, **options):
            # loop over all databases in system to figure out the tables that need to be reset
            for name_to_use_for_connection, connection_settings in settings.DATABASES.items():
                db_name = connection_settings['NAME']
                host = connection_settings['HOST']
                user = connection_settings['USER']
                port = connection_settings['PORT']
                password = connection_settings['PASSWORD']
    
                # connect to this specific DB
                conn_str = f"host={host} port={port} user={user} password={password}"
    
                conn = psycopg2.connect(conn_str)
                conn.autocommit = True
    
                select_all_table_statement = f"""SELECT *
                                        FROM information_schema.tables
                                        WHERE table_schema = 'public'
                                        ORDER BY table_name;
                                    """
                # just a visual representation of where we are
                print('-' * 20, db_name)
                try:
                    not_reset_tables = list()
                    # use the specific name for the DB
                    with connections[name_to_use_for_connection].cursor() as cursor:
                        # using the current db as the cursor connection
                        cursor.execute(select_all_table_statement)
                        rows = dictfetchall(cursor)
                        # will loop over table names in the connected DB
                        for row in rows:
                            find_pk_statement = f"""
                                SELECT k.COLUMN_NAME
                                FROM information_schema.table_constraints t
                                LEFT JOIN information_schema.key_column_usage k
                                USING(constraint_name,table_schema,table_name)
                                WHERE t.constraint_type='PRIMARY KEY'
                                    AND t.table_name='{row['table_name']}';
                            """
                            cursor.execute(find_pk_statement)
                            pk_column_names = dictfetchall(cursor)
                            for pk_dict in pk_column_names:
                                column_name = pk_dict['column_name']
    
                            # time to build the reset sequence command for each table
                            # taken from django: https://docs.djangoproject.com/en/3.0/ref/django-admin/#sqlsequencereset
                            # example: SELECT setval(pg_get_serial_sequence('"[TABLE]"','id'), coalesce(max("id"), 1), max("id") IS NOT null) FROM "[TABLE]";
                            try:
                                reset_statement = f"""SELECT setval(pg_get_serial_sequence('"{row['table_name']}"','{column_name}'), 
                                                        coalesce(max("{column_name}"), 1), max("{column_name}") IS NOT null) FROM "{row['table_name']}" """
                                cursor.execute(reset_statement)
                                return_values = dictfetchall(cursor)
                                # will be 1 row
                                for value in return_values:
                                    print(f"Sequence reset to {value['setval']} for {row['table_name']}")
                            except Exception as ex:
                                # will only fail if PK is not an integer...
                                # currently in my system this is from django.contrib.sessions
                                not_reset_tables.append(f"{row['table_name']} not reset")
    
                except psycopg2.Error as ex:
                    raise SystemExit(f'Error: {ex}')
    
                conn.close()
                print('-' * 5, ' ALL ERRORS ', '-' * 5)
                for item_statement in not_reset_tables:
                    # shows which tables produced errors, so far I have only
                    # seen this with PK's that are not integers because of the MAX() method
                    print(item_statement)
    
                # just a visual representation of where we are
                print('-' * 20, db_name)
    
    

提交回复
热议问题