Rails: Faster way to perform updates on many records

安稳与你 提交于 2019-12-03 07:38:52

Try wrapping your entire code in a single database transaction. Since you're on Heroku, the back end will be Postgres. With that many update statements, you can probably benefit greatly from running them all inside one transaction: your code executes more quickly and essentially just leaves a "queue" of 6500 statements for Postgres to work through as the server is able to dequeue them. Depending on the back end, you might have to split the work into smaller chunks - but even committing 100 statements at a time (closing and re-opening the transaction between chunks) would greatly improve throughput into Pg.

http://api.rubyonrails.org/classes/ActiveRecord/Transactions/ClassMethods.html http://www.postgresql.org/docs/9.2/static/sql-set-transaction.html

So before line 2 you'd add something like:

def add_details(shop, shopify_orders)
  Order.transaction do
    shopify_orders.each do |shopify_order|

And then at the very end of your method add another end:

      if !payment_details.blank?
        PaymentDetail.add_details(order, payment_details)
      end
    end //shopify_orders.each..
  end //Order.transaction..
end //method

You can monkey-patch ActiveRecord like this:

class ActiveRecord::Base

  #http://stackoverflow.com/questions/15317837/bulk-insert-records-into-active-record-table?lq=1
  #https://gist.github.com/jackrg/76ade1724bd816292e4e
  #  "UPDATE THIS SET <list_of_column_assignments>  FROM <table_name> THIS  JOIN (VALUES (<csv1>, <csv2>,...) VALS ( <column_names> ) ON <list_of_primary_keys_comparison>"

  # Updates many rows of this model's table with one SQL statement per batch
  # of up to 1000 records, bypassing model instantiation.
  #
  # record_list - Array of Hashes (String or Symbol keys). Every hash must
  #               contain a present value for each primary-key column; the
  #               remaining keys are the columns to assign. Columns are the
  #               union of all keys in a batch, so a record that omits a key
  #               another record supplies has that column set to NULL.
  #
  # Returns nil for an empty list, otherwise whatever `connection.execute`
  # returned for the last batch.
  # Raises RuntimeError when the model has no primary key, when record_list is
  # not an Array of Hashes, or when a record lacks a primary-key value.
  #
  # NOTE(review): the "UPDATE <alias> ... FROM ... JOIN (VALUES ...)" shape is
  # dialect-specific - confirm that the target database accepts it.
  def self.bulk_update(record_list)
    pk = self.primary_key
    raise "primary_key not found" unless pk.present?

    raise "record_list not an Array of Hashes" unless record_list.is_a?(Array) && record_list.all? { |rec| rec.is_a?(Hash) }
    return nil if record_list.empty?

    # Every hash needs its primary key(s), otherwise there is nothing to JOIN on.
    record_list.each do |r|
      raise "Primary Keys '#{pk}' not found on record: #{r}" unless hasAllPKs?(r)
    end

    # Array() lets single and composite primary keys share one code path.
    pk_comparison = Array(pk).map { |key| "THIS.#{key} = VALS.#{key}" }.join(' AND ')

    result = nil

    # each_slice yields disjoint batches of at most 1000 records. The previous
    # index arithmetic (start-1..start+999) produced 1001-row batches that
    # overlapped by one row.
    record_list.each_slice(1000) do |batch|
      key_list, value_list = convert_record_list(batch)

      csv_vals = value_list.map { |v| "(#{v.join(", ")})" }.join(", ")
      column_names = key_list.join(", ")

      # convert_record_list appends created_at/updated_at when the caller did
      # not supply them. For an UPDATE an auto-filled created_at must not be
      # assigned, or every touched row would lose its original creation time.
      supplied_keys = batch.flat_map(&:keys).map(&:to_s)
      columns_assign = key_list.reject { |col|
        inPK?(col) || (col == 'created_at' && !supplied_keys.include?(col))
      }.map { |col| "THIS.#{col} = VALS.#{col}" }.join(', ')

      sql = "UPDATE THIS SET #{columns_assign}  FROM #{self.table_name} THIS  JOIN ( VALUES #{csv_vals} ) VALS ( #{column_names} ) ON ( #{pk_comparison} )"

      # Failures surface as exceptions raised by the adapter; the returned
      # object is adapter-specific and is not an Integer that can be compared
      # with 0, so no status check is done here.
      result = self.connection.execute(sql)
    end

    result
  end

  # Tells whether +str+ (String or Symbol) names a primary-key column of this
  # model. Works for single and composite (Array) primary keys.
  def self.inPK?(str)
    Array(self.primary_key).include?(str.to_s)
  end

  #test if given hash has primary keys included as hash keys and those keys are not empty
  def self.hasAllPKs?(hash)
    h = hash.stringify_keys
    pk = self.primary_key

    if pk.is_a?(Array)
      pk.all? { |k| h.key?(k) && h[k].present? }
    else
      h.key?(pk) && h[pk].present?
    end
  end

  # Turns an Array of Hashes into SQL-ready column and value lists.
  #
  # Returns [key_list, value_list]:
  #   key_list   - sorted, de-duplicated column names (Strings) found across
  #                all records, followed by created_at / updated_at when the
  #                table has those columns and no record supplied them.
  #   value_list - one Array per record holding the quoted SQL literal for
  #                each entry of key_list, in the same order.
  def self.convert_record_list(record_list)
    # Build the list of keys
    key_list = record_list.flat_map(&:keys).map(&:to_s).uniq.sort

    value_list = record_list.map do |rec|
      key_list.map do |key|
        value = rec[key]
        # Fall back to the Symbol key only when the String key yields nil.
        # Using `||` here would turn an explicit `false` into NULL.
        value = rec[key.to_sym] if value.nil?
        ActiveRecord::Base.connection.quote(value)
      end
    end

    # If table has standard timestamps and they're not in the record list then add them to the record list
    time = ActiveRecord::Base.connection.quote(Time.now)
    %w(created_at updated_at).each do |field_name|
      next unless self.column_names.include?(field_name) && !key_list.include?(field_name)

      key_list << field_name
      value_list.each { |row| row << time }
    end

    [key_list, value_list]
  end
end

Then you can build an array of hashes containing your models' attributes (including their primary keys) and do something like:

# Usage sketch (placeholders, not runnable as written): call bulk_update on the
# model class inside a transaction. Each hash must include the model's primary
# key - bulk_update raises for any record that lacks it - plus the attributes
# to change.
ActiveRecord::Base.transaction do
   Model.bulk_update [ {attr1: val1, attr2: val2,...},  {attr1: val1, attr2: val2,...},   ... ]
end

The records are sent in batches of about 1,000, each batch as a single SQL command, without running Rails callbacks or validations.

For PostgreSQL, there are several issues that the above approach does not address:

  1. You must specify an actual table, not just an alias, in the update target table.
  2. You cannot repeat the target table in the FROM phrase. Since you are joining the target table to a VALUES table (hence there is only one table in the FROM phrase), you won't be able to use JOIN; you must instead use WHERE.
  3. You don't get the same "free" casts in a VALUES table that you do in a simple "UPDATE" command, so you must cast date/timestamp values as such (#val_cast does this).

    class ActiveRecord::Base

      # Bulk-updates rows of this model's table using the PostgreSQL form
      # "UPDATE <table> AS this SET ... FROM (VALUES ...) vals (...) WHERE ...",
      # one statement per batch of up to 1000 records.
      #
      # record_list - Array of Hashes (String or Symbol keys). Each hash is
      #               expected to carry the primary-key value used to match
      #               the row, plus the columns to assign. Columns are the
      #               union of all keys in a batch, so a record that omits a
      #               key another record supplies has that column set to NULL.
      #
      # created_at is never assigned; updated_at is set to the current time
      # when the table has that column and the records do not supply it.
      #
      # Returns record_list.
      # Raises ArgumentError when record_list is not an Array of Hashes or a
      # key does not name a column of the table.
      #
      # NOTE(review): newer Rails releases may define their own class-level
      # `update!`; confirm this definition does not shadow it in your version.
      def self.update!(record_list)
        # `raise Klass, "message"` needs the comma; without it Ruby tries to
        # call a method named ArgumentError and raises NoMethodError instead.
        raise ArgumentError, "record_list not an Array of Hashes" unless record_list.is_a?(Array) && record_list.all? { |rec| rec.is_a?(Hash) }
        return record_list if record_list.empty?

        # Quoted exactly like the entries of field_list so it can be removed
        # from that list and matches the VALUES column alias in the WHERE.
        key_field = %Q["#{self.primary_key}"]

        # each_slice yields disjoint batches of at most 1000 records. The
        # previous index arithmetic (start-1..start+999) produced 1001-row
        # batches that overlapped by one row.
        record_list.each_slice(1000) do |batch|
          field_list, value_list = convert_record_list(batch)
          non_key_fields = field_list - [key_field, %Q["created_at"]]
          columns_assign = non_key_fields.map { |field| "#{field} = #{val_cast(field)}" }.join(",")
          value_table = value_list.map { |row| "(#{row.join(", ")})" }.join(", ")
          sql = "UPDATE #{table_name} AS this SET #{columns_assign} FROM (VALUES #{value_table}) vals (#{field_list.join(", ")}) WHERE this.#{key_field} = vals.#{key_field}"
          # `update` is the connection's public entry point for UPDATE
          # statements; `update_sql` is not available in current Rails.
          self.connection.update(sql)
        end

        record_list
      end

      # Builds the SQL expression that reads +field+ from the VALUES table.
      # Columns whose SQL type mentions "time" or "date" are cast explicitly,
      # because literals inside VALUES are not coerced to the target column
      # type the way they are in a plain UPDATE.
      #
      # field - column name, with or without surrounding double quotes.
      #
      # Raises ArgumentError when the table has no such column.
      def self.val_cast(field)
        name = field.gsub('"', '')
        column = columns.find { |c| c.name == name }
        raise ArgumentError, "unknown column '#{name}' for table #{table_name}" if column.nil?

        # Reference the column quoted, matching how it is declared in the
        # VALUES alias list; unquoted identifiers are folded to lower case.
        if column.sql_type =~ /time|date/
          %Q[cast (vals."#{name}" as #{column.sql_type})]
        else
          %Q[vals."#{name}"]
        end
      end

      # Turns an Array of Hashes into SQL-ready column and value lists.
      #
      # Returns [field_list, value_list]:
      #   field_list - double-quoted column names: the sorted, de-duplicated
      #                keys found across all records, followed by created_at /
      #                updated_at when the table has those columns and no
      #                record supplied them.
      #   value_list - one Array per record holding the quoted SQL literal for
      #                each entry of field_list, in the same order.
      def self.convert_record_list(record_list)
        # Build the list of fields
        field_list = record_list.flat_map(&:keys).map(&:to_s).uniq.sort

        value_list = record_list.map do |rec|
          field_list.map do |field|
            value = rec[field]
            # Fall back to the Symbol key only when the String key yields nil.
            # Using `||` here would turn an explicit `false` into NULL.
            value = rec[field.to_sym] if value.nil?
            ActiveRecord::Base.connection.quote(value)
          end
        end

        # If table has standard timestamps and they're not in the record list then add them to the record list
        time = ActiveRecord::Base.connection.quote(Time.now)
        %w(created_at updated_at).each do |field_name|
          next unless self.column_names.include?(field_name) && !field_list.include?(field_name)

          field_list << field_name
          value_list.each { |row| row << time }
        end

        field_list.map! { |field| %Q["#{field}"] }

        [field_list, value_list]
      end
    end
    
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!