I am trying to compare two Ruby Hashes using the following code:
#!/usr/bin/env ruby
require \"yaml\"
require \"active_support\"
file1 = YAML::load(File.op
Here is algorithm to deeply compare two Hashes, which also will compare nested Arrays:
HashDiff.new(
{val: 1, nested: [{a:1}, {b: [1, 2]}] },
{val: 2, nested: [{a:1}, {b: [1]}] }
).report
# Output:
val:
- 1
+ 2
nested > 1 > b > 1:
- 2
Implementation:
class HashDiff
attr_reader :left, :right
def initialize(left, right, config = {}, path = nil)
@left = left
@right = right
@config = config
@path = path
@conformity = 0
end
def conformity
find_differences
@conformity
end
def report
@config[:report] = true
find_differences
end
def find_differences
if hash?(left) && hash?(right)
compare_hashes_keys
elsif left.is_a?(Array) && right.is_a?(Array)
compare_arrays
else
report_diff
end
end
def compare_hashes_keys
combined_keys.each do |key|
l = value_with_default(left, key)
r = value_with_default(right, key)
if l == r
@conformity += 100
else
compare_sub_items l, r, key
end
end
end
private
def compare_sub_items(l, r, key)
diff = self.class.new(l, r, @config, path(key))
@conformity += diff.conformity
end
def report_diff
return unless @config[:report]
puts "#{@path}:"
puts "- #{left}" unless left == NO_VALUE
puts "+ #{right}" unless right == NO_VALUE
end
def combined_keys
(left.keys + right.keys).uniq
end
def hash?(value)
value.is_a?(Hash)
end
def compare_arrays
l, r = left.clone, right.clone
l.each_with_index do |l_item, l_index|
max_item_index = nil
max_conformity = 0
r.each_with_index do |r_item, i|
if l_item == r_item
@conformity += 1
r[i] = TAKEN
break
end
diff = self.class.new(l_item, r_item, {})
c = diff.conformity
if c > max_conformity
max_conformity = c
max_item_index = i
end
end or next
if max_item_index
key = l_index == max_item_index ? l_index : "#{l_index}/#{max_item_index}"
compare_sub_items l_item, r[max_item_index], key
r[max_item_index] = TAKEN
else
compare_sub_items l_item, NO_VALUE, l_index
end
end
r.each_with_index do |item, index|
compare_sub_items NO_VALUE, item, index unless item == TAKEN
end
end
def path(key)
p = "#{@path} > " if @path
"#{p}#{key}"
end
def value_with_default(obj, key)
obj.fetch(key, NO_VALUE)
end
module NO_VALUE; end
module TAKEN; end
end
... and now in module form to be applied to a variety of collection classes (Hash among them). It's not a deep inspection, but it's simple.
# Enable "diffing" and two-way transformations between collection objects
module Diffable
# Calculates the changes required to transform self to the given collection.
# @param b [Enumerable] The other collection object
# @return [Array] The Diff: A two-element change set representing items to exclude and items to include
def diff( b )
a, b = to_a, b.to_a
[a - b, b - a]
end
# Consume return value of Diffable#diff to produce a collection equal to the one used to produce the given diff.
# @param to_drop [Enumerable] items to exclude from the target collection
# @param to_add [Enumerable] items to include in the target collection
# @return [Array] New transformed collection equal to the one used to create the given change set
def apply_diff( to_drop, to_add )
to_a - to_drop + to_add
end
end
if __FILE__ == $0
# Demo: Hashes with overlapping keys and somewhat random values.
Hash.send :include, Diffable
rng = Random.new
a = (:a..:q).to_a.reduce(Hash[]){|h,k| h.merge! Hash[k, rng.rand(2)] }
b = (:i..:z).to_a.reduce(Hash[]){|h,k| h.merge! Hash[k, rng.rand(2)] }
raise unless a == Hash[ b.apply_diff(*b.diff(a)) ] # change b to a
raise unless b == Hash[ a.apply_diff(*a.diff(b)) ] # change a to b
raise unless a == Hash[ a.apply_diff(*a.diff(a)) ] # change a to a
raise unless b == Hash[ b.apply_diff(*b.diff(b)) ] # change b to b
end
I had the same problem and sent a pull request to rails
https://github.com/elfassy/rails/commit/5f3410e04fe8c4d4745397db866c9633b80ccec6
This was answered in "Comparing ruby hashes". Rails adds a diff method to hashes. It works well.
You can compare hashes directly for equality:
hash1 = {'a' => 1, 'b' => 2}
hash2 = {'a' => 1, 'b' => 2}
hash3 = {'a' => 1, 'b' => 2, 'c' => 3}
hash1 == hash2 # => true
hash1 == hash3 # => false
hash1.to_a == hash2.to_a # => true
hash1.to_a == hash3.to_a # => false
You can convert the hashes to arrays, then get their difference:
hash3.to_a - hash1.to_a # => [["c", 3]]
if (hash3.size > hash1.size)
difference = hash3.to_a - hash1.to_a
else
difference = hash1.to_a - hash3.to_a
end
Hash[*difference.flatten] # => {"c"=>3}
Simplifying further:
Assigning difference via a ternary structure:
difference = (hash3.size > hash1.size) \
? hash3.to_a - hash1.to_a \
: hash1.to_a - hash3.to_a
=> [["c", 3]]
Hash[*difference.flatten]
=> {"c"=>3}
Doing it all in one operation and getting rid of the difference
variable:
Hash[*(
(hash3.size > hash1.size) \
? hash3.to_a - hash1.to_a \
: hash1.to_a - hash3.to_a
).flatten]
=> {"c"=>3}
what about convert both hash to_json and compare as string? but keeping in mind that
require "json"
h1 = {a: 20}
h2 = {a: "20"}
h1.to_json==h1.to_json
=> true
h1.to_json==h2.to_json
=> false