Finding common elements in two arrays

前端 未结 2 1916
北恋
北恋 2021-01-07 05:04

I’ve declared a type similar to the following.

type
  TLikes = record
    Name            : string[20];
    favColours  : array of string[20];

  faves     =         


        
2条回答
  •  慢半拍i
    慢半拍i (楼主)
    2021-01-07 05:58

    Here is a generic set, TSet<T>, which can be used as a tool to find relations between your data.

    TSet<T> can hold data of simple types and, unlike the built-in Set type, is not restricted to byte-sized element values.

    Supports:

    • Include (addition)
    • Exclude (subtraction)
    • Intersect (mutual inclusion, multiplication)
    • Symmetrical difference (mutual exclusion, xor)
    • Test for contains (in operator)
    • Test for equality (equal operator)
    • Test for superset of (>= operator)
    • Test for subset of ( <= operator)
    • Sorting
    • BinarySearch

    Use TSet<T> to benchmark your application.


    unit GenericSet;

    // TSet<T>: a value-type (record) set for arbitrary element types.
    //
    // Elements live in a dynamic array. Membership tests compare elements with
    // TEqualityComparer<T>.Default (linear scan) or, once Sort has been called,
    // with TArray.BinarySearch<T>. Any operation that may disturb the element
    // order resets the "sorted" flag.
    //
    // Conventions visible in the implementation below:
    //  * Contains(<empty array or set>) returns False, so an empty set is never
    //    reported as a subset of / contained in another set.
    //  * Two empty sets compare Equal.
    //  * Create(..., False) copies the input verbatim; duplicates are removed
    //    only when checkDuplicates is True.

    interface

    uses
      System.Generics.Defaults,
      System.Generics.Collections;

    type
      TSet<T> = record
        // Include (union)
        class operator Add(const aSet: TSet<T>; aValue: T) : TSet<T>; overload;
        class operator Add(const aSet: TSet<T>; const aTArr: TArray<T>) : TSet<T>; overload;
        class operator Add(const aSet1: TSet<T>; const aSet2: TSet<T>) : TSet<T>; overload;
        // Exclude
        class operator Subtract(const aSet: TSet<T>; aValue: T): TSet<T>; overload;
        class operator Subtract(const aSet: TSet<T>; const aTArr: TArray<T>) : TSet<T>; overload;
        class operator Subtract(const aSet1: TSet<T>; const aSet2: TSet<T>) : TSet<T>; overload;
        // left in right, i.e right.Contains(left)
        class operator In(aValue: T; const aSet: TSet<T>): Boolean; overload;
        class operator In(const aTArr: TArray<T>; const aSet: TSet<T>): Boolean; overload;
        class operator In(const aSet1: TSet<T>; const aSet2: TSet<T>): Boolean; overload;
        // Intersect, mutually common, A and B
        class operator Multiply(const aSet: TSet<T>; aValue: T): TSet<T>; overload;
        class operator Multiply(const aSet: TSet<T>; const aTArr: TArray<T>): TSet<T>; overload;
        class operator Multiply(const aSet1,aSet2 : TSet<T>): TSet<T>; overload;
        // Symmetric difference, A xor B = (A+B) - A.Intersect(B)
        class operator LogicalXor(const aSet: TSet<T>; aValue: T): TSet<T>; overload;
        class operator LogicalXor(const aSet: TSet<T>; aTArr: TArray<T>): TSet<T>; overload;
        class operator LogicalXor(const aSet1,aSet2 : TSet<T>): TSet<T>; overload;
        // Identical set (Left = Right)
        class operator Equal(const aSet: TSet<T>; aValue: T): Boolean; overload;
        class operator Equal(const aSet: TSet<T>; aTArr: TArray<T>): Boolean; overload;
        class operator Equal(const aSetLeft,aSetRight: TSet<T>): Boolean; overload;
        // SubsetOf (Left <= Right)
        class operator LessThanOrEqual(const aSet: TSet<T>; aValue: T): Boolean; overload;
        class operator LessThanOrEqual(const aSet: TSet<T>; aTArr: TArray<T>): Boolean; overload;
        class operator LessThanOrEqual(const aSetLeft,aSetRight: TSet<T>): Boolean; overload;
        // SupersetOf (Left >= Right)
        class operator GreaterThanOrEqual(const aSet: TSet<T>; aValue: T): Boolean; overload;
        class operator GreaterThanOrEqual(const aSet: TSet<T>; aTArr: TArray<T>): Boolean; overload;
        class operator GreaterThanOrEqual(const aSetLeft,aSetRight: TSet<T>): Boolean; overload;
        // Set creator. With checkDuplicates = True repeated values are dropped.
        class function Create(const aTArr: array of T; checkDuplicates: Boolean = False): TSet<T>; static;
      private
        FSetArray : array of T;
        // Deliberately a String, not a Boolean: a managed field is predefined
        // as '' by the compiler, which GetSorted reads as "not sorted".
        FSorted : String;
        function GetEmpty: Boolean; inline;
        function GetItem(index: Integer): T; inline;
        function GetItemCount: Integer; inline;
        function GetSorted: Boolean; inline;
        procedure SetSorted( sorted: Boolean); inline;
      public
        // Add
        procedure Include(aValue: T); overload;
        procedure Include(const aTArr: TArray<T>); overload;
        procedure Include(const aTArr: array of T); overload;
        procedure Include(const aSet: TSet<T>); overload;
        // Subtract; A=[1,2,3]; B=[2,3,4]; B.Exclude(A) = B-A = [4]
        procedure Exclude(aValue: T); overload;
        procedure Exclude(const aTArr: TArray<T>); overload;
        procedure Exclude(const aTArr: array of T); overload;
        procedure Exclude(const aSet: TSet<T>); overload;
        // Multiply (A and B) A=[1,2,3]; B=[2,3,4]; B.Intersect(A) = B*A = [2,3]
        function Intersect(aValue: T): TSet<T>; overload;
        function Intersect(const aTArr: TArray<T>): TSet<T>; overload;
        function Intersect(const aTArr: array of T): TSet<T>; overload;
        function Intersect(const aSet: TSet<T>): TSet<T>; overload;

        // A xor B; A=[1,2,3]; B=[2,3,4]; (A+B)-A.Intersect(B) = [1,4]
        function SymmetricDiff(aValue: T): TSet<T>; overload;
        function SymmetricDiff(const aTArr: TArray<T>): TSet<T>; overload;
        function SymmetricDiff(const aTArr: array of T): TSet<T>; overload;
        function SymmetricDiff(const aSet: TSet<T>): TSet<T>; overload;
        // Identical set
        function Equal(aValue: T): Boolean; overload;
        function Equal(const aTArr: array of T; checkDuplicates: Boolean = False): Boolean; overload;
        function Equal(const aTArr: TArray<T>; checkDuplicates: Boolean = False): Boolean; overload;
        function Equal(const aSet: TSet<T>): Boolean; overload;
        //  Self <= aSet
        function SubsetOf(aValue: T): Boolean; overload;
        function SubsetOf(const aTArr: array of T; checkDuplicates: Boolean = False): Boolean; overload;
        function SubsetOf(const aTArr: TArray<T>; checkDuplicates: Boolean = False): Boolean; overload;
        function SubsetOf(const aSet: TSet<T>): Boolean; overload;
        // Self >= aSet
        function SupersetOf(aValue: T): Boolean; overload;
        function SupersetOf(const aTArr: array of T; checkDuplicates: Boolean = False): Boolean; overload;
        function SupersetOf(const aTArr: TArray<T>; checkDuplicates: Boolean = False): Boolean; overload;
        function SupersetOf(const aSet: TSet<T>): Boolean; overload;
        // Is included (an empty array/set is reported as NOT included)
        function Contains(aValue: T): Boolean; overload;
        function Contains(const aTArr: array of T): Boolean; overload;
        function Contains(const aTArr: TArray<T>): Boolean; overload;
        function Contains(const aSet: TSet<T>): Boolean; overload;

        procedure Sort; // QuickSort
        function Search( aValue: T): Boolean; // BinarySearch (Set must be sorted)
        procedure Clear;
        property IsSorted: Boolean read GetSorted;
        property IsEmpty: Boolean read GetEmpty;
        property Items[index: Integer]: T read GetItem; default;
        property ItemCount: Integer read GetItemCount;
      end;

    implementation

    // Builds a set from aTArr. When checkDuplicates is True each value is kept
    // only once (first occurrence wins); otherwise the array is copied as is.
    class function TSet<T>.Create(const aTArr: array of T; checkDuplicates: Boolean = False): TSet<T>;
    var
      i,j,elements : Integer;
      duplicate : Boolean;
      c : IEqualityComparer<T>;
    begin
      // Result is caller-supplied storage for a managed record and is not
      // guaranteed to arrive empty; a stale "sorted" flag would make Contains
      // binary-search unsorted data.
      Result.SetSorted(False);
      if checkDuplicates then
      begin
        c := TEqualityComparer<T>.Default;
        // This will remove duplicates
        SetLength(Result.FSetArray,Length(aTArr));
        elements := 0;
        for i := 0 to High(aTArr) do
        begin
          duplicate := False;
          for j := 0 to Pred(elements) do
          begin
            duplicate := c.Equals(Result.FSetArray[j],aTArr[i]);
            if duplicate then
              Break;
          end;
          if not duplicate then
          begin
            Result.FSetArray[elements] := aTArr[i];
            Inc(elements);
          end;
        end;
        // Shrink to the number of distinct values actually stored
        SetLength(Result.FSetArray,elements);
      end
      else
      begin
        SetLength(Result.FSetArray, Length(aTArr));
        for i := 0 to High(aTArr) do
          Result.FSetArray[i] := aTArr[i];
      end;
    end;

    // aSet + aValue : copy of aSet with aValue included.
    class operator TSet<T>.Add(const aSet: TSet<T>; aValue: T): TSet<T>;
    begin
      Result := aSet;
      Result.Include(aValue);
    end;

    // aSet + array : copy of aSet with every array element included.
    class operator TSet<T>.Add(const aSet: TSet<T>; const aTArr: TArray<T>): TSet<T>;
    begin
      Result := aSet;
      Result.Include(aTArr);
    end;

    // aSet1 + aSet2 : union of both sets.
    class operator TSet<T>.Add(const aSet1, aSet2: TSet<T>): TSet<T>;
    begin
      Result := aSet1;
      Result.Include(aSet2);
    end;

    // Appends aValue unless it is already present. Appending invalidates the
    // sorted flag.
    procedure TSet<T>.Include(aValue: T);
    begin
      if not Contains(aValue) then begin
        // SetLength also gives this record its own copy of the array
        SetLength(FSetArray,Length(FSetArray)+1);
        FSetArray[High(FSetArray)] := aValue;
        SetSorted(False);
      end;
    end;

    // Union with another set. An empty Self simply takes over aSet (including
    // its sorted flag).
    procedure TSet<T>.Include(const aSet: TSet<T>);
    begin
      if Self.IsEmpty then
        Self := aSet
      else
        Include(aSet.FSetArray);
    end;

    // Includes every element of aTArr.
    procedure TSet<T>.Include(const aTArr: TArray<T>);
    var
      i : Integer;
    begin
      for i := 0 to High(aTArr) do
        Self.Include(aTArr[i]);
    end;

    // Includes every element of the open array aTArr.
    procedure TSet<T>.Include(const aTArr: array of T);
    var
      i : Integer;
    begin
      for i := 0 to High(aTArr) do
        Self.Include(aTArr[i]);
    end;

    // Removes every element of aTArr that is present.
    procedure TSet<T>.Exclude(const aTArr: TArray<T>);
    var
      i : Integer;
    begin
      for i := 0 to High(aTArr) do
        Exclude(aTArr[i]);
    end;

    // Removes every element of the open array aTArr that is present.
    procedure TSet<T>.Exclude(const aTArr: array of T);
    var
      i : Integer;
    begin
      for i := 0 to High(aTArr) do
        Exclude(aTArr[i]);
    end;

    // Removes every element of aSet that is present.
    procedure TSet<T>.Exclude(const aSet: TSet<T>);
    begin
      Exclude(aSet.FSetArray);
    end;

    // Removes the first element equal to aValue, if any. The hole is filled
    // with the last element, so the order (and the sorted flag) is lost unless
    // the removed element was the last one.
    procedure TSet<T>.Exclude(aValue: T);
    var
      i : Integer;
      c : IEqualityComparer<T>;
    begin
      c := TEqualityComparer<T>.Default;
      for i := 0 to High(FSetArray) do
      begin
        if c.Equals(FSetArray[i],aValue) then
        begin
          SetLength(FSetArray,Length(FSetArray)); // Ensure unique dyn array
          if (i < High(FSetArray)) then
          begin
            FSetArray[i] := FSetArray[High(FSetArray)]; // Move last element
            Self.SetSorted(False);
          end;
          SetLength(FSetArray,Length(FSetArray)-1);
          Break;
        end;
      end;
    end;

    // aSet1 - aSet2 : elements of aSet1 that are not in aSet2.
    class operator TSet<T>.Subtract(const aSet1, aSet2: TSet<T>): TSet<T>;
    begin
      Result := aSet1;
      Result.Exclude(aSet2.FSetArray);
    end;

    // aSet - array : elements of aSet that are not in aTArr.
    class operator TSet<T>.Subtract(const aSet: TSet<T>;
      const aTArr: TArray<T>): TSet<T>;
    begin
      Result := aSet;
      Result.Exclude(aTArr);
    end;

    // aSet - aValue : copy of aSet without aValue.
    class operator TSet<T>.Subtract(const aSet: TSet<T>; aValue: T): TSet<T>;
    begin
      Result := aSet;
      Result.Exclude(aValue);
    end;

    // aValue in aSet
    class operator TSet<T>.In(aValue: T; const aSet: TSet<T>): Boolean;
    begin
      Result := aSet.Contains(aValue);
    end;

    // array in aSet : True when the array is non-empty and all of its elements
    // are in aSet.
    class operator TSet<T>.In(const aTArr: TArray<T>; const aSet: TSet<T>): Boolean;
    begin
      Result := aSet.Contains(aTArr);
    end;

    // aSet1 in aSet2 : True when aSet1 is non-empty and all of its elements
    // are in aSet2.
    class operator TSet<T>.In(const aSet1: TSet<T>; const aSet2: TSet<T>): Boolean;
    begin
      Result := aSet2.Contains(aSet1.FSetArray);
    end;

    // Membership test for a single value: binary search when the set is flagged
    // as sorted, linear scan with the default equality comparer otherwise.
    function TSet<T>.Contains(aValue: T): Boolean;
    var
      i : Integer;
      c : IEqualityComparer<T>;
    begin
      if IsSorted then
      begin
        Result := Search(aValue);
      end
      else
      begin
        Result := false;
        c := TEqualityComparer<T>.Default;
        for i := 0 to High(FSetArray) do
          if c.Equals(FSetArray[i],aValue) then
            Exit(True);
      end;
    end;

    // True when aTArr is non-empty and every element is in the set.
    // Contains(value) already picks binary or linear search itself.
    function TSet<T>.Contains(const aTArr: array of T): Boolean;
    var
      i: Integer;
    begin
      Result := High(aTArr) >= 0; // empty input => False
      for i := 0 to High(aTArr) do
        if not Contains(aTArr[i]) then
          Exit(false);
    end;

    // True when aTArr is non-empty and every element is in the set.
    function TSet<T>.Contains(const aTArr: TArray<T>): Boolean;
    var
      i : Integer;
    begin
      Result := High(aTArr) >= 0; // empty input => False
      for i := 0 to High(aTArr) do
        if not Contains(aTArr[i]) then
          Exit(false);
    end;

    // True when aSet is non-empty and every element of it is in this set.
    function TSet<T>.Contains(const aSet: TSet<T>): Boolean;
    begin
      Result := Contains(aSet.FSetArray);
    end;

    function TSet<T>.GetEmpty: Boolean;
    begin
      Result := (Self.ItemCount = 0);
    end;

    // No range check beyond what the compiler settings provide.
    function TSet<T>.GetItem(index: Integer): T;
    begin
      Result := Self.FSetArray[index];
    end;

    function TSet<T>.GetItemCount: Integer;
    begin
      Result := Length(Self.FSetArray);
    end;

    // Removes all elements and resets the sorted flag.
    procedure TSet<T>.Clear;
    begin
      SetLength(FSetArray,0);
      Self.SetSorted(False);
    end;

    // Get the mutually common elements, aka the intersect.
    class operator TSet<T>.Multiply(const aSet: TSet<T>; aValue: T): TSet<T>;
    begin
      Result:= aSet.Intersect(aValue);
    end;

    class operator TSet<T>.Multiply(const aSet: TSet<T>; const aTArr: TArray<T>): TSet<T>;
    begin
      Result:= aSet.Intersect(aTArr);
    end;

    class operator TSet<T>.Multiply(const aSet1,aSet2: TSet<T>): TSet<T>;
    begin
      Result := aSet1.Intersect(aSet2);
    end;

    // Returns [aValue] when aValue is in the set, otherwise an empty set.
    function TSet<T>.Intersect(aValue : T): TSet<T>;
    var
      common : TSet<T>;
    begin
      // Build in a local: its managed fields start out empty, whereas Result
      // is caller-supplied storage that may still hold earlier content.
      if Self.Contains(aValue) then
      begin
        common.Include(aValue);
        common.SetSorted(True); // a single element is trivially sorted
      end;
      Result := common;
    end;

    // Returns the elements of aSet that are also in Self, in aSet's order.
    function TSet<T>.Intersect(const aSet: TSet<T>): TSet<T>;
    var
      i,items : Integer;
      common : TSet<T>;
    begin
      // Build in a local so neither Self nor aSet is touched while scanning.
      SetLength(common.FSetArray,aSet.ItemCount); // worst case: all match
      items := 0;
      for i := 0 to High(aSet.FSetArray) do
      begin
        if Self.Contains(aSet.FSetArray[i]) then
        begin
          common.FSetArray[items] := aSet.FSetArray[i];
          Inc(items);
        end;
      end;
      SetLength(common.FSetArray,items);
      common.SetSorted(Self.IsSorted and aSet.IsSorted);
      Result := common;
    end;

    // Returns the distinct elements of aTArr that are also in Self.
    function TSet<T>.Intersect(const aTArr: array of T): TSet<T>;
    var
      i : Integer;
      common : TSet<T>;
    begin
      // Local accumulator starts empty; Include keeps the elements distinct.
      for i := 0 to High(aTArr) do
      begin
        if Self.Contains(aTArr[i]) then
          common.Include(aTArr[i]);
      end;
      common.SetSorted(False);
      Result := common;
    end;

    // Returns the distinct elements of aTArr that are also in Self.
    function TSet<T>.Intersect(const aTArr: TArray<T>): TSet<T>;
    var
      i : Integer;
      common : TSet<T>;
    begin
      // Local accumulator starts empty; Include keeps the elements distinct.
      for i := 0 to High(aTArr) do
      begin
        if Self.Contains(aTArr[i]) then
          common.Include(aTArr[i]);
      end;
      common.SetSorted(False);
      Result := common;
    end;

    // True when the set is exactly [aValue].
    function TSet<T>.Equal(aValue: T): Boolean;
    begin
      Result := (Self.ItemCount = 1) and Self.Contains(aValue);
    end;

    // Compares the set with an open array. With checkDuplicates the array is
    // first reduced to its distinct values. An empty set equals an empty array.
    function TSet<T>.Equal(const aTArr: array of T; checkDuplicates: Boolean = False): Boolean;
    begin
      if checkDuplicates then
        Result :=
          (Self.ItemCount <= Length(aTArr)) and
          Self.Equal(TSet<T>.Create(aTArr,True)) // Remove possible duplicates
      else
        Result :=
          (Self.ItemCount = Length(aTArr)) and
          // Contains() is False for empty input, so handle "both empty" here
          ((Length(aTArr) = 0) or Self.Contains(aTArr));
    end;

    // Compares the set with a TArray<T>. With checkDuplicates the array is
    // first reduced to its distinct values. An empty set equals an empty array.
    function TSet<T>.Equal(const aTArr: TArray<T>; checkDuplicates: Boolean = False): Boolean;
    begin
      if checkDuplicates then
        Result :=
          (Self.ItemCount <= Length(aTArr)) and
          Self.Equal(TSet<T>.Create(aTArr,True)) // Remove possible duplicates
      else
        Result :=
          (Self.ItemCount = Length(aTArr)) and
          // Contains() is False for empty input, so handle "both empty" here
          ((Length(aTArr) = 0) or Self.Contains(aTArr));
    end;

    // Same element count and every element of aSet present in Self.
    // Two empty sets are equal.
    function TSet<T>.Equal(const aSet: TSet<T>): Boolean;
    begin
      Result :=
        (Self.ItemCount = aSet.ItemCount) and
        // Contains() is False for an empty set, so handle "both empty" here
        (Self.IsEmpty or Contains(aSet));
    end;

    class operator TSet<T>.Equal(const aSet: TSet<T>; aValue: T): Boolean;
    begin
      Result := aSet.Equal(aValue);
    end;

    // The array is compared by its distinct values (duplicates are ignored).
    class operator TSet<T>.Equal(const aSet: TSet<T>; aTArr: TArray<T>): Boolean;
    begin
      Result := aSet.Equal(aTArr,True);
    end;

    class operator TSet<T>.Equal(const aSetLeft,aSetRight: TSet<T>): Boolean;
    begin
      Result := aSetLeft.Equal(aSetRight);
    end;

    //  Self <= [aValue] : True only when Self is exactly [aValue].
    function TSet<T>.SubsetOf(aValue: T): Boolean;
    begin
     Result := (Self.ItemCount = 1) and Self.Equal(aValue);
    end;

    function TSet<T>.SubsetOf(const aTArr: array of T; checkDuplicates: Boolean = False): Boolean;
    begin
      Result := Self.SubsetOf(TSet<T>.Create(aTArr,checkDuplicates));
    end;

    function TSet<T>.SubsetOf(const aTArr: TArray<T>; checkDuplicates: Boolean = False): Boolean;
    begin
      Result := SubsetOf(TSet<T>.Create(aTArr,checkDuplicates));
    end;

    //  Self <= aSet. Relies on Contains, so an empty Self yields False.
    function TSet<T>.SubsetOf(const aSet: TSet<T>): Boolean;
    begin
      Result :=
        (Self.ItemCount <= aSet.ItemCount) and
        aSet.Contains(Self);
    end;

    // SubsetOf (Left <= Right)
    class operator TSet<T>.LessThanOrEqual(const aSet: TSet<T>; aValue: T): Boolean;
    begin
      Result := aSet.SubsetOf(aValue);
    end;

    class operator TSet<T>.LessThanOrEqual(const aSet: TSet<T>; aTArr: TArray<T>): Boolean;
    begin
      Result := aSet.SubsetOf(aTArr,True);
    end;

    class operator TSet<T>.LessThanOrEqual(const aSetLeft,aSetRight: TSet<T>): Boolean;
    begin
      Result := aSetLeft.SubsetOf(aSetRight);
    end;

    // Self >= aSet. Relies on Contains, so an empty aSet yields False.
    function TSet<T>.SupersetOf(const aSet: TSet<T>): Boolean;
    begin
      Result :=
        (Self.ItemCount >= aSet.ItemCount) and
        Self.Contains(aSet);
    end;

    function TSet<T>.SupersetOf(aValue: T): Boolean;
    begin
      Result := (Self.ItemCount >= 1) and Self.Contains(aValue);
    end;

    function TSet<T>.SupersetOf(const aTArr: array of T; checkDuplicates: Boolean = False): Boolean;
    begin
      Result := SupersetOf(TSet<T>.Create(aTArr,checkDuplicates));
    end;

    function TSet<T>.SupersetOf(const aTArr: TArray<T>; checkDuplicates: Boolean = False): Boolean;
    begin
      Result := SupersetOf(TSet<T>.Create(aTArr,checkDuplicates));
    end;

    // SupersetOf (Left >= Right)
    class operator TSet<T>.GreaterThanOrEqual(const aSet: TSet<T>; aValue: T): Boolean;
    begin
      Result := aSet.SupersetOf(aValue);
    end;

    class operator TSet<T>.GreaterThanOrEqual(const aSet: TSet<T>; aTArr: TArray<T>): Boolean;
    begin
      Result := aSet.SupersetOf(aTArr,True);
    end;

    class operator TSet<T>.GreaterThanOrEqual(const aSetLeft,aSetRight: TSet<T>): Boolean;
    begin
      Result := aSetLeft.SupersetOf(aSetRight);
    end;

    // A xor B; A=[1,2,3]; B=[2,3,4]; (A+B)-A.Intersect(B) = [1,4]
    // In all SymmetricDiff overloads the intersection is taken from the
    // unmodified Self first and the answer is assembled in a local, so the
    // outcome does not depend on what the caller's destination refers to.
    function TSet<T>.SymmetricDiff(aValue: T): TSet<T>;
    var
      common, diff : TSet<T>;
    begin
      common := Self.Intersect(aValue);
      diff := Self;
      diff.Include(aValue);
      diff.Exclude(common);
      diff.SetSorted(False);
      Result := diff;
    end;

    function TSet<T>.SymmetricDiff(const aTArr: TArray<T>): TSet<T>;
    var
      common, diff : TSet<T>;
    begin
      common := Self.Intersect(aTArr);
      diff := Self;
      diff.Include(aTArr);
      diff.Exclude(common);
      diff.SetSorted(False);
      Result := diff;
    end;

    function TSet<T>.SymmetricDiff(const aTArr: array of T): TSet<T>;
    var
      common, diff : TSet<T>;
    begin
      common := Self.Intersect(aTArr);
      diff := Self;
      diff.Include(aTArr);
      diff.Exclude(common);
      diff.SetSorted(False);
      Result := diff;
    end;

    function TSet<T>.SymmetricDiff(const aSet: TSet<T>): TSet<T>;
    var
      common, diff : TSet<T>;
    begin
      common := Self.Intersect(aSet);
      diff := Self;
      diff.Include(aSet);
      diff.Exclude(common);
      diff.SetSorted(False);
      Result := diff;
    end;

    class operator TSet<T>.LogicalXor(const aSet: TSet<T>; aValue: T): TSet<T>;
    begin
      Result := aSet.SymmetricDiff(aValue);
    end;

    class operator TSet<T>.LogicalXor(const aSet: TSet<T>; aTArr: TArray<T>): TSet<T>;
    begin
      Result := aSet.SymmetricDiff(aTArr);
    end;

    class operator TSet<T>.LogicalXor(const aSet1,aSet2 : TSet<T>): TSet<T>;
    begin
      Result := aSet1.SymmetricDiff(aSet2);
    end;

    // Sorts the elements in place with the default comparer and flags the set
    // as sorted, which switches Contains to binary search.
    procedure TSet<T>.Sort;
    begin
      // Give this record its own array first so that other records still
      // referencing the same array are not reordered behind their back.
      SetLength(Self.FSetArray,Length(Self.FSetArray));
      TArray.Sort<T>(Self.FSetArray);
      SetSorted(True);
    end;

    // Binary search for aValue. Only meaningful while the set is sorted.
    function TSet<T>.Search(aValue: T): Boolean;
    var
      Index: Integer;
    begin
      Result := TArray.BinarySearch<T>(Self.FSetArray,aValue,Index);
    end;

    // '' (never set) and '0' both mean "not sorted".
    function TSet<T>.GetSorted: Boolean;
    begin
      Result := (FSorted = '1');
    end;

    procedure TSet<T>.SetSorted(sorted: Boolean);
    begin
      if sorted then
        FSorted := '1'
      else
        FSorted := '0';
    end;

    end.
    

    A benchmark:

    program ProjectGenericSet;

    {$APPTYPE CONSOLE}

    // Console benchmark for TSet<Integer>: times Contains on an unsorted and a
    // sorted sample set, and Intersect on an unsorted and a sorted test set.

    uses
      System.Diagnostics,
      System.Generics.Defaults,
      System.Generics.Collections,
      GenericSet in 'GenericSet.pas';

    var
      set1,set2,set3 : TSet<Integer>;
      sw : TStopWatch;
      ok : Boolean;
      i,max: Integer;
    begin
      Randomize;
      max := $10000;
      // Populate a sample set with 32K items (max DIV 2 distinct values < max).
      repeat
        set1.Include(Random(max));
      until (set1.ItemCount = (max DIV 2));
      // Populate a test set with 100 distinct items taken from the sample set
      repeat
        set2.Include(set1[Random(max DIV 2)]);
      until (set2.ItemCount = 100);
      // Linear-scan membership: the sample set has not been sorted yet
      WriteLn('Test in Sample (unsorted), 1.000 iterations...');
      sw := TStopWatch.StartNew;
      for i  := 1 TO 1000 DO
        ok := set1.Contains(set2);
      sw.Stop;
      WriteLn('Result:',ok,' ',sw.ElapsedMilliseconds,' [ms]');
      set1.Sort; // From here on Contains uses binary search on set1
      WriteLn('Test in Sample (sorted), 200.000 iterations...');
      sw := TStopWatch.StartNew;
      for i  := 1 TO 200000 DO
      begin
        ok := set1.Contains(set2);
      end;
      sw.Stop;
      WriteLn('Result:',ok,' ',sw.ElapsedMilliseconds,' [ms]');
      // Intersect of the (still unsorted) test set with itself
      WriteLn('Test*Test (unsorted), 200.000 iterations...');
      sw := TStopWatch.StartNew;
      for i  := 1 TO 200000 DO
      begin
        set3 := set2.Intersect(set2);
      end;
      sw.Stop;
      WriteLn('Result:',set3=set2,' ',sw.ElapsedMilliseconds,' [ms]');
      set2.Sort;
      // Same intersect, now with binary-search lookups in set2
      WriteLn('Test*Test (sorted), 200.000 iterations...');
      sw := TStopWatch.StartNew;
      for i  := 1 TO 200000 DO
      begin
        set3 := set2.Intersect(set2);
      end;
      sw.Stop;
      WriteLn('Result:',set3=set2,' ',sw.ElapsedMilliseconds,' [ms]');
      ReadLn; // keep the console window open
    end.
    

提交回复
热议问题