I am attempting to write a function that weeds out consecutive duplicates, as determined by a given equality function, from a seq<'a> but with a twist:
Bit of an old question here, but I'm just looking for old examples to demonstrate a new library that I have been working on. It's a replacement for System.Linq.Enumerable, and it also has a wrapper to replace F#'s Seq. It's not complete yet, but it's polyfilled to match the existing APIs (i.e. anything not yet implemented just forwards to the existing functionality).
It is available on NuGet here: https://www.nuget.org/packages/Cistern.Linq.FSharp/
So I have taken your modified Seq from the bottom of your answer and "converted" it to Cistern.Linq.FSharp (which is just a search and replace of "Seq." with "Linq."), and then compared its runtime to your original. The Cistern version runs in well under 50% of the time (I get ~41%).
open System
open Cistern.Linq.FSharp
open System.Diagnostics
/// Collapses every run of consecutive elements that `equalityFn` considers
/// equal down to the last element of that run, using the Cistern `Linq`
/// functions.
///
/// equalityFn: predicate applied to an element and its immediate successor.
/// s: the input sequence.
/// Returns the surviving elements in their original order.
let dedupeTakingLastCistern equalityFn s =
    // Given an element and its successor, decide whether the element survives.
    let keepIfRunEnds (current, next) =
        match current, next with
        | Some a, Some b when equalityFn a b -> None // run continues: drop this one
        | Some a, Some _ -> Some a                   // successor differs: keep
        | _, None -> current                         // paired with the trailing sentinel: keep
        | None, Some _ -> None                       // sentinel followed by a value: nothing to keep
    s
    |> Linq.map Some
    // A trailing None sentinel gives the final real element a successor,
    // so pairwise yields a pair for it as well.
    |> fun wrapped -> Linq.append wrapped [None]
    |> Linq.pairwise
    |> Linq.map keepIfRunEnds
    |> Linq.choose id
/// Collapses every run of consecutive elements that `equalityFn` considers
/// equal down to the last element of that run, using the standard `Seq`
/// functions.
///
/// equalityFn: predicate applied to an element and its immediate successor.
/// s: the input sequence.
/// Returns a lazy sequence of the surviving elements in their original order.
let dedupeTakingLastSeq equalityFn s =
    // Given an element and its successor, decide whether the element survives.
    let keepIfRunEnds (current, next) =
        match current, next with
        | Some a, Some b when equalityFn a b -> None // run continues: drop this one
        | Some a, Some _ -> Some a                   // successor differs: keep
        | _, None -> current                         // paired with the trailing sentinel: keep
        | None, Some _ -> None                       // sentinel followed by a value: nothing to keep
    s
    |> Seq.map Some
    // A trailing None sentinel gives the final real element a successor,
    // so pairwise yields a pair for it as well.
    |> fun wrapped -> Seq.append wrapped [None]
    |> Seq.pairwise
    |> Seq.map keepIfRunEnds
    |> Seq.choose id
/// Times repeated runs of one dedupe implementation and prints the total.
///
/// data: the input handed to `f` on every iteration.
/// which: label printed in front of the timing.
/// f: function under test; it is given a structural-equality predicate and
///    `data`, and its result is forced into a list so the whole pipeline runs.
/// Prints "<which> <elapsed milliseconds>" for all iterations combined.
let test data which f =
    let iterations = 1000
    let structurallyEqual x y = x = y
    let sw = Stopwatch.StartNew ()
    for _ in 1 .. iterations do
        // Materialise the result: the pipeline is lazy, so nothing would be
        // measured without forcing it.
        f structurallyEqual data
        |> List.ofSeq
        |> ignore
    printfn "%s %d" which sw.ElapsedMilliseconds
/// Program entry point: benchmarks the Seq-based and Cistern-based dedupe
/// implementations against the same input and prints their timings.
/// Returns 0 as the process exit code.
[<EntryPoint>]
let main argv =
    // 10000 copies of the same value, i.e. a single long run of duplicates.
    let data = List.init 10000 (fun _ -> 1)
    // Run the pair of measurements five times so the later rounds can be
    // compared with the first one.
    for _ in 1 .. 5 do
        test data "Seq" dedupeTakingLastSeq
        test data "Cistern" dedupeTakingLastCistern
    0