The provided items are the result of a LINQ expression:

var items = from item in ItemsSource.RetrieveItems()
            where ...
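To guarantee the source is only iterated once even with several concurrent consumers, the query just needs to be wrapped with the SingleEnumeration extension method defined below. A sketch of the intended usage (the where filter shown here is a made-up placeholder, since the original clause is elided):

var items = (from item in ItemsSource.RetrieveItems()
             where item.IsInteresting   // hypothetical filter; the real one is elided above
             select item)
            .SingleEnumeration();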
using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.Concurrent;
using System.Threading;
using System.Threading.Tasks;

public static IEnumerable<T> SingleEnumeration<T>(this IEnumerable<T> source)
{
    return new SingleEnumerator<T>(source);
}
private class SingleEnumerator<T> : IEnumerable<T>
{
    private CacheEntry<T> cacheEntry;

    public SingleEnumerator(IEnumerable<T> sequence)
    {
        cacheEntry = new CacheEntry<T>(sequence.GetEnumerator());
    }

    public IEnumerator<T> GetEnumerator()
    {
        if (cacheEntry.FullyPopulated)
        {
            // The source has been fully consumed; serve everything from the cache.
            return cacheEntry.CachedValues.GetEnumerator();
        }
        else
        {
            // Otherwise walk the cache, pulling new items from the source as needed.
            return iterateSequence(cacheEntry).GetEnumerator();
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }
}
private static IEnumerable<T> iterateSequence<T>(CacheEntry<T> entry)
{
    using (var iterator = entry.CachedValues.GetEnumerator())
    {
        int i = 0;
        // ensureItemAt pulls the next item from the source (if needed) before
        // each read, so the cache holds an item at index i when MoveNext runs.
        while (entry.ensureItemAt(i) && iterator.MoveNext())
        {
            yield return iterator.Current;
            i++;
        }
    }
}
private class CacheEntry<T>
{
    public bool FullyPopulated { get; private set; }
    public ConcurrentQueue<T> CachedValues { get; private set; }

    private static object key = new object();
    private IEnumerator<T> sequence;

    public CacheEntry(IEnumerator<T> sequence)
    {
        this.sequence = sequence;
        CachedValues = new ConcurrentQueue<T>();
    }
    /// <summary>
    /// Ensure that the cache has an item at the provided index. If not, take an item from the
    /// input sequence and move it to the cache.
    ///
    /// The method is thread safe.
    /// </summary>
    /// <returns>True if the cache already had enough items or
    /// an item was moved to the cache,
    /// false if there were no more items in the sequence.</returns>
    public bool ensureItemAt(int index)
    {
        // If the cache already has the item we don't need to lock to know we
        // can get it.
        if (index < CachedValues.Count)
            return true;
        // If we're done there are no race conditions here either.
        if (FullyPopulated)
            return false;

        lock (key)
        {
            // Re-check the early-exit conditions in case they changed while we
            // were waiting on the lock.

            // We already have the cached item.
            if (index < CachedValues.Count)
                return true;
            // We don't have the cached item and there are no uncached items.
            if (FullyPopulated)
                return false;

            // We actually need to get the next item from the sequence.
            if (sequence.MoveNext())
            {
                CachedValues.Enqueue(sequence.Current);
                return true;
            }
            else
            {
                FullyPopulated = true;
                return false;
            }
        }
    }
}
So this has been edited (substantially) to support multithreaded access. Several threads can ask for items, and they are cached on an item-by-item basis; the sequence doesn't need to be iterated in its entirety before cached values can be returned. Below is a sample program that demonstrates this:
private static IEnumerable<int> interestingIntGenerationMethod(int maxValue)
{
    for (int i = 0; i < maxValue; i++)
    {
        Thread.Sleep(1000);
        Console.WriteLine("actually generating value: {0}", i);
        yield return i;
    }
}
public static void Main(string[] args)
{
    IEnumerable<int> sequence = interestingIntGenerationMethod(10)
        .SingleEnumeration();

    int numThreads = 3;
    for (int i = 0; i < numThreads; i++)
    {
        int taskID = i;
        Task.Factory.StartNew(() =>
        {
            foreach (int value in sequence)
            {
                Console.WriteLine("Task: {0} Value: {1}",
                    taskID, value);
            }
        });
    }

    Console.WriteLine("Press any key to exit...");
    Console.ReadKey(true);
}
You really need to see it run to understand the power here. As soon as a single thread forces the next value to actually be generated, all of the remaining threads can immediately print that generated value; a thread only waits when there are no uncached values left for it to print. (Obviously thread/thread-pool scheduling may result in one task taking longer than necessary to print its value.)
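To give a sense of what that looks like, a run might produce output roughly like this (illustrative only; the exact interleaving of tasks depends entirely on scheduling):

actually generating value: 0
Task: 0 Value: 0
Task: 2 Value: 0
Task: 1 Value: 0
actually generating value: 1
Task: 1 Value: 1
Task: 0 Value: 1
Task: 2 Value: 1
...

The important part is that each "actually generating value" line appears exactly once per value: the underlying generator runs only once, no matter how many tasks are consuming the sequence.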