Pipelines, multiplexing, and unbounded buffering

后端 未结 4 1516
猫巷女王i
猫巷女王i 2020-12-10 07:19

(NOTE: I'm using .NET 4, not .NET 4.5, so I cannot use the TPL's DataflowBlock classes.)

TL;DR Version

Ultimately, I\'m just look

4条回答
  •  野趣味
    野趣味 (楼主)
    2020-12-10 07:44

    Follow up

    For completeness, here is the code that I wound up with. Thanks to Martin James for his answer, which provided the basis for the solution.

    I'm still not completely happy with the multiplexor (see ParallelWorkProcessor.multiplex()). It works, but it seems a bit clunky.

    I used Martin James' idea about a work pool to prevent unbounded growth of the multiplexor buffer. However, I substituted a SemaphoreSlim for the work pool queue (since it provides the same functionality, but it's a bit simpler to use and uses fewer resources).

    The worker tasks write their completed items to a concurrent priority queue. This allows me to easily and efficiently find the next item to output.

    I used a sample concurrent priority queue from Microsoft, modified to provide an autoreset event that's signalled whenever a new item is enqueued.

    Here's the ParallelWorkProcessor class. You use it by providing it with three delegates; one to provide the work items, one to process a work item, and one to output a completed work item.

    using System;
    using System.Collections.Concurrent;
    using System.Collections.Generic;
    using System.Diagnostics.Contracts;
    using System.Threading;
    using System.Threading.Tasks;
    
    namespace Demo
    {
        /// <summary>
        /// Reads work items on one thread, processes them on several worker tasks, and writes
        /// the processed items on one thread in the same order in which they were read.
        /// </summary>
        /// <remarks>
        /// A semaphore sized at twice the worker count is acquired before each item is queued
        /// and released when that item is dequeued for output, so the number of items that have
        /// been read but not yet handed to the writer is bounded even when one item is slow.
        /// A null value is used as the end-of-stream sentinel in the output queue, so the
        /// process delegate must never return null.
        /// NOTE(review): exceptions thrown by the delegates are not observed here; a faulting
        /// delegate would presumably leave WaitForFinished() waiting - confirm and handle if needed.
        /// </remarks>
        /// <typeparam name="T">The work item type.</typeparam>
        public sealed class ParallelWorkProcessor<T> where T: class // T is the work item type.
        {
            public delegate T    Read();           // Called by only one thread. Returns null to signal end of input.
            public delegate T    Process(T block); // Called simultaneously by multiple threads. Must not return null.
            public delegate void Write(T block);   // Called by only one thread.

            /// <summary>
            /// Creates the processor and immediately starts the workers, the reader and the multiplexor.
            /// </summary>
            /// <param name="read">Supplies the next work item, or null when the input is exhausted.</param>
            /// <param name="process">Transforms one work item; invoked concurrently by the workers.</param>
            /// <param name="write">Receives the processed items in input order.</param>
            /// <param name="numWorkers">Number of worker tasks; zero or less means Environment.ProcessorCount.</param>
            /// <exception cref="ArgumentNullException">Any of the delegates is null.</exception>
            public ParallelWorkProcessor(Read read, Process process, Write write, int numWorkers = 0)
            {
                // Fail fast on the calling thread rather than inside a background task.
                if (read == null)
                {
                    throw new ArgumentNullException("read");
                }

                if (process == null)
                {
                    throw new ArgumentNullException("process");
                }

                if (write == null)
                {
                    throw new ArgumentNullException("write");
                }

                _read    = read;
                _process = process;
                _write   = write;

                numWorkers = (numWorkers > 0) ? numWorkers : Environment.ProcessorCount;

                _workPool    = new SemaphoreSlim(numWorkers*2);
                _inputQueue  = new BlockingCollection<WorkItem>(numWorkers);
                _outputQueue = new ConcurrentPriorityQueue<int, T>();
                _workers     = new Task[numWorkers];

                startWorkers();
                Task.Factory.StartNew(enqueueWorkItems);
                _multiplexor = Task.Factory.StartNew(multiplex);
            }

            // Starts one task per worker slot; each task drains the input queue.
            private void startWorkers()
            {
                for (int i = 0; i < _workers.Length; ++i)
                {
                    _workers[i] = Task.Factory.StartNew(processBlocks);
                }
            }

            // Reader loop: tags each item with a sequence index and queues it for the workers.
            private void enqueueWorkItems()
            {
                int index = 0;

                while (true)
                {
                    T data = _read();

                    if (data == null) // Signals end of input.
                    {
                        _inputQueue.CompleteAdding();
                        _outputQueue.Enqueue(index, null); // Special sentinel: null value, key == number of items read.
                        break;
                    }

                    _workPool.Wait(); // Blocks while too many items are in flight.
                    _inputQueue.Add(new WorkItem(data, index++));
                }
            }

            // Writer loop: emits processed items strictly in index order and stops once the
            // next required index equals the sentinel's key.
            private void multiplex()
            {
                int index = 0;           // Next required index.
                int last = int.MaxValue; // Replaced by the sentinel's key once the sentinel is seen.

                while (index != last)
                {
                    KeyValuePair<int, T> workItem;
                    _outputQueue.WaitForNewItem(); // There will always be at least one item - the sentinel item.

                    while ((index != last) && _outputQueue.TryPeek(out workItem))
                    {
                        if (workItem.Value == null) // The sentinel item has a null value to indicate that it's the sentinel.
                        {
                            last = workItem.Key;  // The sentinel's key is the index of the last block + 1.

                            if (index != last)
                            {
                                // The sentinel is at the front but earlier blocks are still being
                                // processed. Block until one arrives instead of re-peeking the
                                // sentinel in a tight loop. This relies on the same signalling
                                // assumption as the out-of-order branch below.
                                _outputQueue.WaitForNewItem();
                            }
                        }
                        else if (workItem.Key == index) // Is this block the next one that we want?
                        {
                            // Even if new items are added to the queue while we're here, the new items will be lower priority.
                            // Therefore it is safe to assume that the item we will dequeue now is the same one we peeked at.

                            _outputQueue.TryDequeue(out workItem);
                            Contract.Assume(workItem.Key == index); // This *must* be the case.
                            _workPool.Release();                    // Allow the enqueuer to queue another work item.
                            _write(workItem.Value);
                            ++index;
                        }
                        else // If it's not the block we want, we know we'll get a new item at some point.
                        {
                            _outputQueue.WaitForNewItem();
                        }
                    }
                }
            }

            // Worker loop: processes queued items until the input queue is completed and drained.
            private void processBlocks()
            {
                foreach (var block in _inputQueue.GetConsumingEnumerable())
                {
                    var processedData = _process(block.Data);
                    _outputQueue.Enqueue(block.Index, processedData);
                }
            }

            /// <summary>Waits for the multiplexor task to finish writing all output.</summary>
            /// <param name="maxMillisecondsToWait">Timeout in milliseconds; can be Timeout.Infinite.</param>
            /// <returns>The result of waiting on the multiplexor task with the given timeout.</returns>
            public bool WaitForFinished(int maxMillisecondsToWait) // Can be Timeout.Infinite.
            {
                return _multiplexor.Wait(maxMillisecondsToWait);
            }

            // An input item paired with its position in the input sequence.
            private sealed class WorkItem
            {
                public WorkItem(T data, int index)
                {
                    Data  = data;
                    Index = index;
                }

                public T   Data  { get; private set; }
                public int Index { get; private set; }
            }

            private readonly Task[] _workers;
            private readonly Task _multiplexor;
            private readonly SemaphoreSlim _workPool;                       // Limits the number of in-flight items.
            private readonly BlockingCollection<WorkItem> _inputQueue;      // Reader -> workers.
            private readonly ConcurrentPriorityQueue<int, T> _outputQueue;  // Workers -> multiplexor, keyed by index.
            private readonly Read    _read;
            private readonly Process _process;
            private readonly Write   _write;
        }
    }
    

    And here's my test code:

    using System;
    using System.Diagnostics;
    using System.Threading;
    
    namespace Demo
    {
        /// <summary>
        /// Console test harness: pushes 200 byte-array blocks through a ParallelWorkProcessor
        /// with 8 workers, artificially delaying block 10 by five seconds, and prints progress
        /// along with the total elapsed time.
        /// </summary>
        public static class Program
        {
            private static void Main(string[] args)
            {
                _rng = new Random(34324);

                int threadCount = 8;
                _maxBlocks = 200;
                ThreadPool.SetMinThreads(threadCount + 2, 4); // Kludge to prevent slow thread startup.

                var stopwatch = new Stopwatch();

                _numBlocks = _maxBlocks;
                stopwatch.Restart();
                var processor = new ParallelWorkProcessor<byte[]>(read, process, write, threadCount);
                processor.WaitForFinished(Timeout.Infinite);

                Console.WriteLine("\n\nFinished in " + stopwatch.Elapsed + "\n\n");
            }

            // Supplies the next input block, or null once _maxBlocks blocks have been handed out.
            // The first byte of each block carries its 1-based sequence number.
            private static byte[] read()
            {
                if (_numBlocks-- == 0)
                {
                    return null;
                }

                var result = new byte[128];
                result[0] = (byte)(_maxBlocks-_numBlocks);
                Console.WriteLine("Supplied input: " + result[0]);
                return result;
            }

            // Simulates work of varying duration. Passed as the processor's process delegate,
            // which is invoked concurrently by several worker tasks.
            private static byte[] process(byte[] data)
            {
                if (data[0] == 10) // Hack for test purposes. Make it REALLY slow for this item!
                {
                    Console.WriteLine("Delaying a call to process() for 5s for ID 10");
                    Thread.Sleep(5000);
                }

                int jitter;

                // System.Random is not thread-safe, and this method runs on several threads
                // at once, so access to the shared instance is serialized.
                lock (_rngLock)
                {
                    jitter = _rng.Next(50);
                }

                Thread.Sleep(10 + jitter);
                Console.WriteLine("Processed: " + data[0]);
                return data;
            }

            // Receives each processed block from the processor's write delegate.
            private static void write(byte[] data)
            {
                Console.WriteLine("Received output: " + data[0]);
            }

            private static readonly object _rngLock = new object(); // Guards _rng.
            private static Random _rng;
            private static int _numBlocks; // Blocks still to be supplied by read().
            private static int _maxBlocks; // Total number of blocks in the test run.
        }
    }
    

提交回复
热议问题