I need to introduce a retry policy to the workflow. Let\'s say there are 3 blocks that are connected in such a way:
var executionOptions = new ExecutionDataf
In addition to svick's excellent answer, there are a couple of other options:
TransientFaultHandling.Core - just set MaxDegreeOfParallelism to Unbounded so the other messages can get through.LinkTo that examines whether another retry is necessary. This approach is more complex; you'd have to add a delay to your block if it is doing a retry, and add a TransformBlock to remove the failure/retry information for the rest of the mesh.Here are two methods CreateRetryTransformBlock and CreateRetryActionBlock that operate under these assumptions:
CreateRetryActionBlock). MaxDegreeOfParallelism, BoundedCapacity, CancellationToken and EnsureOrdered, on top of the options related to the retry functionality. The implementation below uses a SemaphoreSlim to control the level of concurrency between operations that are attempted for the first time, and previously faulted operations that are retried after their delay duration has elapsed.
public class RetryExecutionDataflowBlockOptions : ExecutionDataflowBlockOptions
{
/// <summary>The limit after which an item is returned as failed.</summary>
public int MaxAttemptsPerItem { get; set; } = 1;
/// <summary>The delay duration before retrying an item.</summary>
public TimeSpan RetryDelay { get; set; } = TimeSpan.Zero;
/// <summary>The limit after which the block transitions to a faulted
/// state (unlimited is the default).</summary>
public int MaxRetriesTotal { get; set; } = -1;
}
public readonly struct RetryResult<TInput, TOutput>
{
public readonly TInput Input { get; }
public readonly TOutput Output { get; }
public readonly bool Success { get; }
public readonly Exception[] Exceptions { get; }
public bool Failed => !Success;
public Exception FirstException => Exceptions != null ? Exceptions[0] : null;
public int Attempts =>
Exceptions != null ? Exceptions.Length + (Success ? 1 : 0) : 1;
public RetryResult(TInput input, TOutput output, bool success,
Exception[] exceptions)
{
Input = input;
Output = output;
Success = success;
Exceptions = exceptions;
}
}
public class RetryLimitException : Exception
{
public RetryLimitException(string message, Exception innerException)
: base(message, innerException) { }
}
public static IPropagatorBlock<TInput, RetryResult<TInput, TOutput>>
CreateRetryTransformBlock<TInput, TOutput>(
Func<TInput, Task<TOutput>> transform,
RetryExecutionDataflowBlockOptions dataflowBlockOptions)
{
if (transform == null) throw new ArgumentNullException(nameof(transform));
if (dataflowBlockOptions == null)
throw new ArgumentNullException(nameof(dataflowBlockOptions));
int maxAttemptsPerItem = dataflowBlockOptions.MaxAttemptsPerItem;
int maxRetriesTotal = dataflowBlockOptions.MaxRetriesTotal;
TimeSpan retryDelay = dataflowBlockOptions.RetryDelay;
if (maxAttemptsPerItem < 1) throw new ArgumentOutOfRangeException(
nameof(dataflowBlockOptions.MaxAttemptsPerItem));
if (maxRetriesTotal < -1) throw new ArgumentOutOfRangeException(
nameof(dataflowBlockOptions.MaxRetriesTotal));
if (retryDelay < TimeSpan.Zero) throw new ArgumentOutOfRangeException(
nameof(dataflowBlockOptions.RetryDelay));
var cancellationToken = dataflowBlockOptions.CancellationToken;
var exceptionsCount = 0;
var semaphore = new SemaphoreSlim(
dataflowBlockOptions.MaxDegreeOfParallelism);
async Task<(TOutput, Exception)> ProcessOnceAsync(TInput item)
{
await semaphore.WaitAsync(); // Preserve the SynchronizationContext
try
{
var result = await transform(item).ConfigureAwait(false);
return (result, null);
}
catch (Exception ex)
{
if (maxRetriesTotal != -1)
{
if (Interlocked.Increment(ref exceptionsCount) > maxRetriesTotal)
{
throw new RetryLimitException($"The max retry limit " +
$"({maxRetriesTotal}) has been reached.", ex);
}
}
return (default, ex);
}
finally
{
semaphore.Release();
}
}
async Task<Task<RetryResult<TInput, TOutput>>> ProcessWithRetryAsync(
TInput item)
{
// Creates a two-stages operation. Preserves the context on every await.
var (result, firstException) = await ProcessOnceAsync(item);
if (firstException == null) return Task.FromResult(
new RetryResult<TInput, TOutput>(item, result, true, null));
return RetryStageAsync();
async Task<RetryResult<TInput, TOutput>> RetryStageAsync()
{
var exceptions = new List<Exception>();
exceptions.Add(firstException);
for (int i = 2; i <= maxAttemptsPerItem; i++)
{
await Task.Delay(retryDelay, cancellationToken);
var (result, exception) = await ProcessOnceAsync(item);
if (exception != null)
exceptions.Add(exception);
else
return new RetryResult<TInput, TOutput>(item, result,
true, exceptions.ToArray());
}
return new RetryResult<TInput, TOutput>(item, default, false,
exceptions.ToArray());
};
}
// The input block awaits the first stage of each operation
var input = new TransformBlock<TInput, Task<RetryResult<TInput, TOutput>>>(
item => ProcessWithRetryAsync(item), dataflowBlockOptions);
// The output block awaits the second (and final) stage of each operation
var output = new TransformBlock<Task<RetryResult<TInput, TOutput>>,
RetryResult<TInput, TOutput>>(t => t, dataflowBlockOptions);
input.LinkTo(output, new DataflowLinkOptions { PropagateCompletion = true });
// In case of failure ensure that the input block is faulted too,
// so that its input/output queues are emptied, and any pending
// SendAsync operations are aborted
PropagateFailure(output, input);
return DataflowBlock.Encapsulate(input, output);
async void PropagateFailure(IDataflowBlock block1, IDataflowBlock block2)
{
try { await block1.Completion.ConfigureAwait(false); }
catch (Exception ex) { block2.Fault(ex); }
}
}
public static ITargetBlock<TInput> CreateRetryActionBlock<TInput>(
Func<TInput, Task> action,
RetryExecutionDataflowBlockOptions dataflowBlockOptions)
{
if (action == null) throw new ArgumentNullException(nameof(action));
var block = CreateRetryTransformBlock<TInput, object>(async input =>
{
await action(input).ConfigureAwait(false); return null;
}, dataflowBlockOptions);
var nullTarget = DataflowBlock.NullTarget<RetryResult<TInput, object>>();
block.LinkTo(nullTarget);
return block;
}
I think you pretty much have to do that, you have to track the remaining number of retries for a message and you have to schedule the retried attempt somehow.
But you could make this better by encapsulating it in a separate method. Something like:
// it's a private class, so public fields are okay
private class RetryingMessage<T>
{
public T Data;
public int RetriesRemaining;
public readonly List<Exception> Exceptions = new List<Exception>();
}
public static IPropagatorBlock<TInput, TOutput>
CreateRetryingBlock<TInput, TOutput>(
Func<TInput, Task<TOutput>> transform, int numberOfRetries,
TimeSpan retryDelay, Action<IEnumerable<Exception>> failureHandler)
{
var source = new TransformBlock<TInput, RetryingMessage<TInput>>(
input => new RetryingMessage<TInput>
{ Data = input, RetriesRemaining = numberOfRetries });
// TransformManyBlock, so that we can propagate zero results on failure
TransformManyBlock<RetryingMessage<TInput>, TOutput> target = null;
target = new TransformManyBlock<RetryingMessage<TInput>, TOutput>(
async message =>
{
try
{
return new[] { await transform(message.Data) };
}
catch (Exception ex)
{
message.Exceptions.Add(ex);
if (message.RetriesRemaining == 0)
{
failureHandler(message.Exceptions);
}
else
{
message.RetriesRemaining--;
Task.Delay(retryDelay)
.ContinueWith(_ => target.Post(message));
}
return null;
}
});
source.LinkTo(
target, new DataflowLinkOptions { PropagateCompletion = true });
return DataflowBlock.Encapsulate(source, target);
}
I have added code to track the exceptions, because I think that failures should not be ignored, they should be at the very least logged.
Also, this code doesn't work very well with completion: if there are retries waiting for their delay and you Complete() the block, it will immediately complete and the retries will be lost. If that's a problem for you, you will have to track outstanding reties and complete target when source completes and no retries are waiting.