我们正在使用 Azure 表存储,并且在执行 InsertOrMerge 操作时偶尔会出现 408 超时。在这种情况下,我们想重试,但似乎没有针对这些错误遵循重试策略。
这是我们用来处理表格交互的一个类。GetFooEntityAsync 方法尝试从表存储中检索实体。如果不能,它会创建一个新的 FooEntity 并将其添加到表中(映射到 FooTableEntity)。
public class FooTableStorageBase
{
private readonly string tableName;
protected readonly CloudStorageAccount storageAccount;
protected TableRequestOptions DefaultTableRequestOptions { get; }
protected OperationContext DefaultOperationContext { get; }
public CloudTable Table
{
get
{
return storageAccount.CreateCloudTableClient().GetTableReference(tableName);
}
}
public FooTableStorage(string tableName)
{
if (String.IsNullOrWhiteSpace(tableName))
{
throw new ArgumentNullException(nameof(tableName));
}
this.tableName = tableName;
storageAccount = CloudStorageAccount.Parse(ConnectionString);
ServicePoint tableServicePoint = ServicePointManager.FindServicePoint(storageAccount.TableEndpoint);
tableServicePoint.UseNagleAlgorithm = false;
tableServicePoint.ConnectionLimit = 100; // Increasing connection limit from default of 2.
DefaultTableRequestOptions = new TableRequestOptions()
{
PayloadFormat = TablePayloadFormat.JsonNoMetadata,
MaximumExecutionTime = TimeSpan.FromSeconds(1),
RetryPolicy = new OnTimeoutRetry(TimeSpan.FromMilliseconds(250), 3),
LocationMode = LocationMode.PrimaryOnly
};
DefaultOperationContext = new OperationContext();
DefaultOperationContext.Retrying += (sender, args) =>
{
// This is never executed.
Debug.WriteLine($"Retry policy activated in {this.GetType().Name} due to HTTP code {args.RequestInformation.HttpStatusCode} with exception {args.RequestInformation.Exception.ToString()}");
};
DefaultOperationContext.RequestCompleted += (sender, args) =>
{
if (args.Response == null)
{
// This is occasionally executed - we want to retry in this case.
Debug.WriteLine($"Request failed in {this.GetType().Name} due to HTTP code {args.RequestInformation.HttpStatusCode} with exception {args.RequestInformation.Exception.ToString()}");
}
else
{
Debug.WriteLine($"{this.GetType().Name} operation complete: Status code {args.Response.StatusCode} at {args.Response.ResponseUri}");
}
};
Table.CreateIfNotExists(DefaultTableRequestOptions, DefaultOperationContext);
}
public async Task<FooEntity> GetFooEntityAsync()
{
var retrieveOperation = TableOperation.Retrieve<FooTableEntity>(FooTableEntity.GenerateKey());
var tableEntity = (await Table.ExecuteAsync(retrieveOperation, DefaultTableRequestOptions, DefaultOperationContext)).Result as FooTableEntity;
if (tableEntity != null)
{
return tableEntity.ToFooEntity();
}
var fooEntity = CalculateFooEntity();
var insertOperation = TableOperation.InsertOrMerge(new FooTableEntity(fooEntity));
var executeResult = await Table.ExecuteAsync(insertOperation);
if (executeResult.HttpStatusCode == 408)
{
// This is never executed.
Debug.WriteLine("Got a 408");
}
return fooEntity;
}
public class OnTimeoutRetry : IRetryPolicy
{
int maxRetryAttempts = 3;
TimeSpan defaultRetryInterval = TimeSpan.FromMilliseconds(250);
public OnTimeoutRetry(TimeSpan deltaBackoff, int retryAttempts)
{
maxRetryAttempts = retryAttempts;
defaultRetryInterval = deltaBackoff;
}
public IRetryPolicy CreateInstance()
{
return new OnTimeoutRetry(TimeSpan.FromMilliseconds(250), 3);
}
public bool ShouldRetry(int currentRetryCount, int statusCode, Exception lastException, out TimeSpan retryInterval, OperationContext operationContext)
{
retryInterval = defaultRetryInterval;
if (currentRetryCount >= maxRetryAttempts)
{
return false;
}
// Non-retryable exceptions are all 400 ( >=400 and <500) class exceptions (Bad gateway, Not Found, etc.) as well as 501 and 505.
// This custom retry policy also retries on a 408 timeout.
if ((statusCode >= 400 && statusCode <= 500 && statusCode != 408) || statusCode == 501 || statusCode == 505)
{
return false;
}
return true;
}
}
}
调用 GetFooEntityAsync() 时,偶尔会执行“请求失败”行。检查值时args.RequestInformation.HttpStatusCode
= 408。但是:
Debug.WriteLine("Got a 408");
在 GetFooEntity 方法中永远不会执行。Debug.WriteLine($"Retry policy activated...
在DefaultOperationContext.Retrying
委托中永远不会执行(我希望这会执行两次 - 这不是重试吗?)。DefaultOperationContext.RequestResults
包含一长串结果(大多数状态码为 404,有些为 204)。
根据这篇(相当旧的)博客文章,代码在 400 和 500 之间以及 501 和 505 之间的异常是不可重试的。然而,超时(408)正是我们想要重试的情况。也许我需要为这种情况编写自定义重试策略。
我不完全理解 408 的来源,因为除了调用 RequestCompleted 委托时我无法在代码中找到它。我一直在为我的重试策略尝试不同的设置,但没有运气。我在这里想念什么?如何从表存储中获得重试 408 的操作?
编辑:我已更新代码以显示我实施的自定义重试策略,以重试 408 错误。但是,似乎我的重试断点仍未被命中,因此似乎未触发重试。我的重试策略未激活的原因可能是什么?