Documentation
adrs/030-distributed-caching-strategy.md
ADR-030: Distributed Caching Strategy
Status
Proposed
Context
As Dynaplex scales to handle more users and data, we need a caching strategy to improve performance and reduce database load. Currently, we have minimal caching, leading to repeated database queries for the same data. With our microservices architecture, we need a distributed caching solution that can be shared across services.
Current challenges:
- Repeated database queries for reference data
- No cache invalidation strategy
- Each service instance has its own memory cache (not shared)
- Poor performance for frequently accessed data
- No caching for API responses
Requirements:
- Distributed cache accessible by all service instances
- Cache invalidation across services
- Support for different cache expiration policies
- High availability and performance
- Cost-effective solution
Decision
We propose implementing a distributed caching strategy with a provider-agnostic abstraction layer, allowing us to evaluate and choose the best caching solution for our needs.
Initial evaluation candidates:
- Redis (leading candidate)
- Azure Cache for Redis (managed Redis)
- NCache
- Hazelcast
- SQL Server In-Memory tables (for specific scenarios)
Consequences
Positive
- Performance: Significantly faster data access
- Scalability: Reduced database load
- Cost Savings: Fewer database queries
- Consistency: Shared cache across instances
- Flexibility: Provider-agnostic design
- Resilience: Services can keep serving data that is already cached and unexpired during a database outage
Negative
- Complexity: Cache invalidation is hard
- Cost: Additional infrastructure required
- Consistency: Potential for stale data
- Debugging: Harder to debug cache-related issues
- Memory: Additional memory requirements
Neutral
- Provider Selection: Must evaluate and choose provider
- Network Latency: Cache calls have network overhead
- Monitoring: Need cache-specific monitoring
Proposed Implementation
Cache Abstraction Layer
/// <summary>
/// Provider-agnostic contract for a cache shared by all service instances.
/// </summary>
public interface IDistributedCacheService
{
    // ----- Reads -----

    /// <summary>Checks whether an entry is stored under <paramref name="key"/>.</summary>
    Task<bool> ExistsAsync(string key, CancellationToken cancellationToken = default);

    /// <summary>Reads the entry stored under <paramref name="key"/>.</summary>
    /// <returns>The cached value, or <c>default</c> when nothing usable is cached.</returns>
    Task<T?> GetAsync<T>(string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Reads the entry stored under <paramref name="key"/>; when it is absent, produces the value
    /// with <paramref name="factory"/> and stores it using <paramref name="options"/>.
    /// </summary>
    Task<T> GetOrCreateAsync<T>(
        string key,
        Func<Task<T>> factory,
        CacheOptions? options = null,
        CancellationToken cancellationToken = default);

    // ----- Writes -----

    /// <summary>Stores <paramref name="value"/> under <paramref name="key"/>.</summary>
    Task SetAsync<T>(
        string key,
        T value,
        CacheOptions? options = null,
        CancellationToken cancellationToken = default);

    // ----- Invalidation -----

    /// <summary>Removes the entry stored under <paramref name="key"/>.</summary>
    Task RemoveAsync(string key, CancellationToken cancellationToken = default);

    /// <summary>Removes every entry whose key matches <paramref name="pattern"/>.</summary>
    Task RemoveByPatternAsync(string pattern, CancellationToken cancellationToken = default);
}
/// <summary>
/// Per-entry settings passed to the cache when a value is stored.
/// How each setting is honored is up to the cache provider implementation.
/// </summary>
public class CacheOptions
{
    /// <summary>Optional tags attached to the entry so related entries can be invalidated together.</summary>
    public string[]? Tags { get; set; }

    /// <summary>Relative priority of the entry; defaults to <see cref="CachePriority.Normal"/>.</summary>
    public CachePriority Priority { get; set; } = CachePriority.Normal;

    /// <summary>Optional sliding lifetime of the entry; <c>null</c> when not specified.</summary>
    public TimeSpan? SlidingExpiration { get; set; }

    /// <summary>Optional absolute lifetime of the entry; <c>null</c> when not specified.</summary>
    public TimeSpan? AbsoluteExpiration { get; set; }
}
/// <summary>
/// Relative priority hint for a cache entry.
/// NOTE(review): how (or whether) a provider maps these levels to eviction behavior is not
/// defined in this document — confirm per provider.
/// </summary>
public enum CachePriority
{
    /// <summary>Lowest priority level.</summary>
    Low = 0,

    /// <summary>Default priority level.</summary>
    Normal = 1,

    /// <summary>Elevated priority level.</summary>
    High = 2,

    /// <summary>Highest level; the name suggests the entry should not be evicted.</summary>
    NeverRemove = 3
}
Redis Implementation
/// <summary>
/// Redis-backed <see cref="IDistributedCacheService"/> that stores values as camelCase JSON strings.
/// </summary>
/// <remarks>
/// Single-key operations (get, set, exists, remove) log and swallow Redis/serialization failures so
/// that a cache outage degrades to cache misses instead of breaking the caller. Pattern-based
/// removal lets failures propagate so that a bulk invalidation is never silently partial.
/// </remarks>
public class RedisCacheService : IDistributedCacheService
{
    /// <summary>Lifetime applied when the caller supplies no expiration.</summary>
    private static readonly TimeSpan DefaultExpiration = TimeSpan.FromMinutes(5);

    /// <summary>Maximum number of keys sent in one delete command during pattern removal.</summary>
    private const int DeleteBatchSize = 500;

    private readonly IConnectionMultiplexer _redis;
    private readonly IDatabase _database;
    private readonly ILogger<RedisCacheService> _logger;
    private readonly JsonSerializerOptions _jsonOptions;

    /// <summary>Creates the service on top of an existing multiplexer, using its default database.</summary>
    public RedisCacheService(IConnectionMultiplexer redis, ILogger<RedisCacheService> logger)
    {
        _redis = redis ?? throw new ArgumentNullException(nameof(redis));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _database = redis.GetDatabase();
        _jsonOptions = new JsonSerializerOptions
        {
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase
        };
    }

    /// <summary>Reads and deserializes the entry stored under <paramref name="key"/>.</summary>
    /// <returns>The cached value, or <c>default</c> on a miss or when the read fails.</returns>
    public async Task<T?> GetAsync<T>(string key, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var (_, value) = await TryGetAsync<T>(key);
        return value;
    }

    /// <summary>
    /// Serializes <paramref name="value"/> and stores it under <paramref name="key"/>.
    /// Failures are logged, never thrown.
    /// </summary>
    /// <remarks>
    /// The entry lifetime is <see cref="CacheOptions.AbsoluteExpiration"/>, otherwise
    /// <see cref="CacheOptions.SlidingExpiration"/>, otherwise five minutes. The sliding value is
    /// used as a plain time-to-live: this implementation does not renew it on reads.
    /// </remarks>
    public async Task SetAsync<T>(string key, T value, CacheOptions? options = null, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();
        try
        {
            var json = JsonSerializer.Serialize(value, _jsonOptions);
            var expiry = options?.AbsoluteExpiration ?? options?.SlidingExpiration ?? DefaultExpiration;
            await _database.StringSetAsync(key, json, expiry);

            // Record the key in one Redis set per tag so the whole tag can be invalidated later.
            if (options?.Tags is { Length: > 0 } tags)
            {
                foreach (var tag in tags)
                {
                    await _database.SetAddAsync($"tag:{tag}", key);
                }
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error setting cache key {Key}", key);
            // Don't throw - caching should not break the application
        }
    }

    /// <summary>Deletes the entry stored under <paramref name="key"/>. Failures are logged, never thrown.</summary>
    public async Task RemoveAsync(string key, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();
        try
        {
            await _database.KeyDeleteAsync(key);
        }
        catch (Exception ex)
        {
            // The entry still expires through its time-to-live, so staleness is bounded.
            _logger.LogError(ex, "Error removing cache key {Key}", key);
        }
    }

    /// <summary>Checks whether <paramref name="key"/> exists.</summary>
    /// <returns><c>true</c> when the key exists; <c>false</c> when it does not or the check fails.</returns>
    public async Task<bool> ExistsAsync(string key, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();
        try
        {
            return await _database.KeyExistsAsync(key);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error checking cache key {Key}", key);
            return false;
        }
    }

    /// <summary>
    /// Deletes every key matching the glob <paramref name="pattern"/> on all connected primary servers.
    /// </summary>
    /// <remarks>
    /// Keys are streamed with the asynchronous key scan and deleted in batches, so neither the
    /// calling thread nor memory is tied up by a large match set. Exceptions propagate to the caller.
    /// </remarks>
    public async Task RemoveByPatternAsync(string pattern, CancellationToken cancellationToken = default)
    {
        foreach (var endpoint in _redis.GetEndPoints())
        {
            var server = _redis.GetServer(endpoint);

            // Replicas hold copies of the primaries' keys; scanning them would only repeat work.
            if (!server.IsConnected || server.IsReplica)
            {
                continue;
            }

            var batch = new List<RedisKey>(DeleteBatchSize);
            await foreach (var key in server.KeysAsync(_database.Database, pattern).WithCancellation(cancellationToken))
            {
                batch.Add(key);
                if (batch.Count == DeleteBatchSize)
                {
                    await _database.KeyDeleteAsync(batch.ToArray());
                    batch.Clear();
                }
            }

            if (batch.Count > 0)
            {
                await _database.KeyDeleteAsync(batch.ToArray());
            }
        }
    }

    /// <summary>
    /// Returns the cached value for <paramref name="key"/>, or invokes <paramref name="factory"/>,
    /// stores its result and returns it.
    /// </summary>
    /// <remarks>
    /// A <c>null</c> result is returned but not cached, so it is recomputed on the next call.
    /// Concurrent callers that miss at the same time each invoke <paramref name="factory"/>.
    /// </remarks>
    public async Task<T> GetOrCreateAsync<T>(string key, Func<Task<T>> factory, CacheOptions? options = null, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(factory);
        cancellationToken.ThrowIfCancellationRequested();

        // An explicit "found" flag is required: for value types a miss yields default(T), which a
        // null comparison cannot tell apart from a real cached value.
        var (found, cached) = await TryGetAsync<T>(key);
        if (found && cached is not null)
        {
            return cached;
        }

        var value = await factory();
        if (value is not null)
        {
            await SetAsync(key, value, options, cancellationToken);
        }

        return value;
    }

    /// <summary>
    /// Reads <paramref name="key"/> and reports whether a payload was present.
    /// Read and deserialization failures are logged and reported as a miss.
    /// </summary>
    private async Task<(bool Found, T? Value)> TryGetAsync<T>(string key)
    {
        try
        {
            RedisValue payload = await _database.StringGetAsync(key);
            if (payload.IsNullOrEmpty)
            {
                return (false, default);
            }

            return (true, JsonSerializer.Deserialize<T>(payload.ToString(), _jsonOptions));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error getting cache key {Key}", key);
            return (false, default); // Fail gracefully
        }
    }
}
Cache Key Strategy
/// <summary>
/// Central factory for cache key strings. Keys are colon-separated segments, from the
/// entity family on the left to the specific identifier on the right.
/// </summary>
public static class CacheKeys
{
    private const char Separator = ':';

    // ----- Assets -----

    /// <summary>Key of a single asset.</summary>
    public static string Asset(int id)
    {
        return string.Join(Separator, "asset", id);
    }

    /// <summary>Key of the asset list for one asset type.</summary>
    public static string AssetsByType(int typeId)
    {
        return string.Join(Separator, "assets", "type", typeId);
    }

    /// <summary>Key of the asset list for one location.</summary>
    public static string AssetsByLocation(int locationId)
    {
        return string.Join(Separator, "assets", "location", locationId);
    }

    // ----- Asset types -----

    /// <summary>Key of the full asset-type list.</summary>
    public static string AssetTypes()
    {
        return "asset-types:all";
    }

    /// <summary>Key of a single asset type.</summary>
    public static string AssetType(int id)
    {
        return string.Join(Separator, "asset-type", id);
    }

    // ----- Users -----

    /// <summary>Key of a single user.</summary>
    public static string User(Guid userId)
    {
        return string.Join(Separator, "user", userId);
    }

    /// <summary>Key of the permission set of a single user.</summary>
    public static string UserPermissions(Guid userId)
    {
        return string.Join(Separator, "user", userId, "permissions");
    }

    // ----- Locations -----

    /// <summary>Key of the location hierarchy rooted at <paramref name="rootId"/>.</summary>
    public static string LocationHierarchy(int rootId)
    {
        return string.Join(Separator, "location", "hierarchy", rootId);
    }

    // ----- Versioning -----

    /// <summary>
    /// Prefixes <paramref name="key"/> with a version segment (<c>v{version}</c>) so that entries
    /// written before a breaking change are no longer addressed.
    /// </summary>
    public static string Versioned(string key, int version = 1)
    {
        return string.Join(Separator, "v" + version, key);
    }
}
Service Integration
/// <summary>
/// Example service showing cache-aside reads and write-time invalidation for assets.
/// </summary>
public class AssetService
{
    private readonly IDistributedCacheService _cache;
    private readonly CatalogDb _db;
    private readonly IEventBus _eventBus;

    /// <summary>Creates the service with its cache, database context and event bus.</summary>
    public AssetService(IDistributedCacheService cache, CatalogDb db, IEventBus eventBus)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
        _db = db ?? throw new ArgumentNullException(nameof(db));
        _eventBus = eventBus ?? throw new ArgumentNullException(nameof(eventBus));
    }

    /// <summary>
    /// Returns the asset with the given id, reading through the cache (15-minute lifetime,
    /// tagged <c>assets</c>).
    /// </summary>
    /// <returns>The asset including its type and location, or <c>null</c> when it does not exist.</returns>
    public async Task<Asset?> GetAssetAsync(int id)
    {
        // NOTE(review): the loaded entity graph is serialized as-is by the cache; confirm that
        // Type/Location have no back-references that would make serialization fail.
        return await _cache.GetOrCreateAsync(
            CacheKeys.Asset(id),
            async () => await _db.Assets
                .Include(a => a.Type)
                .Include(a => a.Location)
                .FirstOrDefaultAsync(a => a.Id == id),
            new CacheOptions
            {
                AbsoluteExpiration = TimeSpan.FromMinutes(15),
                Tags = new[] { "assets" }
            });
    }

    /// <summary>
    /// Applies <paramref name="request"/> to the asset, saves it, and invalidates every cache
    /// entry the change can affect.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
    /// <exception cref="KeyNotFoundException">No asset with <paramref name="id"/> exists.</exception>
    public async Task<Asset> UpdateAssetAsync(int id, UpdateAssetRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var asset = await _db.Assets.FindAsync(id)
            ?? throw new KeyNotFoundException($"Asset {id} was not found.");

        // Remember where the asset was listed before the update: if its type or location
        // changes, the lists it leaves are stale as well as the lists it joins.
        var previousTypeId = asset.TypeId;
        var previousLocationId = asset.LocationId;

        // Update asset...
        await _db.SaveChangesAsync();

        // Invalidate caches (the set removes duplicates when type/location did not change).
        var staleKeys = new HashSet<string>
        {
            CacheKeys.Asset(id),
            CacheKeys.AssetsByType(previousTypeId),
            CacheKeys.AssetsByType(asset.TypeId),
            CacheKeys.AssetsByLocation(previousLocationId),
            CacheKeys.AssetsByLocation(asset.LocationId)
        };

        foreach (var key in staleKeys)
        {
            await _cache.RemoveAsync(key);
        }

        // Publish cache invalidation event for other services
        await _eventBus.PublishAsync(new CacheInvalidationEvent
        {
            Keys = staleKeys.ToArray(),
            Tags = new[] { "assets" }
        });

        return asset;
    }
}
Cache Invalidation Strategies
1. Time-based Expiration
// Short-lived entry (5-minute absolute lifetime) for frequently changing data
new CacheOptions { AbsoluteExpiration = TimeSpan.FromMinutes(5) }
// Long-lived entry (24-hour absolute lifetime) for reference data
new CacheOptions { AbsoluteExpiration = TimeSpan.FromHours(24) }
// Sliding 10-minute expiration for active data
// NOTE(review): presumably the window restarts on each access — confirm the chosen provider implements renewal
new CacheOptions { SlidingExpiration = TimeSpan.FromMinutes(10) }
2. Event-based Invalidation
/// <summary>
/// Evicts asset cache entries when an <see cref="AssetUpdatedEvent"/> is received.
/// </summary>
public class CacheInvalidationHandler : IEventHandler<AssetUpdatedEvent>
{
    private readonly IDistributedCacheService _cache;

    /// <summary>Creates the handler with the cache it invalidates.</summary>
    public CacheInvalidationHandler(IDistributedCacheService cache)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
    }

    /// <summary>Removes the updated asset's entry and all cached asset lists.</summary>
    public async Task HandleAsync(AssetUpdatedEvent @event)
    {
        await _cache.RemoveAsync(CacheKeys.Asset(@event.AssetId));

        // Asset list keys end in a type id or location id ("assets:type:{typeId}",
        // "assets:location:{locationId}"), never in an asset id, so a pattern built from the
        // asset id would evict unrelated lists and miss the affected ones. The event only
        // carries the asset id here, so every asset list is dropped instead.
        await _cache.RemoveByPatternAsync("assets:*");
    }
}
3. Tag-based Invalidation
/// <summary>
/// Deletes every cache entry recorded under <paramref name="tag"/>, then the tag's own
/// bookkeeping set. Does nothing when the tag has no recorded keys.
/// </summary>
public async Task InvalidateTagAsync(string tag)
{
    var tagSetKey = $"tag:{tag}";
    RedisValue[] members = await _database.SetMembersAsync(tagSetKey);
    if (members.Length == 0)
    {
        return;
    }

    // Set members are the cache keys that were stored with this tag.
    var taggedKeys = Array.ConvertAll(members, member => (RedisKey)member.ToString());
    await _database.KeyDeleteAsync(taggedKeys);
    await _database.KeyDeleteAsync(tagSetKey);
}
Cache Warming
/// <summary>
/// Hosted service that pre-loads reference data into the cache at startup and then runs a
/// refresh pass every hour.
/// </summary>
/// <remarks>
/// Requires the <c>Microsoft.Extensions.DependencyInjection</c> namespace for scope creation.
/// </remarks>
public class CacheWarmingService : BackgroundService
{
    private static readonly TimeSpan RefreshInterval = TimeSpan.FromHours(1);
    private static readonly TimeSpan ReferenceDataLifetime = TimeSpan.FromHours(24);

    private readonly IDistributedCacheService _cache;
    private readonly IServiceScopeFactory _scopeFactory;
    private readonly ILogger<CacheWarmingService> _logger;

    /// <summary>Creates the service.</summary>
    /// <param name="cache">Cache to populate.</param>
    /// <param name="scopeFactory">
    /// Used to resolve the database context per run: a hosted service outlives any single
    /// scope, so it must not hold a scoped context directly.
    /// </param>
    /// <param name="logger">Receives warm-up and refresh failures.</param>
    public CacheWarmingService(
        IDistributedCacheService cache,
        IServiceScopeFactory scopeFactory,
        ILogger<CacheWarmingService> logger)
    {
        _cache = cache ?? throw new ArgumentNullException(nameof(cache));
        _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Runs the initial warm-up, then the periodic refresh until shutdown is requested.</summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // Warm up critical caches on startup
        await RunGuardedAsync("warm-up", () => WarmReferenceDataAsync(stoppingToken), stoppingToken);

        // Periodic refresh
        using var timer = new PeriodicTimer(RefreshInterval);
        try
        {
            while (await timer.WaitForNextTickAsync(stoppingToken))
            {
                // RefreshExpiringCachesAsync is not shown in this excerpt.
                await RunGuardedAsync("refresh", () => RefreshExpiringCachesAsync(), stoppingToken);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Normal shutdown.
        }
    }

    /// <summary>
    /// Runs one cache-maintenance step. Failures are logged instead of escaping
    /// <see cref="ExecuteAsync"/>, because the cache is an optimization and a failed pass must
    /// not take the host down. Cancellation caused by shutdown is rethrown.
    /// </summary>
    private async Task RunGuardedAsync(string stepName, Func<Task> step, CancellationToken stoppingToken)
    {
        try
        {
            await step();
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Cache {Step} failed", stepName);
        }
    }

    /// <summary>Loads asset types and locations from the database and caches them for 24 hours.</summary>
    private async Task WarmReferenceDataAsync(CancellationToken cancellationToken)
    {
        await using var scope = _scopeFactory.CreateAsyncScope();
        var db = scope.ServiceProvider.GetRequiredService<CatalogDb>();

        // Pre-load frequently accessed reference data
        var assetTypes = await db.AssetTypes.ToListAsync(cancellationToken);
        await _cache.SetAsync(
            CacheKeys.AssetTypes(),
            assetTypes,
            new CacheOptions { AbsoluteExpiration = ReferenceDataLifetime },
            cancellationToken);

        // NOTE(review): the flat list of all locations is stored under the hierarchy key for
        // root 0 — confirm readers of that key expect this shape.
        var locations = await db.Locations.ToListAsync(cancellationToken);
        await _cache.SetAsync(
            CacheKeys.LocationHierarchy(0),
            locations,
            new CacheOptions { AbsoluteExpiration = ReferenceDataLifetime },
            cancellationToken);
    }
}
Response Caching
// API response caching: output-cache the responses of GET /api/assets.
// Entries expire after 5 minutes, carry the "assets" tag, and are varied by the
// "page", "pageSize" and "filter" query-string values.
app.MapGet("/api/assets", GetAssets)
.CacheOutput(policy => policy
.Expire(TimeSpan.FromMinutes(5))
.Tag("assets")
.VaryByQuery("page", "pageSize", "filter"));
// Custom cache policy
/// <summary>
/// Output-cache policy for asset endpoints: caches GET requests for five minutes, varied by
/// every query-string key, and stores only successful responses that set no cookies.
/// </summary>
/// <remarks>
/// NOTE(review): this policy does not exclude authenticated requests or vary by user; confirm
/// that is intended before applying it to per-user responses.
/// </remarks>
public class AssetCachePolicy : IOutputCachePolicy
{
    private static readonly TimeSpan ResponseLifetime = TimeSpan.FromMinutes(5);

    /// <summary>Decides, at the start of the request, whether output caching applies.</summary>
    public ValueTask CacheRequestAsync(OutputCacheContext context, CancellationToken cancellation)
    {
        // HttpMethods.IsGet compares case-insensitively, unlike a plain string equality check.
        var attemptOutputCaching = HttpMethods.IsGet(context.HttpContext.Request.Method);

        context.EnableOutputCaching = attemptOutputCaching;
        context.AllowCacheLookup = attemptOutputCaching;
        context.AllowCacheStorage = attemptOutputCaching;
        context.AllowLocking = true;

        // "*" varies the cache entry by every query-string key.
        context.CacheVaryByRules.QueryKeys = "*";
        context.ResponseExpirationTimeSpan = ResponseLifetime;

        return ValueTask.CompletedTask;
    }

    /// <summary>Called before a cached response is served; no adjustment is needed.</summary>
    public ValueTask ServeFromCacheAsync(OutputCacheContext context, CancellationToken cancellation)
    {
        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Called before a freshly generated response is stored; blocks storage of responses that
    /// must not be replayed to other callers.
    /// </summary>
    public ValueTask ServeResponseAsync(OutputCacheContext context, CancellationToken cancellation)
    {
        var response = context.HttpContext.Response;

        // A Set-Cookie header is caller-specific, and error responses should not be pinned
        // for the whole expiration window.
        if (response.Headers.SetCookie.Count > 0 || response.StatusCode != StatusCodes.Status200OK)
        {
            context.AllowCacheStorage = false;
        }

        return ValueTask.CompletedTask;
    }
}
Monitoring and Metrics
/// <summary>
/// Records cache hit, miss and operation-duration metrics on the <c>Dynaplex.Cache</c> meter.
/// </summary>
/// <remarks>
/// NOTE(review): <c>IMeterProvider</c> is not declared in this document; confirm it is a project
/// abstraction (System.Diagnostics.Metrics itself offers <c>IMeterFactory</c>).
/// </remarks>
public class CacheMetrics
{
    private readonly IMeterProvider _meterProvider;
    private readonly Counter<long> _hits;
    private readonly Counter<long> _misses;
    private readonly Histogram<double> _duration;

    /// <summary>Creates the instruments from the supplied meter provider.</summary>
    public CacheMetrics(IMeterProvider meterProvider)
    {
        _meterProvider = meterProvider ?? throw new ArgumentNullException(nameof(meterProvider));

        var meter = _meterProvider.GetMeter("Dynaplex.Cache");
        _hits = meter.CreateCounter<long>("cache.hits");
        _misses = meter.CreateCounter<long>("cache.misses");
        _duration = meter.CreateHistogram<double>("cache.operation.duration", "ms");
    }

    /// <summary>Counts one cache hit for the key family that <paramref name="key"/> belongs to.</summary>
    public void RecordHit(string key) =>
        _hits.Add(1, new KeyValuePair<string, object?>("key_prefix", KeyPrefix(key)));

    /// <summary>Counts one cache miss for the key family that <paramref name="key"/> belongs to.</summary>
    public void RecordMiss(string key) =>
        _misses.Add(1, new KeyValuePair<string, object?>("key_prefix", KeyPrefix(key)));

    /// <summary>Records how long a cache operation took, in milliseconds.</summary>
    public void RecordDuration(double ms, string operation) =>
        _duration.Record(ms, new KeyValuePair<string, object?>("operation", operation));

    /// <summary>
    /// Reduces a cache key to its first colon-separated segment (for example <c>asset:42</c>
    /// becomes <c>asset</c>). Tagging with the full key would create one metric series per
    /// cached item, which grows without bound.
    /// </summary>
    private static string KeyPrefix(string key)
    {
        if (string.IsNullOrEmpty(key))
        {
            return "unknown";
        }

        var separatorIndex = key.IndexOf(':');
        return separatorIndex < 0 ? key : key[..separatorIndex];
    }
}
Configuration
{
"Caching": {
"Provider": "Redis",
"Redis": {
"Configuration": "localhost:6379,abortConnect=false",
"InstanceName": "dynaplex",
"DefaultExpiration": "00:05:00"
},
"Policies": {
"ReferenceData": {
"Duration": "1.00:00:00",
"Priority": "High"
},
"UserSession": {
"Duration": "00:20:00",
"SlidingExpiration": true
},
"ApiResponse": {
"Duration": "00:01:00",
"VaryByUser": true
}
}
}
}
Evaluation Criteria for Providers
- Performance: Latency, throughput
- Scalability: Clustering, sharding support
- Features: Data structures, pub/sub, transactions
- Cost: Infrastructure and operational costs
- Maintenance: Managed vs self-hosted
- Integration: .NET client quality
Migration Plan
- Phase 1: Implement cache abstraction
- Phase 2: Deploy Redis in development
- Phase 3: Cache reference data
- Phase 4: Add response caching
- Phase 5: Implement invalidation
- Phase 6: Monitor and optimize
Related ADRs
- ADR-027: Event-Driven Architecture (cache invalidation events)
- ADR-019: OpenTelemetry Integration (cache metrics)
- ADR-031: API Gateway Pattern (centralized caching)