Skip to content

Fix FhirPath performance regression by optimizing cache cleanup #3219

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions src/Hl7.Fhir.Base/Utility/Cache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,18 @@ internal class CacheItem<V>
public DateTimeOffset LastAccessed { get; private set; } = DateTimeOffset.Now;

private V _value;
private volatile bool _accessedRecently = false;

internal V Value
{
get
{
LastAccessed = DateTimeOffset.Now;
// Only update LastAccessed occasionally to reduce allocation overhead
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What allocation overhead? The LastAccessed property is DateTimeOffset, so it will be a struct and updating it's value will just change value in memory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're absolutely correct. DateTimeOffset is a struct, so updating it doesn't cause heap allocation. My comment was misleading. The real performance cost comes from calling DateTimeOffset.Now on every cache access, which involves system calls and has CPU overhead, not memory allocation overhead. I'll clarify this and focus on finding the actual performance bottleneck.

if (!_accessedRecently)
{
LastAccessed = DateTimeOffset.Now;
_accessedRecently = true;
}
return _value;
}
set
Expand All @@ -38,6 +45,9 @@ internal V Value
}
}
}

// Method to reset the recently accessed flag during cache cleanup
internal void ResetAccessFlag() => _accessedRecently = false;
}

public class CacheSettings
Expand Down Expand Up @@ -68,6 +78,8 @@ public class Cache<K, V> where V : class
private readonly ConcurrentDictionary<K, CacheItem<V>> _cached;
private readonly int _minimumCacheSize;
private readonly Func<K, CacheItem<V>> _retriever;
private volatile int _accessCount = 0;
private const int CLEANUP_INTERVAL = 100; // Check for cleanup every 100 accesses

/// <summary>
/// The settings for changing the behaviour of the case. Passed into the constructor and readonly here.
Expand Down Expand Up @@ -113,7 +125,7 @@ public V GetValue(K key)
else
{
var cachedItem = _cached.GetOrAdd(key, _retriever);
enforceMaxItems();
enforceMaxItemsIfNeeded();
return cachedItem.Value;
}
}
Expand All @@ -127,16 +139,31 @@ public V GetValue(K key)
public V GetValueOrAdd(K key, V value)
{
var cachedItem = _cached.GetOrAdd(key, new CacheItem<V>(value));
enforceMaxItems();
enforceMaxItemsIfNeeded();

return cachedItem.Value;
}

private void enforceMaxItemsIfNeeded()
{
// Only check for cleanup periodically to avoid expensive operations on every access
if (System.Threading.Interlocked.Increment(ref _accessCount) % CLEANUP_INTERVAL == 0)
{
enforceMaxItems();
}
}

private void enforceMaxItems()
{
var currentCount = _cached.Count();
var currentCount = _cached.Count; // Use Count property instead of Count() method
if (currentCount > Settings.MaxCacheSize)
{
// Reset access flags before cleanup to ensure proper LRU behavior
foreach (var item in _cached.Values)
{
item.ResetAccessFlag();
}

// first copy the key value pairs in an array. Otherwise we could have a race condition. See for more information:
// https://stackoverflow.com/questions/11692389/getting-argument-exception-in-concurrent-dictionary-when-sorting-and-displaying
var copy = _cached.ToArray();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
using System;
using System.Diagnostics;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Hl7.Fhir.ElementModel;
using Hl7.Fhir.Model;
using Hl7.Fhir.FhirPath;
using Hl7.FhirPath;

namespace Hl7.Fhir
{
[TestClass]
public class FhirPathCachePerformanceTests
{
[TestMethod]
public void FhirPathCachePerformanceRegression()
{
// Create a test patient for FhirPath evaluation
var patient = new Patient
{
Id = "test-patient",
Name = { new HumanName { Family = "Doe", Given = new[] { "John" } } },
BirthDate = "1990-01-01",
Gender = AdministrativeGender.Male
};

// Test expressions that would be commonly cached
var expressions = new[]
{
"name.family",
"name.given.first()",
"birthDate",
"gender",
"name.where(family = 'Doe').given",
"extension.where(url = 'test').value",
"id",
"name.given.last()"
};

var cache = new FhirPathCompilerCache();
var typedElement = patient.ToTypedElement();

// Warmup - this will populate the cache with the expressions
foreach (var expr in expressions)
{
cache.Select(typedElement, expr);
}

// Now measure performance of cached evaluations
var sw = Stopwatch.StartNew();
int iterations = 10000;

for (int i = 0; i < iterations; i++)
{
foreach (var expr in expressions)
{
var result = cache.Select(typedElement, expr);
// Consume the result to ensure evaluation happens
foreach (var item in result) { }
}
}

sw.Stop();

Console.WriteLine($"FhirPath cache performance: {sw.ElapsedMilliseconds}ms for {iterations * expressions.Length} cached evaluations");
Console.WriteLine($"Average time per evaluation: {(double)sw.ElapsedMilliseconds / (iterations * expressions.Length):F3}ms");

// The test should complete in reasonable time even with many iterations
// This is not an exact performance test but ensures we don't have major regressions
Assert.IsTrue(sw.ElapsedMilliseconds < 5000,
$"FhirPath cache performance regression detected. Took {sw.ElapsedMilliseconds}ms for {iterations * expressions.Length} evaluations");
}

[TestMethod]
public void FhirPathCacheEvictionPerformance()
{
var cache = new FhirPathCompilerCache(cacheSize: 100); // Small cache to trigger eviction
var patient = new Patient { Id = "test" };
var typedElement = patient.ToTypedElement();

var sw = Stopwatch.StartNew();

// Create many unique expressions to trigger cache eviction
for (int i = 0; i < 500; i++) // More than cache size to trigger cleanup
{
var expr = $"id.where(value = 'test{i}')";
cache.Select(typedElement, expr);
}

sw.Stop();

Console.WriteLine($"Cache eviction test: {sw.ElapsedMilliseconds}ms for 500 unique expressions");

// Should handle cache eviction efficiently
Assert.IsTrue(sw.ElapsedMilliseconds < 2000,
$"Cache eviction performance issue detected. Took {sw.ElapsedMilliseconds}ms");
}
}
}
Loading