diff --git a/inflight_batcher_llm/src/custom_metrics_reporter/custom_metrics_reporter.cc b/inflight_batcher_llm/src/custom_metrics_reporter/custom_metrics_reporter.cc index 1e76c8dc..a4865534 100644 --- a/inflight_batcher_llm/src/custom_metrics_reporter/custom_metrics_reporter.cc +++ b/inflight_batcher_llm/src/custom_metrics_reporter/custom_metrics_reporter.cc @@ -43,8 +43,11 @@ const std::vector CustomMetricsReporter::runtime_memory_keys_{ const std::vector CustomMetricsReporter::runtime_memory_labels_{"cpu", "gpu", "pinned"}; const std::vector CustomMetricsReporter::kv_cache_keys_{ - "Max KV cache blocks", "Free KV cache blocks", "Used KV cache blocks", "Tokens per KV cache block"}; -const std::vector CustomMetricsReporter::kv_cache_labels_{"max", "free", "used", "tokens_per"}; + "Max KV cache blocks", "Free KV cache blocks", "Used KV cache blocks", "Tokens per KV cache block", + "Alloc Total KV cache blocks", "Alloc New KV cache blocks", "Reused KV cache blocks"}; +const std::vector CustomMetricsReporter::kv_cache_labels_{ + "max", "free", "used", "tokens_per", + "alloc_total", "alloc_new", "reused"}; const std::vector CustomMetricsReporter::v1_specific_keys_{ "Total Context Tokens", "Total Generation Tokens", "Empty Generation Slots"}; diff --git a/inflight_batcher_llm/src/model_instance_state.cc b/inflight_batcher_llm/src/model_instance_state.cc index ba18de7f..f6e95994 100644 --- a/inflight_batcher_llm/src/model_instance_state.cc +++ b/inflight_batcher_llm/src/model_instance_state.cc @@ -1084,6 +1084,8 @@ void ModelInstanceState::WaitForStats() statJson.append("\"Max KV cache blocks\":" + std::to_string(kvStats.maxNumBlocks) + ","); statJson.append("\"Tokens per KV cache block\":" + std::to_string(kvStats.tokensPerBlock) + ","); statJson.append("\"Used KV cache blocks\":" + std::to_string(kvStats.usedNumBlocks) + ","); + statJson.append("\"Alloc Total KV cache blocks\":" + std::to_string(kvStats.allocTotalBlocks) + ","); + statJson.append("\"Alloc New KV cache blocks\":" + std::to_string(kvStats.allocNewBlocks) + ","); statJson.append("\"Reused KV cache blocks\":" + std::to_string(kvStats.reusedBlocks) + ","); }