cortexproject · SungJin1212 · Jan 30, 2026
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
@@ -3074,24 +3074,24 @@ pool:
   [health_check_ingesters: <boolean> | default = true]
 
 ha_tracker:
-  # Enable the distributors HA tracker so that it can accept samples from
-  # Prometheus HA replicas gracefully (requires labels).
+  # Enable the HA tracker so that it can accept data from Prometheus HA replicas
+  # gracefully (requires labels).
   # CLI flag: -distributor.ha-tracker.enable
   [enable_ha_tracker: <boolean> | default = false]
 
-  # Update the timestamp in the KV store for a given cluster/replica only after
-  # this amount of time has passed since the current stored timestamp.
+  # The time interval that must pass since the last timestamp update in the KV
+  # store before updating it again for a given cluster.
   # CLI flag: -distributor.ha-tracker.update-timeout
   [ha_tracker_update_timeout: <duration> | default = 15s]
 
-  # Maximum jitter applied to the update timeout, in order to spread the HA
-  # heartbeats over time.
+  # The maximum jitter applied to the update timeout to spread KV store updates
+  # over time.
   # CLI flag: -distributor.ha-tracker.update-timeout-jitter-max
   [ha_tracker_update_timeout_jitter_max: <duration> | default = 5s]
 
-  # If we don't receive any samples from the accepted replica for a cluster in
-  # this amount of time we will failover to the next replica we receive a sample
-  # from. This value must be greater than the update timeout
+  # The timeout after which a new replica will be accepted if the currently
+  # elected replica stops sending data. This value must be greater than the
+  # update timeout plus the maximum jitter.
   # CLI flag: -distributor.ha-tracker.failover-timeout
   [ha_tracker_failover_timeout: <duration> | default = 30s]
 

diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go
@@ -144,7 +144,7 @@ type Distributor struct {
 type Config struct {
 	PoolConfig PoolConfig `yaml:"pool"`
 
-	HATrackerConfig HATrackerConfig `yaml:"ha_tracker"`
+	HATrackerConfig ha.HATrackerConfig `yaml:"ha_tracker"`
 
 	MaxRecvMsgSize     int           `yaml:"max_recv_msg_size"`
 	OTLPMaxRecvMsgSize int           `yaml:"otlp_max_recv_msg_size"`
@@ -207,7 +207,7 @@ type OTLPConfig struct {
 // RegisterFlags adds the flags required to config this to the given FlagSet
 func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
 	cfg.PoolConfig.RegisterFlags(f)
-	cfg.HATrackerConfig.RegisterFlags(f)
+	cfg.HATrackerConfig.RegisterFlagsWithPrefix("distributor.", "", f)
 	cfg.DistributorRing.RegisterFlags(f)
 
 	f.IntVar(&cfg.MaxRecvMsgSize, "distributor.max-recv-msg-size", 100<<20, "remote_write API max receive message size (bytes).")
@@ -243,9 +243,7 @@ func (cfg *Config) Validate(limits validation.Limits) error {
 		return errInvalidTenantShardSize
 	}
 
-	haHATrackerConfig := cfg.HATrackerConfig.ToHATrackerConfig()
-
-	return haHATrackerConfig.Validate()
+	return cfg.HATrackerConfig.Validate()
 }
 
 const (
@@ -268,7 +266,7 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove
 		Title:             "Cortex HA Tracker Status",
 		ReplicaGroupLabel: "Cluster",
 	}
-	haTracker, err := ha.NewHATracker(cfg.HATrackerConfig.ToHATrackerConfig(), limits, haTrackerStatusConfig, prometheus.WrapRegistererWithPrefix("cortex_", reg), "distributor-hatracker", log)
+	haTracker, err := ha.NewHATracker(cfg.HATrackerConfig, limits, haTrackerStatusConfig, prometheus.WrapRegistererWithPrefix("cortex_", reg), "distributor-hatracker", log)
 	if err != nil {
 		return nil, err
 	}

diff --git a/pkg/distributor/distributor_ha_tracker.go b/pkg/distributor/distributor_ha_tracker.go
diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go
@@ -3216,7 +3216,7 @@ func prepare(tb testing.TB, cfg prepConfig) ([]*Distributor, []*mockIngester, []
 			ringStore, closer := consul.NewInMemoryClient(codec, log.NewNopLogger(), nil)
 			tb.Cleanup(func() { assert.NoError(tb, closer.Close()) })
 			mock := kv.PrefixClient(ringStore, "prefix")
-			distributorCfg.HATrackerConfig = HATrackerConfig{
+			distributorCfg.HATrackerConfig = ha.HATrackerConfig{
 				EnableHATracker: true,
 				KVStore:         kv.Config{Mock: mock},
 				UpdateTimeout:   100 * time.Millisecond,

diff --git a/pkg/ha/ha_tracker.go b/pkg/ha/ha_tracker.go
@@ -92,10 +92,10 @@ func (cfg *HATrackerConfig) RegisterFlagsWithPrefix(flagPrefix string, kvPrefix
 		finalKVPrefix = kvPrefix
 	}
 
-	f.BoolVar(&cfg.EnableHATracker, finalFlagPrefix+"ha-tracker.enable", false, "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully.")
-	f.DurationVar(&cfg.UpdateTimeout, finalFlagPrefix+"ha-tracker.update-timeout", 15*time.Second, "Update the timestamp in the KV store for a given cluster/replicaGroup only after this amount of time has passed since the current stored timestamp.")
-	f.DurationVar(&cfg.UpdateTimeoutJitterMax, finalFlagPrefix+"ha-tracker.update-timeout-jitter-max", 5*time.Second, "Maximum jitter applied to the update timeout, in order to spread the HA heartbeats over time.")
-	f.DurationVar(&cfg.FailoverTimeout, finalFlagPrefix+"ha-tracker.failover-timeout", 30*time.Second, "If we don't receive any data from the accepted replica for a cluster/replicaGroup in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout")
+	f.BoolVar(&cfg.EnableHATracker, finalFlagPrefix+"ha-tracker.enable", false, "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully (requires labels).")
+	f.DurationVar(&cfg.UpdateTimeout, finalFlagPrefix+"ha-tracker.update-timeout", 15*time.Second, "The time interval that must pass since the last timestamp update in the KV store before updating it again for a given cluster.")
+	f.DurationVar(&cfg.UpdateTimeoutJitterMax, finalFlagPrefix+"ha-tracker.update-timeout-jitter-max", 5*time.Second, "The maximum jitter applied to the update timeout to spread KV store updates over time.")
+	f.DurationVar(&cfg.FailoverTimeout, finalFlagPrefix+"ha-tracker.failover-timeout", 30*time.Second, "The timeout after which a new replica will be accepted if the currently elected replica stops sending data. This value must be greater than the update timeout plus the maximum jitter.")
 	f.BoolVar(&cfg.EnableStartupSync, finalFlagPrefix+"ha-tracker.enable-startup-sync", false, "[Experimental] If enabled, fetches all tracked keys on startup to populate the local cache. This prevents duplicate GET calls for the same key while the cache is cold, but could cause a spike in GET requests during initialization if the number of tracked keys is large.")
 
 	// We want the ability to use different Consul instances for the ring and

diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json
@@ -3718,7 +3718,7 @@
           "properties": {
             "enable_ha_tracker": {
               "default": false,
-              "description": "Enable the distributors HA tracker so that it can accept samples from Prometheus HA replicas gracefully (requires labels).",
+              "description": "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully (requires labels).",
               "type": "boolean",
               "x-cli-flag": "distributor.ha-tracker.enable"
             },
@@ -3730,21 +3730,21 @@
             },
             "ha_tracker_failover_timeout": {
               "default": "30s",
-              "description": "If we don't receive any samples from the accepted replica for a cluster in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout",
+              "description": "The timeout after which a new replica will be accepted if the currently elected replica stops sending data. This value must be greater than the update timeout plus the maximum jitter.",
               "type": "string",
               "x-cli-flag": "distributor.ha-tracker.failover-timeout",
               "x-format": "duration"
             },
             "ha_tracker_update_timeout": {
               "default": "15s",
-              "description": "Update the timestamp in the KV store for a given cluster/replica only after this amount of time has passed since the current stored timestamp.",
+              "description": "The time interval that must pass since the last timestamp update in the KV store before updating it again for a given cluster.",
               "type": "string",
               "x-cli-flag": "distributor.ha-tracker.update-timeout",
               "x-format": "duration"
             },
             "ha_tracker_update_timeout_jitter_max": {
               "default": "5s",
-              "description": "Maximum jitter applied to the update timeout, in order to spread the HA heartbeats over time.",
+              "description": "The maximum jitter applied to the update timeout to spread KV store updates over time.",
               "type": "string",
               "x-cli-flag": "distributor.ha-tracker.update-timeout-jitter-max",
               "x-format": "duration"