Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3074,24 +3074,24 @@ pool:
[health_check_ingesters: <boolean> | default = true]

ha_tracker:
- # Enable the distributors HA tracker so that it can accept samples from
- # Prometheus HA replicas gracefully (requires labels).
+ # Enable the HA tracker so that it can accept data from Prometheus HA replicas
+ # gracefully (requires labels).
# CLI flag: -distributor.ha-tracker.enable
[enable_ha_tracker: <boolean> | default = false]

- # Update the timestamp in the KV store for a given cluster/replica only after
- # this amount of time has passed since the current stored timestamp.
+ # The time interval that must pass since the last timestamp update in the KV
+ # store before updating it again for a given cluster.
# CLI flag: -distributor.ha-tracker.update-timeout
[ha_tracker_update_timeout: <duration> | default = 15s]

- # Maximum jitter applied to the update timeout, in order to spread the HA
- # heartbeats over time.
+ # The maximum jitter applied to the update timeout to spread KV store updates
+ # over time.
# CLI flag: -distributor.ha-tracker.update-timeout-jitter-max
[ha_tracker_update_timeout_jitter_max: <duration> | default = 5s]

- # If we don't receive any samples from the accepted replica for a cluster in
- # this amount of time we will failover to the next replica we receive a sample
- # from. This value must be greater than the update timeout
+ # The timeout after which a new replica will be accepted if the currently
+ # elected replica stops sending data. This value must be greater than the
+ # update timeout plus the maximum jitter.
# CLI flag: -distributor.ha-tracker.failover-timeout
[ha_tracker_failover_timeout: <duration> | default = 30s]

Expand Down
10 changes: 4 additions & 6 deletions pkg/distributor/distributor.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ type Distributor struct {
type Config struct {
PoolConfig PoolConfig `yaml:"pool"`

- HATrackerConfig HATrackerConfig `yaml:"ha_tracker"`
+ HATrackerConfig ha.HATrackerConfig `yaml:"ha_tracker"`

MaxRecvMsgSize int `yaml:"max_recv_msg_size"`
OTLPMaxRecvMsgSize int `yaml:"otlp_max_recv_msg_size"`
Expand Down Expand Up @@ -207,7 +207,7 @@ type OTLPConfig struct {
// RegisterFlags adds the flags required to config this to the given FlagSet
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.PoolConfig.RegisterFlags(f)
- cfg.HATrackerConfig.RegisterFlags(f)
+ cfg.HATrackerConfig.RegisterFlagsWithPrefix("distributor.", "", f)
cfg.DistributorRing.RegisterFlags(f)

f.IntVar(&cfg.MaxRecvMsgSize, "distributor.max-recv-msg-size", 100<<20, "remote_write API max receive message size (bytes).")
Expand Down Expand Up @@ -243,9 +243,7 @@ func (cfg *Config) Validate(limits validation.Limits) error {
return errInvalidTenantShardSize
}

- haHATrackerConfig := cfg.HATrackerConfig.ToHATrackerConfig()
-
- return haHATrackerConfig.Validate()
+ return cfg.HATrackerConfig.Validate()
}

const (
Expand All @@ -268,7 +266,7 @@ func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Ove
Title: "Cortex HA Tracker Status",
ReplicaGroupLabel: "Cluster",
}
- haTracker, err := ha.NewHATracker(cfg.HATrackerConfig.ToHATrackerConfig(), limits, haTrackerStatusConfig, prometheus.WrapRegistererWithPrefix("cortex_", reg), "distributor-hatracker", log)
+ haTracker, err := ha.NewHATracker(cfg.HATrackerConfig, limits, haTrackerStatusConfig, prometheus.WrapRegistererWithPrefix("cortex_", reg), "distributor-hatracker", log)
if err != nil {
return nil, err
}
Expand Down
61 changes: 0 additions & 61 deletions pkg/distributor/distributor_ha_tracker.go

This file was deleted.

2 changes: 1 addition & 1 deletion pkg/distributor/distributor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3216,7 +3216,7 @@ func prepare(tb testing.TB, cfg prepConfig) ([]*Distributor, []*mockIngester, []
ringStore, closer := consul.NewInMemoryClient(codec, log.NewNopLogger(), nil)
tb.Cleanup(func() { assert.NoError(tb, closer.Close()) })
mock := kv.PrefixClient(ringStore, "prefix")
- distributorCfg.HATrackerConfig = HATrackerConfig{
+ distributorCfg.HATrackerConfig = ha.HATrackerConfig{
EnableHATracker: true,
KVStore: kv.Config{Mock: mock},
UpdateTimeout: 100 * time.Millisecond,
Expand Down
8 changes: 4 additions & 4 deletions pkg/ha/ha_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ func (cfg *HATrackerConfig) RegisterFlagsWithPrefix(flagPrefix string, kvPrefix
finalKVPrefix = kvPrefix
}

- f.BoolVar(&cfg.EnableHATracker, finalFlagPrefix+"ha-tracker.enable", false, "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully.")
- f.DurationVar(&cfg.UpdateTimeout, finalFlagPrefix+"ha-tracker.update-timeout", 15*time.Second, "Update the timestamp in the KV store for a given cluster/replicaGroup only after this amount of time has passed since the current stored timestamp.")
- f.DurationVar(&cfg.UpdateTimeoutJitterMax, finalFlagPrefix+"ha-tracker.update-timeout-jitter-max", 5*time.Second, "Maximum jitter applied to the update timeout, in order to spread the HA heartbeats over time.")
- f.DurationVar(&cfg.FailoverTimeout, finalFlagPrefix+"ha-tracker.failover-timeout", 30*time.Second, "If we don't receive any data from the accepted replica for a cluster/replicaGroup in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout")
+ f.BoolVar(&cfg.EnableHATracker, finalFlagPrefix+"ha-tracker.enable", false, "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully (requires labels).")
+ f.DurationVar(&cfg.UpdateTimeout, finalFlagPrefix+"ha-tracker.update-timeout", 15*time.Second, "The time interval that must pass since the last timestamp update in the KV store before updating it again for a given cluster.")
+ f.DurationVar(&cfg.UpdateTimeoutJitterMax, finalFlagPrefix+"ha-tracker.update-timeout-jitter-max", 5*time.Second, "The maximum jitter applied to the update timeout to spread KV store updates over time.")
+ f.DurationVar(&cfg.FailoverTimeout, finalFlagPrefix+"ha-tracker.failover-timeout", 30*time.Second, "The timeout after which a new replica will be accepted if the currently elected replica stops sending data. This value must be greater than the update timeout plus the maximum jitter.")
f.BoolVar(&cfg.EnableStartupSync, finalFlagPrefix+"ha-tracker.enable-startup-sync", false, "[Experimental] If enabled, fetches all tracked keys on startup to populate the local cache. This prevents duplicate GET calls for the same key while the cache is cold, but could cause a spike in GET requests during initialization if the number of tracked keys is large.")

// We want the ability to use different Consul instances for the ring and
Expand Down
8 changes: 4 additions & 4 deletions schemas/cortex-config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3718,7 +3718,7 @@
"properties": {
"enable_ha_tracker": {
"default": false,
- "description": "Enable the distributors HA tracker so that it can accept samples from Prometheus HA replicas gracefully (requires labels).",
+ "description": "Enable the HA tracker so that it can accept data from Prometheus HA replicas gracefully (requires labels).",
"type": "boolean",
"x-cli-flag": "distributor.ha-tracker.enable"
},
Expand All @@ -3730,21 +3730,21 @@
},
"ha_tracker_failover_timeout": {
"default": "30s",
- "description": "If we don't receive any samples from the accepted replica for a cluster in this amount of time we will failover to the next replica we receive a sample from. This value must be greater than the update timeout",
+ "description": "The timeout after which a new replica will be accepted if the currently elected replica stops sending data. This value must be greater than the update timeout plus the maximum jitter.",
"type": "string",
"x-cli-flag": "distributor.ha-tracker.failover-timeout",
"x-format": "duration"
},
"ha_tracker_update_timeout": {
"default": "15s",
- "description": "Update the timestamp in the KV store for a given cluster/replica only after this amount of time has passed since the current stored timestamp.",
+ "description": "The time interval that must pass since the last timestamp update in the KV store before updating it again for a given cluster.",
"type": "string",
"x-cli-flag": "distributor.ha-tracker.update-timeout",
"x-format": "duration"
},
"ha_tracker_update_timeout_jitter_max": {
"default": "5s",
- "description": "Maximum jitter applied to the update timeout, in order to spread the HA heartbeats over time.",
+ "description": "The maximum jitter applied to the update timeout to spread KV store updates over time.",
"type": "string",
"x-cli-flag": "distributor.ha-tracker.update-timeout-jitter-max",
"x-format": "duration"
Expand Down
Loading