From 2f12a59e6debaa48f0e781ad1b748283e88f8dbe Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 24 Feb 2026 09:08:06 -0800 Subject: [PATCH 1/5] Adding query advisor --- sdk/cosmos/azure-cosmos/azure/cosmos/_base.py | 3 + .../azure/cosmos/_cosmos_client_connection.py | 4 + .../azure/cosmos/_query_advisor/__init__.py | 18 ++ .../_query_advisor/_get_query_advice_info.py | 43 ++++ .../cosmos/_query_advisor/_query_advice.py | 145 ++++++++++++ .../cosmos/_query_advisor/_rule_directory.py | 68 ++++++ .../_query_advisor/query_advice_rules.json | 55 +++++ .../azure/cosmos/aio/_container.py | 13 ++ .../aio/_cosmos_client_connection_async.py | 4 + .../azure-cosmos/azure/cosmos/container.py | 10 + .../azure/cosmos/http_constants.py | 2 + .../azure-cosmos/tests/test_none_options.py | 3 +- .../tests/test_none_options_async.py | 3 +- sdk/cosmos/azure-cosmos/tests/test_query.py | 86 +++++++ .../azure-cosmos/tests/test_query_advisor.py | 209 ++++++++++++++++++ .../azure-cosmos/tests/test_query_async.py | 88 ++++++++ .../tests/test_query_cross_partition.py | 82 +++++++ .../tests/test_query_cross_partition_async.py | 82 +++++++ 18 files changed, 916 insertions(+), 2 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/__init__.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py create mode 100644 sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json create mode 100644 sdk/cosmos/azure-cosmos/tests/test_query_advisor.py diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py index 0ad2ce0ad820..fb8e78d232db 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_base.py @@ -359,6 +359,9 @@ def GetHeaders( # 
pylint: disable=too-many-statements,too-many-branches if options.get("populateIndexMetrics"): headers[http_constants.HttpHeaders.PopulateIndexMetrics] = options["populateIndexMetrics"] + if options.get("populateQueryAdvice"): + headers[http_constants.HttpHeaders.PopulateQueryAdvice] = options["populateQueryAdvice"] + if options.get("responseContinuationTokenLimitInKb"): headers[http_constants.HttpHeaders.ResponseContinuationTokenLimitInKb] = options[ "responseContinuationTokenLimitInKb"] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py index 76803f1a2edc..0301529bc0c7 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_cosmos_client_connection.py @@ -71,6 +71,7 @@ from ._request_object import RequestObject from ._retry_utility import ConnectionRetryPolicy from ._routing import routing_map_provider, routing_range +from ._query_advisor import get_query_advice_info from ._inference_service import _InferenceService from .documents import ConnectionPolicy, DatabaseAccount from .partition_key import ( @@ -3376,6 +3377,9 @@ def __GetBodiesFromQueryResult(result: dict[str, Any]) -> list[dict[str, Any]]: INDEX_METRICS_HEADER = http_constants.HttpHeaders.IndexUtilization index_metrics_raw = last_response_headers[INDEX_METRICS_HEADER] last_response_headers[INDEX_METRICS_HEADER] = _utils.get_index_metrics_info(index_metrics_raw) + if last_response_headers.get(http_constants.HttpHeaders.QueryAdvice) is not None: + query_advice_raw = last_response_headers[http_constants.HttpHeaders.QueryAdvice] + last_response_headers[http_constants.HttpHeaders.QueryAdvice] = get_query_advice_info(query_advice_raw) if response_hook: response_hook(last_response_headers, result) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/__init__.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/__init__.py new file 
mode 100644 index 000000000000..795d62192188 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/__init__.py @@ -0,0 +1,18 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +"""Query Advisor module for processing query optimization advice from Azure Cosmos DB.""" + +from ._query_advice import QueryAdvice, QueryAdviceEntry +from ._rule_directory import RuleDirectory +from ._get_query_advice_info import get_query_advice_info + +__all__ = [ + "QueryAdvice", + "QueryAdviceEntry", + "RuleDirectory", + "get_query_advice_info", +] diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py new file mode 100644 index 000000000000..b7c3c6007f41 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py @@ -0,0 +1,43 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +"""Function for processing query advice response headers.""" + +from typing import Optional + +from ._query_advice import QueryAdvice + + +def get_query_advice_info(header_value: Optional[str]) -> str: + """Process a query advice response header into a formatted string. + + This function takes the raw query advice response header (URL-encoded JSON), + decodes it, parses the query advice entries, enriches them with human-readable + messages from the rule directory, and returns a formatted multi-line string. 
+ + Args: + header_value: The raw query advice response header value (URL-encoded JSON) + + Returns: + Formatted string with query advice entries, or empty string if parsing fails + + Example: + >>> header = "QA1002%3A%20Instead%20of%20CONTAINS..." + >>> advice = get_query_advice_info(header) + >>> print(advice) + QA1002: Instead of CONTAINS, consider using STARTSWITH or computed properties... + """ + if header_value is None: + return "" + + # Parse the query advice from the header + query_advice = QueryAdvice.try_create_from_string(header_value) + + if query_advice is None: + return "" + + # Format as string + return query_advice.to_string() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py new file mode 100644 index 000000000000..2baed1b412f2 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py @@ -0,0 +1,145 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +"""Query advice classes for parsing and formatting query optimization recommendations.""" + +import json +from typing import Any, Dict, List, Optional +from urllib.parse import unquote + +from ._rule_directory import RuleDirectory + + +class QueryAdviceEntry: + """Represents a single query advice entry. + + Each entry contains a rule ID and optional parameters that provide + specific guidance for query optimization. + """ + + def __init__(self, rule_id: str, parameters: Optional[List[str]] = None) -> None: + """Initialize a query advice entry.
+ + Args: + rule_id: The rule identifier (e.g., "QA1000") + parameters: Optional list of parameters for the rule message + """ + self.id = rule_id + self.parameters = parameters or [] + + def to_string(self, rule_directory: RuleDirectory) -> Optional[str]: + """Format the query advice entry as a human-readable string. + + Args: + rule_directory: Rule directory instance for looking up messages + + Returns: + Formatted string with rule ID, message, and documentation link, + or None if the rule message cannot be found + """ + if self.id is None: + return None + + message = rule_directory.get_rule_message(self.id) + if message is None: + return None + + # Format: {id}: {message}. For more information, please visit {url_prefix}{id} + result = f"{self.id}: " + + # Format message with parameters if available + if self.parameters: + try: + result += message.format(*self.parameters) + except (IndexError, KeyError): + # If formatting fails, use message as-is + result += message + else: + result += message + + # Add documentation link + result += f" For more information, please visit {rule_directory.url_prefix}{self.id}" + + return result + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "QueryAdviceEntry": + """Create a QueryAdviceEntry from a dictionary. + + Args: + data: Dictionary with "Id" and optional "Params" keys + + Returns: + QueryAdviceEntry instance + """ + rule_id = data.get("Id", "") + parameters = data.get("Params", []) + return cls(rule_id, parameters) + + +class QueryAdvice: + """Collection of query advice entries. + + Represents the complete query advice response from Azure Cosmos DB, + containing one or more optimization recommendations. + """ + + def __init__(self, entries: Optional[List[QueryAdviceEntry]] = None) -> None: + """Initialize query advice with a list of entries. 
+ + Args: + entries: List of QueryAdviceEntry objects + """ + self.entries = [e for e in (entries or []) if e is not None] + + def to_string(self) -> str: + """Format all query advice entries as a multi-line string. + + Returns: + Formatted string with each entry on a separate line + """ + if not self.entries: + return "" + + rule_directory = RuleDirectory() + lines = [] + + for entry in self.entries: + formatted = entry.to_string(rule_directory) + if formatted: + lines.append(formatted) + + return "\n".join(lines) + + @classmethod + def try_create_from_string(cls, response_header: Optional[str]) -> Optional["QueryAdvice"]: + """Parse query advice from a URL-encoded JSON response header. + + Args: + response_header: URL-encoded JSON string from the response header + + Returns: + QueryAdvice instance if parsing succeeds, None otherwise + """ + if response_header is None: + return None + + try: + # URL-decode the header value + decoded_string = unquote(response_header) + + # Parse JSON into list of entry dictionaries + data = json.loads(decoded_string) + + if not isinstance(data, list): + return None + + # Convert dictionaries to QueryAdviceEntry objects + entries = [QueryAdviceEntry.from_dict(item) for item in data if isinstance(item, dict)] + + return cls(entries) + except (json.JSONDecodeError, ValueError, AttributeError): + return None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py new file mode 100644 index 000000000000..52338a9afcc0 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py @@ -0,0 +1,68 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. 
+# -------------------------------------------------------------------------- + +"""Rule directory singleton for loading and accessing query advice rules.""" + +import json +import os +from typing import Any, Dict, Optional + + +class RuleDirectory: + """Singleton for loading and accessing query advice rules. + + The rule directory lazy-loads the query_advice_rules.json file + and provides access to rule messages and URL prefix. + """ + + _instance: Optional["RuleDirectory"] = None + + def __new__(cls) -> "RuleDirectory": + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._initialized = False + return cls._instance + + def __init__(self) -> None: + if self._initialized: + return + + self._initialized = True + self._rules: Dict[str, Dict[str, Any]] = {} + self._url_prefix: str = "" + self._load_rules() + + def _load_rules(self) -> None: + """Load rules from the JSON file.""" + try: + rules_file = os.path.join(os.path.dirname(__file__), "query_advice_rules.json") + with open(rules_file, "r", encoding="utf-8") as f: + data = json.load(f) + self._url_prefix = data.get("url_prefix", "") + self._rules = data.get("rules", {}) + except (IOError, json.JSONDecodeError): + # If we can't load rules, fall back to the default documentation URL and an empty rule set + self._url_prefix = "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/queryadvisor/" + self._rules = {} + + @property + def url_prefix(self) -> str: + """Get the URL prefix for documentation links.""" + return self._url_prefix + + def get_rule_message(self, rule_id: str) -> Optional[str]: + """Get the message for a given rule ID.
+ + Args: + rule_id: The rule identifier (e.g., "QA1000") + + Returns: + The rule message, or None if the rule is not found + """ + rule = self._rules.get(rule_id) + if rule: + return rule.get("message") + return None diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json new file mode 100644 index 000000000000..c6ade8f48638 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json @@ -0,0 +1,55 @@ +{ + "url_prefix": "https://aka.ms/CosmosDB/QueryAdvisor/", + "rules": { + "QA1000": { + "name": "PartialArrayContains", + "description": "Query uses ARRAY_CONTAINS with the third argument set to true.", + "message": "Instead of ARRAY_CONTAINS, consider using EXISTS with a subquery, which may improve performance." + }, + "QA1001": { + "name": "DistinctAndJoin", + "description": "Query uses Distinct and Join.", + "message": "Instead of DISTINCT with a JOIN, consider using EXISTS with a subquery, which may improve performance." + }, + "QA1002": { + "name": "Contains", + "description": "Query uses CONTAINS.", + "message": "If you are matching on a string prefix, consider using STARTSWITH." + }, + "QA1003": { + "name": "CaseInsensitiveStartsWithOrStringEquals", + "description": "Query uses case-insensitive string search functions STARTSWITH or StringEquals.", + "message": "Instead of case-insensitive string search, consider creating a computed property with LOWER on the string field, which may improve performance." + }, + "QA1004": { + "name": "CaseInsensitiveEndsWith", + "description": "Query uses case-insensitive ENDSWITH.", + "message": "Instead of case-insensitive ENDSWITH, consider creating a computed property with REVERSE on the string field, and use STARTSWITH for comparison, which may improve performance."
+ }, + "QA1005": { + "name": "GroupByComputedProperty", + "description": "Query uses deterministic scalar expressions in Group By clause.", + "message": "Instead of using scalar expressions in GROUP BY clause, consider creating computed properties of these expressions, which may improve performance." + }, + "QA1006": { + "name": "UpperLowerComparison", + "description": "Query uses Upper or Lower string comparison.", + "message": "Consider defining a computed property on the UPPER/LOWER function expression." + }, + "QA1007": { + "name": "GetCurrentDateTime", + "description": "Query uses GetCurrentDateTime.", + "message": "Consider using GetCurrentDateTimeStatic instead of GetCurrentDateTime in the WHERE clause." + }, + "QA1008": { + "name": "GetCurrentTicks", + "description": "Query uses GetCurrentTicks.", + "message": "Consider using GetCurrentTicksStatic instead of GetCurrentTicks in the WHERE clause." + }, + "QA1009": { + "name": "GetCurrentTimestamp", + "description": "Query uses GetCurrentTimestamp.", + "message": "Consider using GetCurrentTimestampStatic instead of GetCurrentTimestamp in the WHERE clause." + } + } +} diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py index 1baf25be0c06..6b8e280f6fca 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_container.py @@ -549,6 +549,7 @@ def query_items( partition_key: PartitionKeyType, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, + populate_query_advice: Optional[bool] = None, priority: Optional[Literal["High", "Low"]] = None, response_hook: Optional[Callable[[Mapping[str, str], dict[str, Any]], None]] = None, session_token: Optional[str] = None, @@ -590,6 +591,8 @@ def query_items( existing indexes and how it could use potential new indexes. 
Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. :keyword bool populate_query_metrics: Enable returning query metrics in response headers. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -637,6 +640,7 @@ def query_items( parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, + populate_query_advice: Optional[bool] = None, priority: Optional[Literal["High", "Low"]] = None, response_hook: Optional[Callable[[Mapping[str, str], dict[str, Any]], None]] = None, session_token: Optional[str] = None, @@ -675,6 +679,8 @@ def query_items( existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. :keyword bool populate_query_metrics: Enable returning query metrics in response headers. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. 
@@ -721,6 +727,7 @@ def query_items( parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, + populate_query_advice: Optional[bool] = None, priority: Optional[Literal["High", "Low"]] = None, response_hook: Optional[Callable[[Mapping[str, str], dict[str, Any]], None]] = None, session_token: Optional[str] = None, @@ -758,6 +765,8 @@ def query_items( existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. :keyword bool populate_query_metrics: Enable returning query metrics in response headers. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -834,6 +843,8 @@ def query_items( :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword bool populate_query_metrics: Enable returning query metrics in response headers. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. 
Once the user has reached their provisioned throughput, low priority requests are throttled @@ -877,6 +888,8 @@ def query_items( feed_options["populateQueryMetrics"] = kwargs.pop("populate_query_metrics") if utils.valid_key_value_exist(kwargs, "populate_index_metrics"): feed_options["populateIndexMetrics"] = kwargs.pop("populate_index_metrics") + if utils.valid_key_value_exist(kwargs, "populate_query_advice"): + feed_options["populateQueryAdvice"] = kwargs.pop("populate_query_advice") if utils.valid_key_value_exist(kwargs, "enable_scan_in_query"): feed_options["enableScanInQuery"] = kwargs.pop("enable_scan_in_query") if utils.valid_key_value_exist(kwargs, "max_integrated_cache_staleness_in_ms"): diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py index 5e20f28d311c..f05dc85fde37 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/aio/_cosmos_client_connection_async.py @@ -56,6 +56,7 @@ from ..documents import ConnectionPolicy, DatabaseAccount from .._constants import _Constants as Constants from .._cosmos_responses import CosmosDict, CosmosList +from .._query_advisor import get_query_advice_info from .. import http_constants, exceptions from . import _query_iterable_async as query_iterable from .. 
import _runtime_constants as runtime_constants @@ -3176,6 +3177,9 @@ def __GetBodiesFromQueryResult(result: dict[str, Any]) -> list[dict[str, Any]]: INDEX_METRICS_HEADER = http_constants.HttpHeaders.IndexUtilization index_metrics_raw = self.last_response_headers[INDEX_METRICS_HEADER] self.last_response_headers[INDEX_METRICS_HEADER] = _utils.get_index_metrics_info(index_metrics_raw) + if self.last_response_headers.get(http_constants.HttpHeaders.QueryAdvice) is not None: + query_advice_raw = self.last_response_headers[http_constants.HttpHeaders.QueryAdvice] + self.last_response_headers[http_constants.HttpHeaders.QueryAdvice] = get_query_advice_info(query_advice_raw) if response_hook: response_hook(self.last_response_headers, result) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py index 92197a3a23c2..da50e5dbf69f 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/container.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/container.py @@ -752,6 +752,7 @@ def query_items( initial_headers: Optional[dict[str, str]] = None, max_integrated_cache_staleness_in_ms: Optional[int] = None, populate_index_metrics: Optional[bool] = None, + populate_query_advice: Optional[bool] = None, priority: Optional[Literal["High", "Low"]] = None, response_hook: Optional[Callable[[Mapping[str, str], dict[str, Any]], None]] = None, session_token: Optional[str] = None, @@ -785,6 +786,8 @@ def query_items( :keyword bool populate_index_metrics: Used to obtain the index metrics to understand how the query engine used existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword int continuation_token_limit: The size limit in kb of the response continuation token in the query response. 
Valid values are positive integers. A value of 0 is the same as not passing a value (default no limit). @@ -845,6 +848,7 @@ def query_items( parameters: Optional[list[dict[str, object]]] = None, populate_index_metrics: Optional[bool] = None, populate_query_metrics: Optional[bool] = None, + populate_query_advice: Optional[bool] = None, priority: Optional[Literal["High", "Low"]] = None, response_hook: Optional[Callable[[Mapping[str, str], dict[str, Any]], None]] = None, session_token: Optional[str] = None, @@ -886,6 +890,8 @@ def query_items( existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. :keyword bool populate_query_metrics: Enable returning query metrics in response headers. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. Feature must first be enabled at the account level. @@ -964,6 +970,8 @@ def query_items( # pylint:disable=docstring-missing-param existing indexes and how it could use potential new indexes. Please note that this option will incur overhead, so it should be enabled only when debugging slow queries. :keyword bool populate_query_metrics: Enable returning query metrics in response headers. + :keyword bool populate_query_advice: Used to obtain the query advice to understand aspects of the query that can + be optimized. :keyword Literal["High", "Low"] priority: Priority based execution allows users to set a priority for each request. Once the user has reached their provisioned throughput, low priority requests are throttled before high priority requests start getting throttled. 
Feature must first be enabled at the account level. @@ -1014,6 +1022,8 @@ def query_items( # pylint:disable=docstring-missing-param feed_options["populateQueryMetrics"] = kwargs.pop("populate_query_metrics") if utils.valid_key_value_exist(kwargs, "populate_index_metrics"): feed_options["populateIndexMetrics"] = kwargs.pop("populate_index_metrics") + if utils.valid_key_value_exist(kwargs, "populate_query_advice"): + feed_options["populateQueryAdvice"] = kwargs.pop("populate_query_advice") if utils.valid_key_value_exist(kwargs, "enable_scan_in_query"): feed_options["enableScanInQuery"] = kwargs.pop("enable_scan_in_query") if utils.valid_key_value_exist(kwargs, "max_integrated_cache_staleness_in_ms"): diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py b/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py index 9a6e0ef7b55d..a60ef0a48e5b 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/http_constants.py @@ -100,6 +100,7 @@ class HttpHeaders: QueryMetrics = "x-ms-documentdb-query-metrics" QueryExecutionInfo = "x-ms-cosmos-query-execution-info" IndexUtilization = "x-ms-cosmos-index-utilization" + QueryAdvice = "x-ms-cosmos-query-advice" # Our custom DocDB headers Continuation = "x-ms-continuation" @@ -131,6 +132,7 @@ class HttpHeaders: IsContinuationExpected = "x-ms-documentdb-query-iscontinuationexpected" PopulateQueryMetrics = "x-ms-documentdb-populatequerymetrics" PopulateIndexMetrics = "x-ms-cosmos-populateindexmetrics" + PopulateQueryAdvice = "x-ms-cosmos-populatequeryadvice" ResourceQuota = "x-ms-resource-quota" ResourceUsage = "x-ms-resource-usage" IntendedCollectionRID = "x-ms-cosmos-intended-collection-rid" diff --git a/sdk/cosmos/azure-cosmos/tests/test_none_options.py b/sdk/cosmos/azure-cosmos/tests/test_none_options.py index bcd08a11d799..8c39f3f12795 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_none_options.py +++ b/sdk/cosmos/azure-cosmos/tests/test_none_options.py @@ -70,6 
+70,7 @@ def test_container_query_items_none_options_partition(self): pager = self.container.query_items("SELECT * FROM c", continuation_token_limit=None, enable_scan_in_query=None, initial_headers=None, max_integrated_cache_staleness_in_ms=None, max_item_count=None, parameters=None, partition_key=None, populate_index_metrics=None, + populate_query_advice=None, populate_query_metrics=None, priority=None, response_hook=None, session_token=None, throughput_bucket=None, enable_cross_partition_query=True) items = list(pager) @@ -141,7 +142,7 @@ def test_delete_all_items_by_partition_key_none_options(self): pager = self.container.query_items("SELECT * FROM c WHERE c.pk = @pk", parameters=[{"name": "@pk", "value": pk_value}], partition_key=None, continuation_token_limit=None, enable_scan_in_query=None, initial_headers=None, max_integrated_cache_staleness_in_ms=None, max_item_count=None, - populate_index_metrics=None, populate_query_metrics=None, priority=None, + populate_index_metrics=None, populate_query_advice=None, populate_query_metrics=None, priority=None, response_hook=None, session_token=None, throughput_bucket=None) _items = list(pager) assert _items == _items diff --git a/sdk/cosmos/azure-cosmos/tests/test_none_options_async.py b/sdk/cosmos/azure-cosmos/tests/test_none_options_async.py index fd66b4c6e454..0045f53a1062 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_none_options_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_none_options_async.py @@ -74,6 +74,7 @@ async def test_container_query_items_none_options_partition_async(self): pager = self.container.query_items("SELECT * FROM c", continuation_token_limit=None, enable_scan_in_query=None, initial_headers=None, max_integrated_cache_staleness_in_ms=None, max_item_count=None, parameters=None, partition_key=None, populate_index_metrics=None, + populate_query_advice=None, populate_query_metrics=None, priority=None, response_hook=None, session_token=None, throughput_bucket=None) items = [doc async for doc in 
pager] @@ -148,7 +149,7 @@ async def test_delete_all_items_by_partition_key_none_options_async(self): pager = self.container.query_items("SELECT * FROM c WHERE c.pk = @pk", parameters=[{"name": "@pk", "value": pk_value}], partition_key=None, continuation_token_limit=None, enable_scan_in_query=None, initial_headers=None, max_integrated_cache_staleness_in_ms=None, max_item_count=None, - populate_index_metrics=None, populate_query_metrics=None, priority=None, + populate_index_metrics=None, populate_query_advice=None, populate_query_metrics=None, priority=None, response_hook=None, session_token=None, throughput_bucket=None) _items = [doc async for doc in pager] assert _items == _items diff --git a/sdk/cosmos/azure-cosmos/tests/test_query.py b/sdk/cosmos/azure-cosmos/tests/test_query.py index bcf283932a53..5d3b13073d76 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query.py @@ -111,6 +111,92 @@ def test_populate_index_metrics(self): self.assertDictEqual(expected_index_metrics, index_metrics) self.created_db.delete_container(created_collection.id) + @pytest.mark.skip(reason="Emulator does not support query advisor yet") + def test_populate_query_advice(self): + created_collection = self.created_db.create_container("query_advice_test", + PartitionKey(path="/pk")) + + doc_id = 'MyId' + str(uuid.uuid4()) + document_definition = { + 'pk': 'pk', 'id': doc_id, 'name': 'test document', + 'tags': [{'name': 'python'}, {'name': 'cosmos'}], + 'timestamp': '2099-01-01T00:00:00Z', 'ticks': 0, 'ts': 0 + } + created_collection.create_item(body=document_definition) + + QUERY_ADVICE_HEADER = http_constants.HttpHeaders.QueryAdvice + + # QA1000 - PartialArrayContains: ARRAY_CONTAINS with partial match + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE ARRAY_CONTAINS(c.tags, {"name": "python"}, true)', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = 
created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1000", query_advice) + + # QA1002 - Contains: CONTAINS usage + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE CONTAINS(c.name, "test")', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1002", query_advice) + + # QA1003 - CaseInsensitiveStartsWithOrStringEquals: case-insensitive STARTSWITH + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE STARTSWITH(c.name, "test", true)', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1003", query_advice) + + # QA1004 - CaseInsensitiveEndsWith: case-insensitive ENDSWITH + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE ENDSWITH(c.name, "document", true)', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1004", query_advice) + + # QA1007 - GetCurrentDateTime: usage of GetCurrentDateTime + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE c.timestamp < GetCurrentDateTime()', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1007", query_advice) + + # QA1008 - GetCurrentTicks: usage of GetCurrentTicks + query_iterable = 
created_collection.query_items( + query='SELECT * FROM c WHERE c.ticks < GetCurrentTicks()', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1008", query_advice) + + # QA1009 - GetCurrentTimestamp: usage of GetCurrentTimestamp + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE c.ts < GetCurrentTimestamp()', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1009", query_advice) + self.created_db.delete_container(created_collection.id) + # TODO: Need to validate the query request count logic @pytest.mark.skip def test_max_item_count_honored_in_order_by_query(self): diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py b/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py new file mode 100644 index 000000000000..9caf29d25825 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py @@ -0,0 +1,209 @@ +# The MIT License (MIT) +# Copyright (c) 2014 Microsoft Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import json +import unittest +from urllib.parse import quote + +from azure.cosmos._query_advisor import ( + QueryAdvice, + QueryAdviceEntry, + RuleDirectory, + get_query_advice_info +) + + +class TestQueryAdvisor(unittest.TestCase): + """Unit tests for query advisor functionality.""" + + def test_rule_directory_loads_rules(self): + """Test that RuleDirectory loads all 10 rules.""" + directory = RuleDirectory() + + # Check URL prefix + self.assertTrue(directory.url_prefix.startswith("https://")) + + # Check all 10 rules exist + for rule_id in [f"QA{1000 + i}" for i in range(10)]: + message = directory.get_rule_message(rule_id) + if rule_id == "QA1006": + self.assertIsNotNone(message) + self.assertIsInstance(message, str) + else: + self.assertIsNotNone(message, f"Rule {rule_id} should have a message") + self.assertIsInstance(message, str) + + def test_rule_directory_invalid_rule(self): + """Test that invalid rule IDs return None.""" + directory = RuleDirectory() + self.assertIsNone(directory.get_rule_message("INVALID_RULE")) + + def test_query_advice_entry_from_dict(self): + """Test creating QueryAdviceEntry from dictionary.""" + data = {"Id": "QA1000", "Params": ["param1", "param2"]} + entry = QueryAdviceEntry.from_dict(data) + + self.assertEqual(entry.id, "QA1000") + self.assertEqual(entry.parameters, ["param1", "param2"]) + + def test_query_advice_entry_to_string(self): + """Test formatting QueryAdviceEntry as string.""" + directory = RuleDirectory() + entry = QueryAdviceEntry("QA1000", 
[]) + + result = entry.to_string(directory) + + self.assertIsNotNone(result) + self.assertIn("QA1000:", result) + self.assertIn("ARRAY_CONTAINS", result) + self.assertIn(directory.url_prefix, result) + self.assertIn("QA1000", result) + + def test_query_advice_entry_with_parameters(self): + """Test formatting QueryAdviceEntry with parameters.""" + directory = RuleDirectory() + # Create a mock entry that would use parameters + entry = QueryAdviceEntry("QA1000", ["field1", "field2"]) + + result = entry.to_string(directory) + + self.assertIsNotNone(result) + self.assertIn("QA1000:", result) + + def test_query_advice_try_create_from_string_single_entry(self): + """Test parsing query advice with single entry.""" + # Create URL-encoded JSON + data = [{"Id": "QA1002", "Params": []}] + json_str = json.dumps(data) + encoded = quote(json_str) + + advice = QueryAdvice.try_create_from_string(encoded) + + self.assertIsNotNone(advice) + self.assertEqual(len(advice.entries), 1) + self.assertEqual(advice.entries[0].id, "QA1002") + + def test_query_advice_try_create_from_string_multiple_entries(self): + """Test parsing query advice with multiple entries.""" + # Create URL-encoded JSON + data = [ + {"Id": "QA1008", "Params": []}, + {"Id": "QA1009", "Params": []} + ] + json_str = json.dumps(data) + encoded = quote(json_str) + + advice = QueryAdvice.try_create_from_string(encoded) + + self.assertIsNotNone(advice) + self.assertEqual(len(advice.entries), 2) + self.assertEqual(advice.entries[0].id, "QA1008") + self.assertEqual(advice.entries[1].id, "QA1009") + + def test_query_advice_try_create_from_string_null_input(self): + """Test that None input returns None.""" + advice = QueryAdvice.try_create_from_string(None) + self.assertIsNone(advice) + + def test_query_advice_try_create_from_string_invalid_json(self): + """Test that invalid JSON returns None.""" + advice = QueryAdvice.try_create_from_string("not-valid-json") + self.assertIsNone(advice) + + def 
test_query_advice_try_create_from_string_empty_input(self): + """Test that empty string returns None.""" + advice = QueryAdvice.try_create_from_string("") + self.assertIsNone(advice) + + def test_query_advice_to_string_single_entry(self): + """Test formatting QueryAdvice with single entry.""" + data = [{"Id": "QA1002", "Params": []}] + json_str = json.dumps(data) + encoded = quote(json_str) + + advice = QueryAdvice.try_create_from_string(encoded) + result = advice.to_string() + + self.assertIsInstance(result, str) + self.assertIn("QA1002:", result) + self.assertIn("STARTSWITH", result) + self.assertIn("https://", result) + + def test_query_advice_to_string_multiple_entries(self): + """Test formatting QueryAdvice with multiple entries as multi-line string.""" + data = [ + {"Id": "QA1008", "Params": []}, + {"Id": "QA1009", "Params": []} + ] + json_str = json.dumps(data) + encoded = quote(json_str) + + advice = QueryAdvice.try_create_from_string(encoded) + result = advice.to_string() + + self.assertIsInstance(result, str) + lines = result.split("\n") + self.assertEqual(len(lines), 2) + self.assertIn("QA1008:", lines[0]) + self.assertIn("QA1009:", lines[1]) + self.assertIn("GetCurrentTicks", lines[0]) + self.assertIn("GetCurrentTimestamp", lines[1]) + + def test_query_advice_to_string_empty(self): + """Test formatting empty QueryAdvice.""" + advice = QueryAdvice([]) + result = advice.to_string() + self.assertIsInstance(result, str) + self.assertEqual(result, "") + + def test_get_query_advice_info_valid_input(self): + """Test end-to-end get_query_advice_info with valid input.""" + data = [{"Id": "QA1002", "Params": []}] + json_str = json.dumps(data) + encoded = quote(json_str) + + result = get_query_advice_info(encoded) + + self.assertIsInstance(result, str) + self.assertIn("QA1002:", result) + self.assertIn("STARTSWITH", result) + + def test_get_query_advice_info_null_input(self): + """Test get_query_advice_info with None input.""" + result = 
get_query_advice_info(None) + self.assertEqual(result, "") + + def test_get_query_advice_info_invalid_input(self): + """Test get_query_advice_info with invalid input.""" + result = get_query_advice_info("invalid-input") + self.assertEqual(result, "") + + def test_query_advice_filters_null_entries(self): + """Test that QueryAdvice filters out None entries.""" + advice = QueryAdvice([QueryAdviceEntry("QA1000"), None, QueryAdviceEntry("QA1002")]) + + self.assertEqual(len(advice.entries), 2) + self.assertEqual(advice.entries[0].id, "QA1000") + self.assertEqual(advice.entries[1].id, "QA1002") + + +if __name__ == "__main__": + unittest.main() diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_async.py b/sdk/cosmos/azure-cosmos/tests/test_query_async.py index 5cda55e88e70..0bf1802522de 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_async.py @@ -133,6 +133,94 @@ async def test_populate_index_metrics_async(self): await self.created_db.delete_container(created_collection.id) + @pytest.mark.skip(reason="Emulator does not support query advisor yet") + async def test_populate_query_advice_async(self): + created_collection = await self.created_db.create_container( + "query_advice_test" + str(uuid.uuid4()), + PartitionKey(path="/pk")) + doc_id = 'MyId' + str(uuid.uuid4()) + document_definition = { + 'pk': 'pk', 'id': doc_id, 'name': 'test document', + 'tags': [{'name': 'python'}, {'name': 'cosmos'}], + 'timestamp': '2099-01-01T00:00:00Z', 'ticks': 0, 'ts': 0 + } + await created_collection.create_item(body=document_definition) + await asyncio.sleep(1) + + QUERY_ADVICE_HEADER = http_constants.HttpHeaders.QueryAdvice + + # QA1000 - PartialArrayContains: ARRAY_CONTAINS with partial match + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE ARRAY_CONTAINS(c.tags, {"name": "python"}, true)', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + 
query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1000" in query_advice + + # QA1002 - Contains: CONTAINS usage + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE CONTAINS(c.name, "test")', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1002" in query_advice + + # QA1003 - CaseInsensitiveStartsWithOrStringEquals: case-insensitive STARTSWITH + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE STARTSWITH(c.name, "test", true)', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1003" in query_advice + + # QA1004 - CaseInsensitiveEndsWith: case-insensitive ENDSWITH + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE ENDSWITH(c.name, "document", true)', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1004" in query_advice + + # QA1007 - GetCurrentDateTime: usage of GetCurrentDateTime + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE c.timestamp < GetCurrentDateTime()', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1007" in query_advice + + # QA1008 - GetCurrentTicks: usage of GetCurrentTicks + 
query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE c.ticks < GetCurrentTicks()', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1008" in query_advice + + # QA1009 - GetCurrentTimestamp: usage of GetCurrentTimestamp + query_iterable = created_collection.query_items( + query='SELECT * FROM c WHERE c.ts < GetCurrentTimestamp()', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = created_collection.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1009" in query_advice + + await self.created_db.delete_container(created_collection.id) + # TODO: Need to validate the query request count logic @pytest.mark.skip async def test_max_item_count_honored_in_order_by_query_async(self): diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition.py b/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition.py index d3ccd204ecf7..6db8d5c2f3d2 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition.py @@ -225,6 +225,88 @@ def test_populate_index_metrics(self): 'PotentialCompositeIndexes': []} self.assertDictEqual(expected_index_metrics, index_metrics) + @pytest.mark.skip(reason="Emulator does not support query advisor yet") + def test_populate_query_advice(self): + doc_id = 'MyId' + str(uuid.uuid4()) + document_definition = { + 'pk': 'pk', 'id': doc_id, 'name': 'test document', + 'tags': [{'name': 'python'}, {'name': 'cosmos'}], + 'timestamp': '2099-01-01T00:00:00Z', 'ticks': 0, 'ts': 0 + } + self.created_container.create_item(body=document_definition) + + QUERY_ADVICE_HEADER = http_constants.HttpHeaders.QueryAdvice + + # QA1000 - PartialArrayContains: 
ARRAY_CONTAINS with partial match + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE ARRAY_CONTAINS(c.tags, {"name": "python"}, true)', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1000", query_advice) + + # QA1002 - Contains: CONTAINS usage + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE CONTAINS(c.name, "test")', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1002", query_advice) + + # QA1003 - CaseInsensitiveStartsWithOrStringEquals: case-insensitive STARTSWITH + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE STARTSWITH(c.name, "test", true)', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1003", query_advice) + + # QA1004 - CaseInsensitiveEndsWith: case-insensitive ENDSWITH + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE ENDSWITH(c.name, "document", true)', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1004", query_advice) + + # QA1007 - GetCurrentDateTime: usage of GetCurrentDateTime + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE c.timestamp < GetCurrentDateTime()', + partition_key='pk', populate_query_advice=True + ) + 
list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1007", query_advice) + + # QA1008 - GetCurrentTicks: usage of GetCurrentTicks + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE c.ticks < GetCurrentTicks()', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1008", query_advice) + + # QA1009 - GetCurrentTimestamp: usage of GetCurrentTimestamp + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE c.ts < GetCurrentTimestamp()', + partition_key='pk', populate_query_advice=True + ) + list(query_iterable) + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + self.assertIsNotNone(query_advice) + self.assertIn("QA1009", query_advice) + def test_get_query_plan_through_gateway(self): self._validate_query_plan(query="Select top 10 value count(c.id) from c", container_link=self.created_container.container_link, diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition_async.py b/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition_async.py index 73d76363b70b..0d7c0b7cc2dd 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition_async.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_cross_partition_async.py @@ -13,6 +13,7 @@ from azure.cosmos._execution_context.query_execution_info import _PartitionedQueryExecutionInfo from azure.cosmos.aio import CosmosClient, DatabaseProxy, ContainerProxy from azure.cosmos.documents import _DistinctType +from azure.cosmos import http_constants from azure.cosmos.exceptions import CosmosHttpResponseError from azure.cosmos.partition_key import PartitionKey @@ -231,6 +232,87 @@ async 
def validate_query_requests_count(self, query_iterable, expected_count): assert self.count == expected_count self.count = 0 + async def test_populate_query_advice(self): + doc_id = 'MyId' + str(uuid.uuid4()) + document_definition = { + 'pk': 'pk', 'id': doc_id, 'name': 'test document', + 'tags': [{'name': 'python'}, {'name': 'cosmos'}], + 'timestamp': '2099-01-01T00:00:00Z', 'ticks': 0, 'ts': 0 + } + await self.created_container.create_item(body=document_definition) + + QUERY_ADVICE_HEADER = http_constants.HttpHeaders.QueryAdvice + + # QA1000 - PartialArrayContains: ARRAY_CONTAINS with partial match + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE ARRAY_CONTAINS(c.tags, {"name": "python"}, true)', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1000" in query_advice + + # QA1002 - Contains: CONTAINS usage + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE CONTAINS(c.name, "test")', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1002" in query_advice + + # QA1003 - CaseInsensitiveStartsWithOrStringEquals: case-insensitive STARTSWITH + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE STARTSWITH(c.name, "test", true)', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1003" in query_advice + + # QA1004 - CaseInsensitiveEndsWith: case-insensitive ENDSWITH + query_iterable = 
self.created_container.query_items( + query='SELECT * FROM c WHERE ENDSWITH(c.name, "document", true)', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1004" in query_advice + + # QA1007 - GetCurrentDateTime: usage of GetCurrentDateTime + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE c.timestamp < GetCurrentDateTime()', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1007" in query_advice + + # QA1008 - GetCurrentTicks: usage of GetCurrentTicks + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE c.ticks < GetCurrentTicks()', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1008" in query_advice + + # QA1009 - GetCurrentTimestamp: usage of GetCurrentTimestamp + query_iterable = self.created_container.query_items( + query='SELECT * FROM c WHERE c.ts < GetCurrentTimestamp()', + partition_key='pk', populate_query_advice=True + ) + [item async for item in query_iterable] + query_advice = self.created_container.client_connection.last_response_headers.get(QUERY_ADVICE_HEADER) + assert query_advice is not None + assert "QA1009" in query_advice + async def _mock_execute_function(self, function, *args, **kwargs): self.count += 1 return await self.OriginalExecuteFunction(function, *args, **kwargs) From d3375a361b1443e839d6e7b3a36677ea8b5b0967 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 24 Feb 2026 11:09:53 
-0800 Subject: [PATCH 2/5] Adding changelog, resolving comments --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 1 + sdk/cosmos/azure-cosmos/MANIFEST.in | 1 + .../_query_advisor/_get_query_advice_info.py | 30 ++++----- .../cosmos/_query_advisor/_rule_directory.py | 65 +++++++++++-------- .../_query_advisor/query_advice_rules.json | 2 +- sdk/cosmos/azure-cosmos/setup.py | 3 + .../azure-cosmos/tests/test_query_advisor.py | 20 +----- 7 files changed, 55 insertions(+), 67 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index ae40a6fe33a8..f78359dfa4a2 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -3,6 +3,7 @@ ### 4.15.1 (Unreleased) #### Features Added +* Added Query Advisor support for Python SDK. See [PR 45331](https://github.com/Azure/azure-sdk-for-python/pull/45331) #### Breaking Changes diff --git a/sdk/cosmos/azure-cosmos/MANIFEST.in b/sdk/cosmos/azure-cosmos/MANIFEST.in index 960a35145f02..b411338960d1 100644 --- a/sdk/cosmos/azure-cosmos/MANIFEST.in +++ b/sdk/cosmos/azure-cosmos/MANIFEST.in @@ -4,4 +4,5 @@ include azure/__init__.py recursive-include samples *.py *.md recursive-include tests *.py include azure/cosmos/py.typed +recursive-include azure/cosmos/_query_advisor *.json recursive-include doc *.rst diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py index b7c3c6007f41..3fcab90373dd 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_get_query_advice_info.py @@ -10,34 +10,26 @@ from ._query_advice import QueryAdvice - +# cspell:ignore STARTSWTIH def get_query_advice_info(header_value: Optional[str]) -> str: - """Process a query advice response header into a formatted string. 
- - This function takes the raw query advice response header (URL-encoded JSON), + """Process a query advice response header into a formatted human-readable string. + + Takes the raw ``x-ms-cosmos-query-advice`` response header (URL-encoded JSON), decodes it, parses the query advice entries, enriches them with human-readable messages from the rule directory, and returns a formatted multi-line string. - - Args: - header_value: The raw query advice response header value (URL-encoded JSON) - - Returns: - Formatted string with query advice entries, or empty string if parsing fails - - Example: - >>> header = "QA1002%3A%20Instead%20of%20CONTAINS..." - >>> advice = get_query_advice_info(header) - >>> print(advice) - QA1002: Instead of CONTAINS, consider using STARTSWTIH or computed properties... + + :param str header_value: The raw query advice response header value (URL-encoded JSON). + :returns: Formatted string with query advice entries, or empty string if parsing fails. + :rtype: str """ if header_value is None: return "" - + # Parse the query advice from the header query_advice = QueryAdvice.try_create_from_string(header_value) - + if query_advice is None: return "" - + # Format as string return query_advice.to_string() diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py index 52338a9afcc0..e6525b499af1 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_rule_directory.py @@ -7,60 +7,69 @@ """Rule directory singleton for loading and accessing query advice rules.""" import json -import os +from importlib.resources import files from typing import Any, Dict, Optional class RuleDirectory: """Singleton for loading and accessing query advice rules. - + The rule directory lazy-loads the query_advice_rules.json file and provides access to rule messages and URL prefix. 
+ Uses importlib.resources so it works correctly in all packaging + scenarios including zip-safe wheels. """ - + _instance: Optional["RuleDirectory"] = None - + def __new__(cls) -> "RuleDirectory": if cls._instance is None: cls._instance = super().__new__(cls) - cls._instance._initialized = False return cls._instance - + def __init__(self) -> None: - if self._initialized: + # Guard so the singleton body only runs once. + if getattr(self, "_initialized", False): return - - self._initialized = True + + self._initialized: bool = True self._rules: Dict[str, Dict[str, Any]] = {} self._url_prefix: str = "" self._load_rules() - + def _load_rules(self) -> None: - """Load rules from the JSON file.""" + """Load rules from the bundled JSON resource.""" try: - rules_file = os.path.join(os.path.dirname(__file__), "query_advice_rules.json") - with open(rules_file, "r", encoding="utf-8") as f: - data = json.load(f) - self._url_prefix = data.get("url_prefix", "") - self._rules = data.get("rules", {}) - except (IOError, json.JSONDecodeError): - # If we can't load rules, use empty defaults - self._url_prefix = "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/queryadvisor/" + resource_text = ( + files(__package__) + .joinpath("query_advice_rules.json") + .read_text(encoding="utf-8") + ) + data = json.loads(resource_text) + self._url_prefix = data.get("url_prefix", "") + self._rules = data.get("rules", {}) + except Exception: # pylint: disable=broad-except + # Silently fall back to empty rules so query execution + # is never blocked by an inability to load advice text. + self._url_prefix = ( + "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/queryadvisor/" + ) self._rules = {} - + @property def url_prefix(self) -> str: - """Get the URL prefix for documentation links.""" + """Get the URL prefix for documentation links. 
+ + :rtype: str + """ return self._url_prefix - + def get_rule_message(self, rule_id: str) -> Optional[str]: """Get the message for a given rule ID. - - Args: - rule_id: The rule identifier (e.g., "QA1000") - - Returns: - The rule message, or None if the rule is not found + + :param str rule_id: The rule identifier (e.g., ``QA1000``). + :returns: The rule message, or ``None`` if the rule is not found. + :rtype: str or None """ rule = self._rules.get(rule_id) if rule: diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json index c6ade8f48638..bced2e538a22 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/query_advice_rules.json @@ -3,7 +3,7 @@ "rules": { "QA1000": { "name": "PartialArrayContains", - "description": "Query uses ARRAY_CONTAINS with the the third argument set to true.", + "description": "Query uses ARRAY_CONTAINS with the third argument set to true.", "message": "Instead of ARRAY_CONTAINS, consider using EXISTS with a subquery, which may improve performance." 
}, "QA1001": { diff --git a/sdk/cosmos/azure-cosmos/setup.py b/sdk/cosmos/azure-cosmos/setup.py index 1a96740eac2c..e4f85386dad1 100644 --- a/sdk/cosmos/azure-cosmos/setup.py +++ b/sdk/cosmos/azure-cosmos/setup.py @@ -71,6 +71,9 @@ ], zip_safe=False, packages=find_packages(exclude=exclude_packages), + package_data={ + "azure.cosmos._query_advisor": ["query_advice_rules.json"], + }, python_requires=">=3.9", install_requires=[ "azure-core>=1.30.0", diff --git a/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py b/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py index 9caf29d25825..86efd87692fb 100644 --- a/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py +++ b/sdk/cosmos/azure-cosmos/tests/test_query_advisor.py @@ -1,23 +1,5 @@ # The MIT License (MIT) -# Copyright (c) 2014 Microsoft Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright (c) Microsoft Corporation. All rights reserved. 
import json import unittest From c84c1e2dd1383c843e2bdde61c08a07c7bf1f3d4 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Tue, 24 Feb 2026 14:13:44 -0800 Subject: [PATCH 3/5] fixing lint issues --- .../cosmos/_query_advisor/_query_advice.py | 79 +++++++++---------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py index 2baed1b412f2..41de4b8cbdb0 100644 --- a/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py +++ b/sdk/cosmos/azure-cosmos/azure/cosmos/_query_advisor/_query_advice.py @@ -19,7 +19,7 @@ class QueryAdviceEntry: Each entry contains a rule ID and optional parameters that provide specific guidance for query optimization. """ - + def __init__(self, rule_id: str, parameters: Optional[List[str]] = None) -> None: """Initialize a query advice entry. @@ -29,27 +29,26 @@ def __init__(self, rule_id: str, parameters: Optional[List[str]] = None) -> None """ self.id = rule_id self.parameters = parameters or [] - + def to_string(self, rule_directory: RuleDirectory) -> Optional[str]: """Format the query advice entry as a human-readable string. - - Args: - rule_directory: Rule directory instance for looking up messages - - Returns: - Formatted string with rule ID, message, and documentation link, - or None if the rule message cannot be found + + :param rule_directory: Rule directory instance for looking up messages. + :type rule_directory: ~azure.cosmos._query_advisor._rule_directory.RuleDirectory + :returns: Formatted string with rule ID, message, and documentation link, + or None if the rule message cannot be found. + :rtype: str or None """ if self.id is None: return None - + message = rule_directory.get_rule_message(self.id) if message is None: return None - + # Format: {id}: {message}. 
For more information, please visit {url_prefix}{id} result = f"{self.id}: " - + # Format message with parameters if available if self.parameters: try: @@ -59,21 +58,20 @@ def to_string(self, rule_directory: RuleDirectory) -> Optional[str]: result += message else: result += message - + # Add documentation link result += f" For more information, please visit {rule_directory.url_prefix}{self.id}" - + return result - + @classmethod def from_dict(cls, data: Dict[str, Any]) -> "QueryAdviceEntry": """Create a QueryAdviceEntry from a dictionary. - - Args: - data: Dictionary with "Id" and optional "Params" keys - - Returns: - QueryAdviceEntry instance + + :param data: Dictionary with "Id" and optional "Params" keys. + :type data: dict[str, any] + :returns: QueryAdviceEntry instance. + :rtype: ~azure.cosmos._query_advisor._query_advice.QueryAdviceEntry """ rule_id = data.get("Id", "") parameters = data.get("Params", []) @@ -86,7 +84,7 @@ class QueryAdvice: Represents the complete query advice response from Azure Cosmos DB, containing one or more optimization recommendations. """ - + def __init__(self, entries: Optional[List[QueryAdviceEntry]] = None) -> None: """Initialize query advice with a list of entries. @@ -94,52 +92,51 @@ def __init__(self, entries: Optional[List[QueryAdviceEntry]] = None) -> None: entries: List of QueryAdviceEntry objects """ self.entries = [e for e in (entries or []) if e is not None] - + def to_string(self) -> str: """Format all query advice entries as a multi-line string. - - Returns: - Formatted string with each entry on a separate line + + :returns: Formatted string with each entry on a separate line. 
+ :rtype: str """ if not self.entries: return "" - + rule_directory = RuleDirectory() lines = [] - + for entry in self.entries: formatted = entry.to_string(rule_directory) if formatted: lines.append(formatted) - + return "\n".join(lines) - + @classmethod def try_create_from_string(cls, response_header: Optional[str]) -> Optional["QueryAdvice"]: """Parse query advice from a URL-encoded JSON response header. - - Args: - response_header: URL-encoded JSON string from the response header - - Returns: - QueryAdvice instance if parsing succeeds, None otherwise + + :param response_header: URL-encoded JSON string from the response header. + :type response_header: str or None + :returns: QueryAdvice instance if parsing succeeds, None otherwise. + :rtype: ~azure.cosmos._query_advisor._query_advice.QueryAdvice or None """ if response_header is None: return None - + try: # URL-decode the header value decoded_string = unquote(response_header) - + # Parse JSON into list of entry dictionaries data = json.loads(decoded_string) - + if not isinstance(data, list): return None - + # Convert dictionaries to QueryAdviceEntry objects entries = [QueryAdviceEntry.from_dict(item) for item in data if isinstance(item, dict)] - + return cls(entries) except (json.JSONDecodeError, ValueError, AttributeError): return None From 730f965fbf7fea8554bf8b0c3b3398641f33c6a6 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Wed, 25 Feb 2026 09:44:48 -0800 Subject: [PATCH 4/5] fixing build issues --- sdk/cosmos/azure-cosmos/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/cosmos/azure-cosmos/setup.py b/sdk/cosmos/azure-cosmos/setup.py index e4f85386dad1..a20349560d85 100644 --- a/sdk/cosmos/azure-cosmos/setup.py +++ b/sdk/cosmos/azure-cosmos/setup.py @@ -72,6 +72,7 @@ zip_safe=False, packages=find_packages(exclude=exclude_packages), package_data={ + "azure.cosmos": ["py.typed"], "azure.cosmos._query_advisor": ["query_advice_rules.json"], }, python_requires=">=3.9", From 
c092a67f25be8e0fc5bdd06d239d9cb06176da81 Mon Sep 17 00:00:00 2001 From: Aayush Kataria Date: Fri, 27 Feb 2026 08:45:07 -0800 Subject: [PATCH 5/5] Updating changelog --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index f78359dfa4a2..9953e090c2a0 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -3,7 +3,7 @@ ### 4.15.1 (Unreleased) #### Features Added -* Added Query Advisor support for Python SDK. See [PR 45331](https://github.com/Azure/azure-sdk-for-python/pull/45331) +* Added support for Query Advisor feature - See [PR 45331](https://github.com/Azure/azure-sdk-for-python/pull/45331) #### Breaking Changes