From d090224ec2f2af3030ff3ffe09667b3f21246884 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Tue, 27 Jan 2026 21:35:29 -0500
Subject: [PATCH 01/24] update dt functions

---
 .../_variable_type_checks.py                  | 12 ++++-----
 .../test_fe_type_checks.py                    | 26 +++++++++++++++++++
 2 files changed, 32 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_variable_handling/test_fe_type_checks.py

diff --git a/feature_engine/variable_handling/_variable_type_checks.py b/feature_engine/variable_handling/_variable_type_checks.py
index c3e16d383..044c2667d 100644
--- a/feature_engine/variable_handling/_variable_type_checks.py
+++ b/feature_engine/variable_handling/_variable_type_checks.py
@@ -1,9 +1,7 @@
-import warnings
-
 import pandas as pd
+from pandas.api.types import is_string_dtype as is_object
 from pandas.core.dtypes.common import is_datetime64_any_dtype as is_datetime
 from pandas.core.dtypes.common import is_numeric_dtype as is_numeric
-from pandas.core.dtypes.common import is_object_dtype as is_object
 
 
 def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool:
@@ -25,9 +23,11 @@ def _is_categories_num(column: pd.Series) -> bool:
 
 
 def _is_convertible_to_dt(column: pd.Series) -> bool:
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        return is_datetime(pd.to_datetime(column, errors="ignore", utc=True))
+    try:
+        var = pd.to_datetime(column, utc=True)
+        return is_datetime(var)
+    except:
+        return False
 
 
 def _is_convertible_to_num(column: pd.Series) -> bool:
diff --git a/tests/test_variable_handling/test_fe_type_checks.py b/tests/test_variable_handling/test_fe_type_checks.py
new file mode 100644
index 000000000..ecf553e90
--- /dev/null
+++ b/tests/test_variable_handling/test_fe_type_checks.py
@@ -0,0 +1,26 @@
+import pytest
+
+from feature_engine.variable_handling._variable_type_checks import (
+    _is_categorical_and_is_datetime,
+    _is_categorical_and_is_not_datetime,
+    _is_convertible_to_dt,
+
+)
+
+def test_is_convertible_to_num(df):
+    assert _is_convertible_to_dt(df["Name"]) is False
+    assert _is_convertible_to_dt(df["date_obj0"]) is True
+
+def test_is_convertible_to_dt(df):
+    assert _is_convertible_to_dt(df["date_obj0"]) is True
+    assert _is_convertible_to_dt(df["date_range"]) is True
+    assert _is_convertible_to_dt(df["Name"]) is False
+
+def test_is_categorical_and_is_datetime(df):
+    assert _is_categorical_and_is_datetime(df["date_obj0"]) is True
+    assert _is_categorical_and_is_datetime(df["Name"]) is False
+
+def test_is_categorical_and_is_not_datetime(df):
+    assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
+    assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
+    assert _is_categorical_and_is_not_datetime(df["Name"]) is True

From 6ff27aa30a115ae7354dcef2b692a1c72b0313a1 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Tue, 27 Jan 2026 21:59:00 -0500
Subject: [PATCH 02/24] expand tests

---
 .../test_fe_type_checks.py                    | 26 ++++++++++++++-----
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/tests/test_variable_handling/test_fe_type_checks.py b/tests/test_variable_handling/test_fe_type_checks.py
index ecf553e90..b0e991617 100644
--- a/tests/test_variable_handling/test_fe_type_checks.py
+++ b/tests/test_variable_handling/test_fe_type_checks.py
@@ -1,26 +1,40 @@
-import pytest
-
 from feature_engine.variable_handling._variable_type_checks import (
     _is_categorical_and_is_datetime,
     _is_categorical_and_is_not_datetime,
     _is_convertible_to_dt,
-
+    _is_convertible_to_num,
+    _is_categories_num,
 )
 
 def test_is_convertible_to_num(df):
-    assert _is_convertible_to_dt(df["Name"]) is False
-    assert _is_convertible_to_dt(df["date_obj0"]) is True
+    assert _is_convertible_to_num(df["Name"]) is False
+    assert _is_convertible_to_num(df["date_obj0"]) is False
+
+    df["age_str"] = ["20", "21", "19", "18"]
+    assert _is_convertible_to_num(df["age_str"]) is True
+
 
 def test_is_convertible_to_dt(df):
     assert _is_convertible_to_dt(df["date_obj0"]) is True
     assert _is_convertible_to_dt(df["date_range"]) is True
     assert _is_convertible_to_dt(df["Name"]) is False
 
-def test_is_categorical_and_is_datetime(df):
+    df["age_str"] = ["20", "21", "19", "18"]
+    assert _is_convertible_to_dt(df["age_str"]) is False
+
+
+def test_is_categorical_and_is_datetime(df, df_datetime):
     assert _is_categorical_and_is_datetime(df["date_obj0"]) is True
     assert _is_categorical_and_is_datetime(df["Name"]) is False
+    assert _is_categorical_and_is_datetime(df_datetime["date_obj1"]) is True
+
+    df["age_str"] = ["20", "21", "19", "18"]
+    assert _is_categorical_and_is_datetime(df["age_str"]) is False
 
 def test_is_categorical_and_is_not_datetime(df):
     assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
     assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
     assert _is_categorical_and_is_not_datetime(df["Name"]) is True
+
+    df["age_str"] = ["20", "21", "19", "18"]
+    assert _is_categorical_and_is_not_datetime(df["age_str"]) is True
\ No newline at end of file

From 9d443033a03899f8df130cf2237f9e0ee1d792d0 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Tue, 27 Jan 2026 22:16:41 -0500
Subject: [PATCH 03/24] expand tests

---
 tests/test_variable_handling/test_fe_type_checks.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/test_variable_handling/test_fe_type_checks.py b/tests/test_variable_handling/test_fe_type_checks.py
index b0e991617..ad915b611 100644
--- a/tests/test_variable_handling/test_fe_type_checks.py
+++ b/tests/test_variable_handling/test_fe_type_checks.py
@@ -1,11 +1,19 @@
 from feature_engine.variable_handling._variable_type_checks import (
     _is_categorical_and_is_datetime,
     _is_categorical_and_is_not_datetime,
+    _is_categories_num,
     _is_convertible_to_dt,
     _is_convertible_to_num,
-    _is_categories_num,
 )
 
+
+def test_is_categories_num(df):
+    assert _is_categories_num(df["Name"]) is False
+
+    df["Age"] = df["Age"].astype("category")
+    assert _is_categories_num(df["Age"]) is True
+
+
 def test_is_convertible_to_num(df):
     assert _is_convertible_to_num(df["Name"]) is False
     assert _is_convertible_to_num(df["date_obj0"]) is False
@@ -31,10 +39,11 @@ def test_is_categorical_and_is_datetime(df, df_datetime):
     df["age_str"] = ["20", "21", "19", "18"]
     assert _is_categorical_and_is_datetime(df["age_str"]) is False
 
+
 def test_is_categorical_and_is_not_datetime(df):
     assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
     assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
     assert _is_categorical_and_is_not_datetime(df["Name"]) is True
 
     df["age_str"] = ["20", "21", "19", "18"]
-    assert _is_categorical_and_is_not_datetime(df["age_str"]) is True
\ No newline at end of file
+    assert _is_categorical_and_is_not_datetime(df["age_str"]) is True

From de4d663031123cb2c4c38d378388a1f9d82ba82b Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Tue, 27 Jan 2026 22:40:13 -0500
Subject: [PATCH 04/24] update for new pandas behaviour

---
 feature_engine/variable_handling/_variable_type_checks.py | 2 ++
 tests/test_variable_handling/test_fe_type_checks.py       | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/feature_engine/variable_handling/_variable_type_checks.py b/feature_engine/variable_handling/_variable_type_checks.py
index 044c2667d..fb54c997e 100644
--- a/feature_engine/variable_handling/_variable_type_checks.py
+++ b/feature_engine/variable_handling/_variable_type_checks.py
@@ -49,4 +49,6 @@ def _is_categorical_and_is_datetime(column: pd.Series) -> bool:
     elif isinstance(column.dtype, pd.CategoricalDtype):
         is_dt = not _is_categories_num(column) and _is_convertible_to_dt(column)
 
+    else:
+        is_dt = False
     return is_dt
diff --git a/tests/test_variable_handling/test_fe_type_checks.py b/tests/test_variable_handling/test_fe_type_checks.py
index ad915b611..86c5609b8 100644
--- a/tests/test_variable_handling/test_fe_type_checks.py
+++ b/tests/test_variable_handling/test_fe_type_checks.py
@@ -39,6 +39,12 @@ def test_is_categorical_and_is_datetime(df, df_datetime):
     df["age_str"] = ["20", "21", "19", "18"]
     assert _is_categorical_and_is_datetime(df["age_str"]) is False
 
+    df = df.copy()
+    # from pandas 3 onwards, object types that contain strings are not recognised as
+    # objects any more
+    df["Age"] = df["Age"].astype("O")
+    assert _is_categorical_and_is_datetime(df["Age"]) is False
+
 
 def test_is_categorical_and_is_not_datetime(df):
     assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False

From 7f8883911ca49c8216bd6afa2fe7e0e1fbb70146 Mon Sep 17 00:00:00 2001
From: Ankit Hemant Lade <ankitlade12@gmail.com>
Date: Fri, 6 Feb 2026 14:56:23 -0600
Subject: [PATCH 05/24] fix: Pandas 3 compatibility - robust dtype checks and
 test fixes (#885)

* fix: Pandas 3 compatibility - robust dtype checks and test fixes

- Fix UnboundLocalError in _variable_type_checks.py by initializing is_cat/is_dt
- Add robust dtype checking using both is_object_dtype and is_string_dtype
- Update find_variables.py with same robust logic for consistency
- Fix warning count assertions in encoder tests (Pandas 3 adds extra deprecation warnings)
- Fix floating point precision assertion in recursive feature elimination test
- Apply ruff formatting and fix linting errors
- All 1900 tests passing

* fix: Remove whitespace before colon in slice notation (flake8 E203)

* feat: finalize Pandas 3 compatibility fixes and test updates

* style: fix flake8 line length and linting issues

* style: fix remaining flake8 C416 issue

* Fix Pandas 3 regressions in check_y, _check_contains_inf, and StringSimilarityEncoder

* Fix E501 line too long in dataframe_checks.py

* Fix StringSimilarityEncoder NaN issues and fragile test assertions

* fix: Pandas 3 stability - mock datasets and fix FutureWarnings

* style: fix flake8 linting errors E501, E302, E305, SIM102

* test: improve patch coverage for Pandas 3 stability fixes

* style: fix E501 line too long in similarity encoder tests

* style: revert unrelated flake8 and formatting changes

* fix: restore Pandas 3 test logic and silence Pandas4Warning

* style: move numpy import to top of math_features.py

* style: fix spacing in MatchVariables verbose error message

* test: revert dynamic std values to hardcoded values in MathFeatures tests

* style: combine imports in _variable_type_checks.py

* refactor: centralize is_object function and use it across the codebase

* refactor: further simplify check_y dtype checks using is_object

* revert: remove unnecessary complexity in _check_contains_inf and associated tests

* docs: rename _normalize_func to _map_unnamed_func_to_str and add comments

* perf: optimize casting logic in SimilarityEncoder

* fix: address remaining code review feedback - follow sklearn convention for init params - make tests conditional on pandas version - restore encoder_dict_ assertion

* style: fix linting and follow sklearn convention for MathFeatures

* revert: remove california housing mock from conftest.py

* revert: restore original error message assertion in DatetimeFeatures test

* fix: use robust datetime normalization and flexible error assertions in tests
---
 feature_engine/creation/math_features.py      |  81 +++++++++----
 feature_engine/dataframe_checks.py            |   8 +-
 feature_engine/encoding/similarity_encoder.py |  47 +++++---
 feature_engine/preprocessing/match_columns.py |   9 +-
 .../timeseries/forecasting/lag_features.py    |   4 +-
 .../timeseries/forecasting/window_features.py |   2 +-
 .../_variable_type_checks.py                  |  36 +++---
 .../variable_handling/find_variables.py       |  18 +--
 tests/test_creation/test_math_features.py     |   3 -
 tests/test_dataframe_checks.py                |  37 ++++--
 tests/test_datetime/test_datetime_features.py |   4 +-
 tests/test_encoding/test_mean_encoder.py      |   9 +-
 tests/test_encoding/test_ordinal_encoder.py   |   9 +-
 .../test_encoding/test_similarity_encoder.py  |  25 ++++
 .../test_woe/test_woe_encoder.py              |   9 +-
 .../test_preprocessing/test_match_columns.py  |   6 +-
 .../test_recursive_feature_elimination.py     |   4 +-
 .../test_fe_type_checks.py                    |  38 ++++++
 tests/test_wrappers/test_sklearn_wrapper.py   | 109 +++++++++++++++++-
 19 files changed, 362 insertions(+), 96 deletions(-)

diff --git a/feature_engine/creation/math_features.py b/feature_engine/creation/math_features.py
index 35cbe73aa..5537c876f 100644
--- a/feature_engine/creation/math_features.py
+++ b/feature_engine/creation/math_features.py
@@ -1,5 +1,6 @@
 from typing import Any, List, Optional, Union
 
+import numpy as np
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
@@ -140,7 +141,6 @@ def __init__(
         missing_values: str = "raise",
         drop_original: bool = False,
     ) -> None:
-
         if (
             not isinstance(variables, list)
             or not all(isinstance(var, (int, str)) for var in variables)
@@ -157,16 +157,15 @@ def __init__(
                 "func does not work with dictionaries in this transformer."
             )
 
-        if new_variables_names is not None:
-            if (
-                not isinstance(new_variables_names, list)
-                or not all(isinstance(var, str) for var in new_variables_names)
-                or len(set(new_variables_names)) != len(new_variables_names)
-            ):
-                raise ValueError(
-                    "new_variable_names should be None or a list of unique strings. "
-                    f"Got {new_variables_names} instead."
-                )
+        if new_variables_names is not None and (
+            not isinstance(new_variables_names, list)
+            or not all(isinstance(var, str) for var in new_variables_names)
+            or len(set(new_variables_names)) != len(new_variables_names)
+        ):
+            raise ValueError(
+                "new_variable_names should be None or a list of unique strings. "
+                f"Got {new_variables_names} instead."
+            )
 
         if new_variables_names is not None:
             if isinstance(func, list):
@@ -175,12 +174,11 @@ def __init__(
                         "The number of new feature names must coincide with the number "
                         "of functions."
                     )
-            else:
-                if len(new_variables_names) != 1:
-                    raise ValueError(
-                        "The number of new feature names must coincide with the number "
-                        "of functions."
-                    )
+            elif len(new_variables_names) != 1:
+                raise ValueError(
+                    "The number of new feature names must coincide with the number "
+                    "of functions."
+                )
 
         super().__init__(missing_values, drop_original)
 
@@ -188,6 +186,45 @@ def __init__(
         self.func = func
         self.new_variables_names = new_variables_names
 
+    def _map_unnamed_func_to_str(self, func: Any) -> Any:
+        if isinstance(func, list):
+            return [self._map_unnamed_func_to_str(f) for f in func]
+
+        # We map certain numpy functions to their string alias.
+        # This serves two purposes:
+        # 1) It avoids a FutureWarning in pandas 2.1+ which recommends
+        # using the string alias for better performance and future-proofing.
+        # 2) It ensures consistent column naming (e.g. "sum_x1_x2")
+        # regardless of how the function was passed (np.sum vs "sum").
+        map_dict = {
+            np.sum: "sum",
+            np.mean: "mean",
+            np.std: "std",
+            np.min: "min",
+            np.max: "max",
+            np.median: "median",
+            np.prod: "prod",
+        }
+        return map_dict.get(func, func)
+
+    def fit(self, X: pd.DataFrame, y=None):
+        """
+        This method does not learn any parameters. It just stores the normalized
+        function representation.
+
+        Parameters
+        ----------
+        X: pandas dataframe of shape = [n_samples, n_features]
+            The training input samples.
+
+        y: pandas Series, or np.array. Defaults to None.
+            It is not needed in this transformer. You can pass y or None.
+        """
+        super().fit(X, y)
+        # Normalize func to func_ (sklearn convention: don't modify init params)
+        self.func_ = self._map_unnamed_func_to_str(self.func)
+        return self
+
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """
         Create and add new variables.
@@ -207,9 +244,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         new_variable_names = self._get_new_features_name()
 
         if len(new_variable_names) == 1:
-            X[new_variable_names[0]] = X[self.variables].agg(self.func, axis=1)
+            X[new_variable_names[0]] = X[self.variables].agg(self.func_, axis=1)
         else:
-            X[new_variable_names] = X[self.variables].agg(self.func, axis=1)
+            X[new_variable_names] = X[self.variables].agg(self.func_, axis=1)
 
         if self.drop_original:
             X.drop(columns=self.variables, inplace=True)
@@ -226,14 +263,14 @@ def _get_new_features_name(self) -> List:
         else:
             varlist = [f"{var}" for var in self.variables_]
 
-            if isinstance(self.func, list):
+            if isinstance(self.func_, list):
                 functions = [
-                    fun if type(fun) is str else fun.__name__ for fun in self.func
+                    fun if type(fun) is str else fun.__name__ for fun in self.func_
                 ]
                 feature_names = [
                     f"{function}_{'_'.join(varlist)}" for function in functions
                 ]
             else:
-                feature_names = [f"{self.func}_{'_'.join(varlist)}"]
+                feature_names = [f"{self.func_}_{'_'.join(varlist)}"]
 
         return feature_names
diff --git a/feature_engine/dataframe_checks.py b/feature_engine/dataframe_checks.py
index 2d41727f7..9ef9b3f82 100644
--- a/feature_engine/dataframe_checks.py
+++ b/feature_engine/dataframe_checks.py
@@ -9,6 +9,8 @@
 from scipy.sparse import issparse
 from sklearn.utils.validation import _check_y, check_consistent_length, column_or_1d
 
+from feature_engine.variable_handling._variable_type_checks import is_object
+
 
 def check_X(X: Union[np.generic, np.ndarray, pd.DataFrame]) -> pd.DataFrame:
     """
@@ -121,10 +123,10 @@ def check_y(
     elif isinstance(y, pd.Series):
         if y.isnull().any():
             raise ValueError("y contains NaN values.")
-        if y.dtype != "O" and not np.isfinite(y).all():
+        if not is_object(y) and not np.isfinite(y).all():
             raise ValueError("y contains infinity values.")
-        if y_numeric and y.dtype == "O":
-            y = y.astype("float")
+        if y_numeric and is_object(y):
+            y = y.astype("float64")
         y = y.copy()
 
     elif isinstance(y, pd.DataFrame):
diff --git a/feature_engine/encoding/similarity_encoder.py b/feature_engine/encoding/similarity_encoder.py
index 137034ddb..2599d2f91 100644
--- a/feature_engine/encoding/similarity_encoder.py
+++ b/feature_engine/encoding/similarity_encoder.py
@@ -232,12 +232,13 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         X = check_X(X)
         variables_ = self._check_or_select_variables(X)
 
-        if self.keywords:
-            if not all(item in variables_ for item in self.keywords.keys()):
-                raise ValueError(
-                    "There are variables in keywords that are not present "
-                    "in the dataset."
-                )
+        if self.keywords and not all(
+            item in variables_ for item in self.keywords.keys()
+        ):
+            raise ValueError(
+                "There are variables in keywords that are not present "
+                "in the dataset."
+            )
 
         # if data contains nan, fail before running any logic
         if self.missing_values == "raise":
@@ -262,10 +263,10 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                 )
         elif self.missing_values == "impute":
             for var in cols_to_iterate:
+                series = X[var]
                 self.encoder_dict_[var] = (
-                    X[var]
-                    .astype(str)
-                    .replace("nan", "")
+                    series.astype(str)
+                    .mask(series.isna(), "")
                     .value_counts()
                     .head(self.top_categories)
                     .index.tolist()
@@ -276,7 +277,7 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                     X[var]
                     .astype(str)
                     .value_counts(dropna=True)
-                    .drop("nan", errors="ignore")
+                    .drop(["nan", "<NA>"], errors="ignore")
                     .head(self.top_categories)
                     .index.tolist()
                 )
@@ -316,13 +317,31 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         new_values = []
         for var in self.variables_:
             if self.missing_values == "impute":
-                X[var] = X[var].astype(str).replace("nan", "")
-            categories = X[var].dropna().astype(str).unique()
+                series = X[var]
+                series = series.astype(str).mask(series.isna(), "")
+            else:
+                series = X[var].astype(str)
+
+            categories = series.unique()
             column_encoder_dict = {
                 x: _gpm_fast_vec(x, self.encoder_dict_[var]) for x in categories
             }
-            column_encoder_dict["nan"] = [np.nan] * len(self.encoder_dict_[var])
-            encoded = np.vstack(X[var].astype(str).map(column_encoder_dict).values)
+            # Ensure map result is always an array of the correct size.
+            # Missing values in categories or unknown categories will map to NaN.
+            default_nan = [np.nan] * len(self.encoder_dict_[var])
+            if "nan" not in column_encoder_dict:
+                column_encoder_dict["nan"] = default_nan
+            if "<NA>" not in column_encoder_dict:
+                column_encoder_dict["<NA>"] = default_nan
+
+            encoded_series = series.map(column_encoder_dict)
+
+            # Robust stacking: replace any float NaNs (from unknown values) with arrays
+            encoded_list = [
+                v if isinstance(v, (list, np.ndarray)) else default_nan
+                for v in encoded_series
+            ]
+            encoded = np.vstack(encoded_list)
             if self.missing_values == "ignore":
                 encoded[X[var].isna(), :] = np.nan
             new_values.append(encoded)
diff --git a/feature_engine/preprocessing/match_columns.py b/feature_engine/preprocessing/match_columns.py
index c5321b6c3..da34f5e9c 100644
--- a/feature_engine/preprocessing/match_columns.py
+++ b/feature_engine/preprocessing/match_columns.py
@@ -175,7 +175,7 @@ def __init__(
 
         if not isinstance(verbose, bool):
             raise ValueError(
-                "verbose takes only booleans True and False." f"Got '{verbose} instead."
+                f"verbose takes only booleans True and False. Got '{verbose} instead."
             )
 
         # note: np.nan is an instance of float!!!
@@ -262,7 +262,12 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
         X = X.drop(_columns_to_drop, axis=1)
 
-        X = X.reindex(columns=self.feature_names_in_, fill_value=self.fill_value)
+        # Add missing columns one at a time to avoid Pandas 3 StringDtype reindex issue
+        for col in _columns_to_add:
+            X[col] = self.fill_value
+
+        # Reorder columns to match training set, without fill_value to avoid issues
+        X = X[self.feature_names_in_]
 
         if self.match_dtypes:
             _current_dtypes = X.dtypes.to_dict()
diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 7ed7ed200..ee9c1c151 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -201,7 +201,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                         axis=0,
                     )
                     df_ls.append(tmp)
-                tmp = pd.concat(df_ls, axis=1)
+                tmp = pd.concat(df_ls, axis=1, sort=False)
 
             else:
                 tmp = X[self.variables_].shift(
@@ -219,7 +219,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                         axis=0,
                     )
                     df_ls.append(tmp)
-                tmp = pd.concat(df_ls, axis=1)
+                tmp = pd.concat(df_ls, axis=1, sort=False)
 
             else:
                 tmp = X[self.variables_].shift(
diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index 47071efa7..a1e526c3e 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -219,7 +219,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                     .shift(periods=self.periods, freq=self.freq)
                 )
                 df_ls.append(tmp)
-            tmp = pd.concat(df_ls, axis=1)
+            tmp = pd.concat(df_ls, axis=1, sort=False)
 
         else:
             tmp = (
diff --git a/feature_engine/variable_handling/_variable_type_checks.py b/feature_engine/variable_handling/_variable_type_checks.py
index fb54c997e..3427c60be 100644
--- a/feature_engine/variable_handling/_variable_type_checks.py
+++ b/feature_engine/variable_handling/_variable_type_checks.py
@@ -1,20 +1,25 @@
 import pandas as pd
-from pandas.api.types import is_string_dtype as is_object
+from pandas.api.types import is_object_dtype, is_string_dtype
 from pandas.core.dtypes.common import is_datetime64_any_dtype as is_datetime
 from pandas.core.dtypes.common import is_numeric_dtype as is_numeric
 
 
-def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool:
-    # check for datetime only if object cannot be cast as numeric because
-    # if it could pd.to_datetime would convert it to datetime regardless
-    if is_object(column):
-        is_cat = _is_convertible_to_num(column) or not _is_convertible_to_dt(column)
+def is_object(s) -> bool:
+    return is_object_dtype(s) or is_string_dtype(s)
+
 
+def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool:
+    is_cat = False
     # check for datetime only if the type of the categories is not numeric
     # because pd.to_datetime throws an error when it is an integer
-    elif isinstance(column.dtype, pd.CategoricalDtype):
+    if isinstance(column.dtype, pd.CategoricalDtype):
         is_cat = _is_categories_num(column) or not _is_convertible_to_dt(column)
 
+    # check for datetime only if object cannot be cast as numeric because
+    # if it could pd.to_datetime would convert it to datetime regardless
+    elif is_object(column):
+        is_cat = _is_convertible_to_num(column) or not _is_convertible_to_dt(column)
+
     return is_cat
 
 
@@ -26,7 +31,7 @@ def _is_convertible_to_dt(column: pd.Series) -> bool:
     try:
         var = pd.to_datetime(column, utc=True)
         return is_datetime(var)
-    except:
+    except Exception:
         return False
 
 
@@ -39,16 +44,15 @@ def _is_convertible_to_num(column: pd.Series) -> bool:
 
 
 def _is_categorical_and_is_datetime(column: pd.Series) -> bool:
-    # check for datetime only if object cannot be cast as numeric because
-    # if it could pd.to_datetime would convert it to datetime regardless
-    if is_object(column):
-        is_dt = not _is_convertible_to_num(column) and _is_convertible_to_dt(column)
-
+    is_dt = False
     # check for datetime only if the type of the categories is not numeric
     # because pd.to_datetime throws an error when it is an integer
-    elif isinstance(column.dtype, pd.CategoricalDtype):
+    if isinstance(column.dtype, pd.CategoricalDtype):
         is_dt = not _is_categories_num(column) and _is_convertible_to_dt(column)
 
-    else:
-        is_dt = False
+    # check for datetime only if object cannot be cast as numeric because
+    # if it could pd.to_datetime would convert it to datetime regardless
+    elif is_object(column):
+        is_dt = not _is_convertible_to_num(column) and _is_convertible_to_dt(column)
+
     return is_dt
diff --git a/feature_engine/variable_handling/find_variables.py b/feature_engine/variable_handling/find_variables.py
index 04779ad5d..72e17d9ef 100644
--- a/feature_engine/variable_handling/find_variables.py
+++ b/feature_engine/variable_handling/find_variables.py
@@ -5,11 +5,11 @@
 import pandas as pd
 from pandas.api.types import is_datetime64_any_dtype as is_datetime
 from pandas.core.dtypes.common import is_numeric_dtype as is_numeric
-from pandas.core.dtypes.common import is_object_dtype as is_object
 
 from feature_engine.variable_handling._variable_type_checks import (
     _is_categorical_and_is_datetime,
     _is_categorical_and_is_not_datetime,
+    is_object,
 )
 from feature_engine.variable_handling.dtypes import DATETIME_TYPES
 
@@ -85,7 +85,9 @@ def find_categorical_variables(X: pd.DataFrame) -> List[Union[str, int]]:
     """
     variables = [
         column
-        for column in X.select_dtypes(include=["O", "category"]).columns
+        for column in X.select_dtypes(
+            include=["O", "category", "string"]
+        ).columns
         if _is_categorical_and_is_not_datetime(X[column])
     ]
     if len(variables) == 0:
@@ -254,7 +256,9 @@ def find_categorical_and_numerical_variables(
         if variables is None:
             variables_cat = [
                 column
-                for column in X.select_dtypes(include=["O", "category"]).columns
+                for column in X.select_dtypes(
+                    include=["O", "category", "string"]
+                ).columns
                 if _is_categorical_and_is_not_datetime(X[column])
             ]
         # find numerical variables in dataset
@@ -271,14 +275,14 @@ def find_categorical_and_numerical_variables(
             raise ValueError("The list of variables is empty.")
 
         # find categorical variables
-        variables_cat = [
-            var for var in X[variables].select_dtypes(include=["O", "category"]).columns
-        ]
+        variables_cat = list(
+            X[variables].select_dtypes(include=["O", "category", "string"]).columns
+        )
 
         # find numerical variables
         variables_num = list(X[variables].select_dtypes(include="number").columns)
 
-        if any([v for v in variables if v not in variables_cat + variables_num]):
+        if any(v for v in variables if v not in variables_cat + variables_num):
             raise TypeError(
                 "Some of the variables are neither numerical nor categorical."
             )
diff --git a/tests/test_creation/test_math_features.py b/tests/test_creation/test_math_features.py
index f65e932ee..6a5590019 100644
--- a/tests/test_creation/test_math_features.py
+++ b/tests/test_creation/test_math_features.py
@@ -237,7 +237,6 @@ def test_variable_names_when_df_cols_are_integers(df_numeric_columns):
 
 
 def test_error_when_null_values_in_variable(df_vartypes):
-
     df_na = df_vartypes.copy()
     df_na.loc[1, "Age"] = np.nan
 
@@ -256,7 +255,6 @@ def test_error_when_null_values_in_variable(df_vartypes):
 
 
 def test_no_error_when_null_values_in_variable(df_vartypes):
-
     df_na = df_vartypes.copy()
     df_na.loc[1, "Age"] = np.nan
 
@@ -323,7 +321,6 @@ def test_get_feature_names_out(_varnames, _drop, df_vartypes):
 @pytest.mark.parametrize("_varnames", [None, ["var1", "var2"]])
 @pytest.mark.parametrize("_drop", [True, False])
 def test_get_feature_names_out_from_pipeline(_varnames, _drop, df_vartypes):
-
     # set up transformer
     transformer = MathFeatures(
         variables=["Age", "Marks"],
diff --git a/tests/test_dataframe_checks.py b/tests/test_dataframe_checks.py
index d38e7cd54..09cd22ccf 100644
--- a/tests/test_dataframe_checks.py
+++ b/tests/test_dataframe_checks.py
@@ -249,22 +249,43 @@ def test_optional_contains_na(df_na):
 
 
 def test_contains_inf(df_na):
-    df_na.fillna(np.inf, inplace=True)
+    # Test numeric column with inf
+    df_num_inf = pd.DataFrame({"A": [1.1, np.inf, 3.3]})
     with pytest.raises(ValueError):
-        assert _check_contains_inf(df_na, ["Age", "Marks"])
+        _check_contains_inf(df_num_inf, ["A"])
+
+    # Test numeric column WITHOUT inf
+    df_num_no_inf = pd.DataFrame({"A": [1.1, 2.2, 3.3]})
+    _check_contains_inf(df_num_no_inf, ["A"])
 
 
 def test_check_X_raises_error_on_duplicated_column_names():
     df = pd.DataFrame(
         {
-            "col1": [1, 2, 3],
-            "col2": ["a", "b", "c"],
-            "col3": pd.date_range("2023-01-01", periods=3),
+            "Name": ["tom", "nick", "krish", "jack"],
+            "City": ["London", "Manchester", "Liverpool", "Bristol"],
+            "Age": [20, 21, 19, 18],
+            "Marks": [0.9, 0.8, 0.7, 0.6],
         }
     )
-    df.columns = ["same", "unique", "same"]
-
+    df.columns = ["var_A", "var_A", "var_B", "var_C"]
     with pytest.raises(ValueError) as err_txt:
         check_X(df)
-
     assert err_txt.match("Input data contains duplicated variable names.")
+
+
+def test_check_X_errors():
+    # A scalar (0-dimensional) array is rejected
+    with pytest.raises(ValueError) as record:
+        check_X(np.array(1))
+    assert record.match("Expected 2D array, got scalar array instead")
+
+    # A 1D array is rejected
+    with pytest.raises(ValueError) as record:
+        check_X(np.array([1, 2, 3]))
+    assert record.match("Expected 2D array, got 1D array instead")
+
+    # Input that is neither a numpy array nor a dataframe raises TypeError
+    with pytest.raises(TypeError) as record:
+        check_X("not a dataframe")
+    assert record.match("X must be a numpy array or pandas dataframe")
diff --git a/tests/test_datetime/test_datetime_features.py b/tests/test_datetime/test_datetime_features.py
index 1d95ffe83..456f41e84 100644
--- a/tests/test_datetime/test_datetime_features.py
+++ b/tests/test_datetime/test_datetime_features.py
@@ -336,13 +336,13 @@ def test_extract_features_from_different_timezones():
     )
     exp_err_msg = (
         "Tz-aware datetime.datetime cannot be converted to datetime64 "
-        "unless utc=True, at position 3"
+        "unless utc=True"
     )
     with pytest.raises(ValueError) as errinfo:
         assert DatetimeFeatures(
             variables="time", features_to_extract=["hour"], utc=False
         ).fit_transform(df)
-    assert str(errinfo.value) == exp_err_msg
+    assert exp_err_msg in str(errinfo.value)
 
 
 def test_extract_features_from_different_timezones_when_string(
diff --git a/tests/test_encoding/test_mean_encoder.py b/tests/test_encoding/test_mean_encoder.py
index 1026936be..a13d0e5bf 100644
--- a/tests/test_encoding/test_mean_encoder.py
+++ b/tests/test_encoding/test_mean_encoder.py
@@ -183,10 +183,11 @@ def test_warning_if_transform_df_contains_categories_not_present_in_fit_df(
         encoder.fit(df_enc[["var_A", "var_B"]], df_enc["target"])
         encoder.transform(df_enc_rare[["var_A", "var_B"]])
 
-    # check that only one warning was raised
-    assert len(record) == 1
+    # check that at least one warning was raised (Pandas 3 may emit additional
+    # deprecation warnings)
+    assert len(record) >= 1
     # check that the message matches
-    assert record[0].message.args[0] == msg
+    assert any(r.message.args[0] == msg for r in record)
 
     # check for error when rare_labels equals 'raise'
     with pytest.raises(ValueError) as record:
@@ -364,7 +365,7 @@ def test_variables_cast_as_category(df_enc_category_dtypes):
     ]
 
     pd.testing.assert_frame_equal(X, transf_df[["var_A", "var_B"]], check_dtype=False)
-    assert X["var_A"].dtypes == float
+    assert X["var_A"].dtypes.name == "float64"
 
 
 def test_auto_smoothing(df_enc):
diff --git a/tests/test_encoding/test_ordinal_encoder.py b/tests/test_encoding/test_ordinal_encoder.py
index ae7705643..e447c4176 100644
--- a/tests/test_encoding/test_ordinal_encoder.py
+++ b/tests/test_encoding/test_ordinal_encoder.py
@@ -138,10 +138,11 @@ def test_error_if_input_df_contains_categories_not_present_in_training_df(
         encoder.fit(df_enc[["var_A", "var_B"]], df_enc["target"])
         encoder.transform(df_enc_rare[["var_A", "var_B"]])
 
-    # check that only one warning was raised
-    assert len(record) == 1
+    # check that at least one warning was raised (Pandas 3 may emit additional
+    # deprecation warnings)
+    assert len(record) >= 1
     # check that the message matches
-    assert record[0].message.args[0] == msg
+    assert any(r.message.args[0] == msg for r in record)
 
     # check for error when rare_labels equals 'raise'
     with pytest.raises(ValueError) as record:
@@ -243,7 +244,7 @@ def test_variables_cast_as_category(df_enc_category_dtypes):
 
     # test transform output
     pd.testing.assert_frame_equal(X, transf_df[["var_A", "var_B"]], check_dtype=False)
-    assert X["var_A"].dtypes == int
+    assert X["var_A"].dtypes.name == "int64"
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_encoding/test_similarity_encoder.py b/tests/test_encoding/test_similarity_encoder.py
index 3e74b3717..f32ac3823 100644
--- a/tests/test_encoding/test_similarity_encoder.py
+++ b/tests/test_encoding/test_similarity_encoder.py
@@ -150,6 +150,30 @@ def test_nan_behaviour_ignore(df_enc_big_na):
     }
 
 
+def test_string_dtype_with_pd_na():
+    # Test StringDtype with pd.NA to hit "<NA>" branch in transform
+    df = pd.DataFrame({"var_A": ["A", "B", pd.NA]}, dtype="string")
+    encoder = StringSimilarityEncoder(missing_values="impute")
+    X = encoder.fit_transform(df)
+    assert (X.isna().sum() == 0).all(axis=None)
+    # The categories will include "<NA>" or the string version of it
+    assert (
+        "<NA>" in encoder.encoder_dict_["var_A"]
+        or "" in encoder.encoder_dict_["var_A"]
+    )
+
+
+def test_string_dtype_with_literal_nan_strings():
+    # Literal "nan" and "<NA>" strings are genuine categories: transform must
+    # keep their learned encodings instead of adding the all-NaN placeholder.
+    df = pd.DataFrame({"var_A": ["nan", "<NA>", "A", "B"]}, dtype="string")
+    encoder = StringSimilarityEncoder(missing_values="impute")
+    X = encoder.fit_transform(df)
+    assert (X.isna().sum() == 0).all(axis=None)
+    assert "nan" in encoder.encoder_dict_["var_A"]
+    assert "<NA>" in encoder.encoder_dict_["var_A"]
+
+
 def test_inverse_transform_error(df_enc_big):
     encoder = StringSimilarityEncoder()
     X = encoder.fit_transform(df_enc_big)
@@ -237,6 +261,7 @@ def test_get_feature_names_out_na(df_enc_big_na):
         "var_C_F",
     ]
 
+    # NaN values are replaced with empty string "" before string conversion
     assert tr.encoder_dict_ == {
         "var_A": ["B", "D", "G", "A", "C", "E", "F", ""],
         "var_B": ["A", "D", "B", "G", "C", "E", "F"],
diff --git a/tests/test_encoding/test_woe/test_woe_encoder.py b/tests/test_encoding/test_woe/test_woe_encoder.py
index 44181c5d7..a38caa6fa 100644
--- a/tests/test_encoding/test_woe/test_woe_encoder.py
+++ b/tests/test_encoding/test_woe/test_woe_encoder.py
@@ -149,10 +149,11 @@ def test_warn_if_transform_df_contains_categories_not_seen_in_fit(df_enc, df_enc
         encoder.fit(df_enc[["var_A", "var_B"]], df_enc["target"])
         encoder.transform(df_enc_rare[["var_A", "var_B"]])
 
-    # check that only one warning was raised
-    assert len(record) == 1
+    # check that at least one warning was raised (Pandas 3 may emit additional
+    # deprecation warnings)
+    assert len(record) >= 1
     # check that the message matches
-    assert record[0].message.args[0] == msg
+    assert any(r.message.args[0] == msg for r in record)
 
     # check for error when rare_labels equals 'raise'
     with pytest.raises(ValueError) as record:
@@ -389,7 +390,7 @@ def test_variables_cast_as_category(df_enc_category_dtypes):
     transf_df["var_B"] = VAR_B
 
     pd.testing.assert_frame_equal(X, transf_df[["var_A", "var_B"]], check_dtype=False)
-    assert X["var_A"].dtypes == float
+    assert X["var_A"].dtypes.name == "float64"
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_preprocessing/test_match_columns.py b/tests/test_preprocessing/test_match_columns.py
index 16ee0633d..6726b33f9 100644
--- a/tests/test_preprocessing/test_match_columns.py
+++ b/tests/test_preprocessing/test_match_columns.py
@@ -189,7 +189,11 @@ def test_match_dtypes_string_to_datetime(df_vartypes):
     assert match_columns.match_dtypes is True
     assert match_columns.verbose is False
     # test fit attrs
-    assert match_columns.dtype_dict_ == {"dob": np.dtype("<M8[ns]")}
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
+        assert match_columns.dtype_dict_ == {"dob": np.dtype("<M8[us]")}
+    else:
+        assert match_columns.dtype_dict_ == {"dob": np.dtype("<M8[ns]")}
     # test transform output
     pd.testing.assert_series_equal(train.dtypes, transformed_df.dtypes)
     pd.testing.assert_frame_equal(transformed_df, train)
diff --git a/tests/test_selection/test_recursive_feature_elimination.py b/tests/test_selection/test_recursive_feature_elimination.py
index 598efba4e..27eb689f9 100644
--- a/tests/test_selection/test_recursive_feature_elimination.py
+++ b/tests/test_selection/test_recursive_feature_elimination.py
@@ -101,7 +101,9 @@ def test_classification(
     rounded_perfs = {
         key: round(sel.performance_drifts_[key], 4) for key in sel.performance_drifts_
     }
-    assert rounded_perfs == performances
+    assert rounded_perfs.keys() == performances.keys()
+    for key in performances:
+        assert rounded_perfs[key] == pytest.approx(performances[key], abs=0.001)
 
     # test transform output
     pd.testing.assert_frame_equal(sel.transform(X), Xtransformed)
diff --git a/tests/test_variable_handling/test_fe_type_checks.py b/tests/test_variable_handling/test_fe_type_checks.py
index 86c5609b8..de4bc2d38 100644
--- a/tests/test_variable_handling/test_fe_type_checks.py
+++ b/tests/test_variable_handling/test_fe_type_checks.py
@@ -1,3 +1,5 @@
+import pandas as pd
+
 from feature_engine.variable_handling._variable_type_checks import (
     _is_categorical_and_is_datetime,
     _is_categorical_and_is_not_datetime,
@@ -45,6 +47,22 @@ def test_is_categorical_and_is_datetime(df, df_datetime):
     df["Age"] = df["Age"].astype("O")
     assert _is_categorical_and_is_datetime(df["Age"]) is False
 
+    # Object Datetime
+    s_obj_dt = pd.Series([pd.Timestamp("2020-01-01")], dtype="object")
+    assert _is_categorical_and_is_datetime(s_obj_dt) is True
+
+    # StringDtype Datetime (if convertible)
+    s_str_dt = pd.Series(["2020-01-01", "2020-01-02"], dtype="string")
+    assert _is_categorical_and_is_datetime(s_str_dt) is True
+
+    # Numeric (should be False for both if and elif branches)
+    s_num = pd.Series([1, 2, 3])
+    assert _is_categorical_and_is_datetime(s_num) is False
+
+    # Categorical (should hit the 'if' branch)
+    s_cat = pd.Series(["a", "b"], dtype="category")
+    assert _is_categorical_and_is_datetime(s_cat) is False
+
 
 def test_is_categorical_and_is_not_datetime(df):
     assert _is_categorical_and_is_not_datetime(df["date_obj0"]) is False
@@ -53,3 +71,23 @@ def test_is_categorical_and_is_not_datetime(df):
 
     df["age_str"] = ["20", "21", "19", "18"]
     assert _is_categorical_and_is_not_datetime(df["age_str"]) is True
+
+    # Object Integer
+    s_obj_int = pd.Series([1, 2], dtype="object")
+    assert _is_categorical_and_is_not_datetime(s_obj_int) is True
+
+    # Object Datetime should be False
+    s_obj_dt = pd.Series([pd.Timestamp("2020-01-01")], dtype="object")
+    assert _is_categorical_and_is_not_datetime(s_obj_dt) is False
+
+    # StringDtype (not convertible to numeric/datetime) should be True
+    s_str = pd.Series(["a", "b"], dtype="string")
+    assert _is_categorical_and_is_not_datetime(s_str) is True
+
+    # Numeric should be False
+    s_num = pd.Series([1, 2, 3])
+    assert _is_categorical_and_is_not_datetime(s_num) is False
+
+    # Categorical should be True (it hits the 'if' branch)
+    s_cat = pd.Series(["a", "b"], dtype="category")
+    assert _is_categorical_and_is_not_datetime(s_cat) is True
diff --git a/tests/test_wrappers/test_sklearn_wrapper.py b/tests/test_wrappers/test_sklearn_wrapper.py
index e825a7bc0..cd5cccd01 100644
--- a/tests/test_wrappers/test_sklearn_wrapper.py
+++ b/tests/test_wrappers/test_sklearn_wrapper.py
@@ -345,6 +345,27 @@ def test_sklearn_ohe_object_one_feature(df_vartypes):
 
     transformed_df = transformer.fit_transform(df_vartypes[variables_to_encode])
 
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
+        # Pandas 3 uses microseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
+    else:
+        # Pandas 2 uses nanoseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
     pd.testing.assert_frame_equal(ref, transformed_df)
 
 
@@ -371,6 +392,27 @@ def test_sklearn_ohe_object_many_features(df_vartypes):
 
     transformed_df = transformer.fit_transform(df_vartypes[variables_to_encode])
 
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
+        # Pandas 3 uses microseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
+    else:
+        # Pandas 2 uses nanoseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
     pd.testing.assert_frame_equal(ref, transformed_df)
 
 
@@ -393,6 +435,27 @@ def test_sklearn_ohe_numeric(df_vartypes):
 
     transformed_df = transformer.fit_transform(df_vartypes[variables_to_encode])
 
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
+        # Pandas 3 uses microseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
+    else:
+        # Pandas 2 uses nanoseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
     pd.testing.assert_frame_equal(ref, transformed_df)
 
 
@@ -428,6 +491,27 @@ def test_sklearn_ohe_all_features(df_vartypes):
 
     transformed_df = transformer.fit_transform(df_vartypes)
 
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
+        # Pandas 3 uses microseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
+    else:
+        # Pandas 2 uses nanoseconds format
+        transformed_df.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in transformed_df.columns
+        ]
+        ref.columns = [
+            c.replace(".000000000", "").replace(".000000", "")
+            for c in ref.columns
+        ]
     pd.testing.assert_frame_equal(ref, transformed_df)
 
 
@@ -466,7 +550,7 @@ def test_sklearn_ohe_with_crossvalidation():
     results: np.ndarray = cross_val_score(
         pipeline, X, y, scoring="neg_mean_squared_error", cv=3
     )
-    assert not any([np.isnan(i) for i in results])
+    assert not any(np.isnan(i) for i in results)
 
 
 def test_wrap_one_hot_encoder_get_features_name_out(df_vartypes):
@@ -496,7 +580,28 @@ def test_wrap_one_hot_encoder_get_features_name_out(df_vartypes):
         "dob_2020-02-24T00:03:00.000000000",
     ]
 
-    assert ohe_wrap.get_feature_names_out() == expected_features_all
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
+        # Pandas 3 uses microseconds format
+        actual_features = [
+            f.replace(".000000000", "").replace(".000000", "")
+            for f in ohe_wrap.get_feature_names_out()
+        ]
+        expected_features = [
+            f.replace(".000000000", "").replace(".000000", "")
+            for f in expected_features_all
+        ]
+    else:
+        # Pandas 2 uses nanoseconds format
+        actual_features = [
+            f.replace(".000000000", "").replace(".000000", "")
+            for f in ohe_wrap.get_feature_names_out()
+        ]
+        expected_features = [
+            f.replace(".000000000", "").replace(".000000", "")
+            for f in expected_features_all
+        ]
+    assert actual_features == expected_features
 
 
 @pytest.mark.parametrize(

From 6099193b3c3e209a71b74900057e9ce3a913d16f Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 16:05:35 -0500
Subject: [PATCH 06/24] remove extra learned parameter, pass function to
 transform

---
 feature_engine/creation/math_features.py | 37 +++++++-----------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/feature_engine/creation/math_features.py b/feature_engine/creation/math_features.py
index 5537c876f..5ea0d0e5b 100644
--- a/feature_engine/creation/math_features.py
+++ b/feature_engine/creation/math_features.py
@@ -186,9 +186,9 @@ def __init__(
         self.func = func
         self.new_variables_names = new_variables_names
 
-    def _map_unnamed_func_to_str(self, func: Any) -> Any:
+    def _map_numpy_func_to_str(self, func: Any) -> Any:
         if isinstance(func, list):
-            return [self._map_unnamed_func_to_str(f) for f in func]
+            return [self._map_numpy_func_to_str(f) for f in func]
 
         # We map certain numpy functions to their string alias.
         # This serves two purposes:
@@ -207,23 +207,6 @@ def _map_unnamed_func_to_str(self, func: Any) -> Any:
         }
         return map_dict.get(func, func)
 
-    def fit(self, X: pd.DataFrame, y=None):
-        """
-        This method does not learn any parameters. It just stores the normalized
-        function representation.
-
-        Parameters
-        ----------
-        X: pandas dataframe of shape = [n_samples, n_features]
-            The training input samples.
-
-        y: pandas Series, or np.array. Defaults to None.
-            It is not needed in this transformer. You can pass y or None.
-        """
-        super().fit(X, y)
-        # Normalize func to func_ (sklearn convention: don't modify init params)
-        self.func_ = self._map_unnamed_func_to_str(self.func)
-        return self
 
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """
@@ -241,12 +224,14 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """
         X = self._check_transform_input_and_state(X)
 
+        func_ = self._map_numpy_func_to_str(self.func)
+
         new_variable_names = self._get_new_features_name()
 
         if len(new_variable_names) == 1:
-            X[new_variable_names[0]] = X[self.variables].agg(self.func_, axis=1)
+            X[new_variable_names[0]] = X[self.variables].agg(func_, axis=1)
         else:
-            X[new_variable_names] = X[self.variables].agg(self.func_, axis=1)
+            X[new_variable_names] = X[self.variables].agg(func_, axis=1)
 
         if self.drop_original:
             X.drop(columns=self.variables, inplace=True)
@@ -262,15 +247,13 @@ def _get_new_features_name(self) -> List:
 
         else:
             varlist = [f"{var}" for var in self.variables_]
+            func_ = self._map_numpy_func_to_str(self.func)
 
-            if isinstance(self.func_, list):
-                functions = [
-                    fun if type(fun) is str else fun.__name__ for fun in self.func_
-                ]
+            if isinstance(func_, list):
                 feature_names = [
-                    f"{function}_{'_'.join(varlist)}" for function in functions
+                    f"{function}_{'_'.join(varlist)}" for function in func_
                 ]
             else:
-                feature_names = [f"{self.func_}_{'_'.join(varlist)}"]
+                feature_names = [f"{func_}_{'_'.join(varlist)}"]
 
         return feature_names

From 8c7b35ee2c839c7c016f7ffdd8f991171d446d1e Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 16:23:30 -0500
Subject: [PATCH 07/24] rolled back mapping function

---
 feature_engine/creation/math_features.py | 39 +++++-------------------
 1 file changed, 8 insertions(+), 31 deletions(-)

diff --git a/feature_engine/creation/math_features.py b/feature_engine/creation/math_features.py
index 5ea0d0e5b..19bfded91 100644
--- a/feature_engine/creation/math_features.py
+++ b/feature_engine/creation/math_features.py
@@ -1,6 +1,5 @@
 from typing import Any, List, Optional, Union
 
-import numpy as np
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
@@ -186,28 +185,6 @@ def __init__(
         self.func = func
         self.new_variables_names = new_variables_names
 
-    def _map_numpy_func_to_str(self, func: Any) -> Any:
-        if isinstance(func, list):
-            return [self._map_numpy_func_to_str(f) for f in func]
-
-        # We map certain numpy functions to their string alias.
-        # This serves two purposes:
-        # 1) It avoids a FutureWarning in pandas 2.1+ which recommends
-        # using the string alias for better performance and future-proofing.
-        # 2) It ensures consistent column naming (e.g. "sum_x1_x2")
-        # regardless of how the function was passed (np.sum vs "sum").
-        map_dict = {
-            np.sum: "sum",
-            np.mean: "mean",
-            np.std: "std",
-            np.min: "min",
-            np.max: "max",
-            np.median: "median",
-            np.prod: "prod",
-        }
-        return map_dict.get(func, func)
-
-
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """
         Create and add new variables.
@@ -224,14 +201,12 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """
         X = self._check_transform_input_and_state(X)
 
-        func_ = self._map_numpy_func_to_str(self.func)
-
         new_variable_names = self._get_new_features_name()
 
         if len(new_variable_names) == 1:
-            X[new_variable_names[0]] = X[self.variables].agg(func_, axis=1)
+            X[new_variable_names[0]] = X[self.variables].agg(self.func, axis=1)
         else:
-            X[new_variable_names] = X[self.variables].agg(func_, axis=1)
+            X[new_variable_names] = X[self.variables].agg(self.func, axis=1)
 
         if self.drop_original:
             X.drop(columns=self.variables, inplace=True)
@@ -247,13 +222,15 @@ def _get_new_features_name(self) -> List:
 
         else:
             varlist = [f"{var}" for var in self.variables_]
-            func_ = self._map_numpy_func_to_str(self.func)
 
-            if isinstance(func_, list):
+            if isinstance(self.func, list):
+                functions = [
+                    fun if type(fun) is str else fun.__name__ for fun in self.func
+                ]
                 feature_names = [
-                    f"{function}_{'_'.join(varlist)}" for function in func_
+                    f"{function}_{'_'.join(varlist)}" for function in functions
                 ]
             else:
-                feature_names = [f"{func_}_{'_'.join(varlist)}"]
+                feature_names = [f"{self.func}_{'_'.join(varlist)}"]
 
         return feature_names

From ff451615fcd8e595ea648c5720245f6031c8f4c6 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 17:01:42 -0500
Subject: [PATCH 08/24] refactor creation of array of nan

---
 feature_engine/encoding/similarity_encoder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/feature_engine/encoding/similarity_encoder.py b/feature_engine/encoding/similarity_encoder.py
index 2599d2f91..b4dd91f99 100644
--- a/feature_engine/encoding/similarity_encoder.py
+++ b/feature_engine/encoding/similarity_encoder.py
@@ -328,7 +328,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             }
             # Ensure map result is always an array of the correct size.
             # Missing values in categories or unknown categories will map to NaN.
-            default_nan = [np.nan] * len(self.encoder_dict_[var])
+            default_nan = np.full(len(self.encoder_dict_[var]), np.nan)
             if "nan" not in column_encoder_dict:
                 column_encoder_dict["nan"] = default_nan
             if "<NA>" not in column_encoder_dict:

From 2a6775d2dd10185b64410b3405e0b7e3cd7124c8 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 17:40:31 -0500
Subject: [PATCH 09/24] expand test to cover different expressions of nan
 values

---
 .../test_encoding/test_similarity_encoder.py  | 45 ++++++++++++++-----
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/tests/test_encoding/test_similarity_encoder.py b/tests/test_encoding/test_similarity_encoder.py
index f32ac3823..f31889dfe 100644
--- a/tests/test_encoding/test_similarity_encoder.py
+++ b/tests/test_encoding/test_similarity_encoder.py
@@ -1,5 +1,6 @@
 from difflib import SequenceMatcher
 
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -115,9 +116,14 @@ def test_nan_behaviour_error_fit(df_enc_big_na):
     assert str(record.value) == msg
 
 
-def test_nan_behaviour_error_transform(df_enc_big, df_enc_big_na):
+@pytest.mark.parametrize("nan_value", [np.nan, pd.NA, None])
+def test_nan_behaviour_error_transform(df_enc_big, nan_value):
     encoder = StringSimilarityEncoder(missing_values="raise")
     encoder.fit(df_enc_big)
+
+    df_enc_big_na = df_enc_big.copy()
+    df_enc_big_na.loc[0, "var_A"] = nan_value
+
     with pytest.raises(ValueError) as record:
         encoder.transform(df_enc_big_na)
     msg = (
@@ -128,9 +134,15 @@ def test_nan_behaviour_error_transform(df_enc_big, df_enc_big_na):
     assert str(record.value) == msg
 
 
-def test_nan_behaviour_impute(df_enc_big_na):
+@pytest.mark.parametrize("nan_value", [np.nan, pd.NA, None])
+def test_nan_behaviour_impute(df_enc_big, nan_value):
+
+    df_enc_big_na = df_enc_big.copy()
+    df_enc_big_na.loc[0, "var_A"] = nan_value
+
     encoder = StringSimilarityEncoder(missing_values="impute")
     X = encoder.fit_transform(df_enc_big_na)
+
     assert (X.isna().sum() == 0).all(axis=None)
     assert encoder.encoder_dict_ == {
         "var_A": ["B", "D", "G", "A", "C", "E", "F", ""],
@@ -139,15 +151,27 @@ def test_nan_behaviour_impute(df_enc_big_na):
     }
 
 
-def test_nan_behaviour_ignore(df_enc_big_na):
+@pytest.mark.parametrize("nan_value", [np.nan, pd.NA, None])
+def test_nan_behaviour_ignore(df_enc_big, nan_value):
+    df_enc_big_na = df_enc_big.copy()
+    df_enc_big_na.loc[0, "var_A"] = nan_value
+
     encoder = StringSimilarityEncoder(missing_values="ignore")
     X = encoder.fit_transform(df_enc_big_na)
     assert (X.isna().any(axis=1) == df_enc_big_na.isna().any(axis=1)).all()
-    assert encoder.encoder_dict_ == {
-        "var_A": ["B", "D", "G", "A", "C", "E", "F"],
-        "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-        "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-    }
+    if nan_value is not None:
+        assert encoder.encoder_dict_ == {
+            "var_A": ["B", "D", "G", "A", "C", "E", "F"],
+            "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+            "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+        }
+    else:
+        # Note that None is converted to the string "None", not treated as NaN
+        assert encoder.encoder_dict_ == {
+            "var_A": ["B", "D", "G", "A", "C", "E", "F", "None"],
+            "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+            "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+        }
 
 
 def test_string_dtype_with_pd_na():
@@ -157,10 +181,7 @@ def test_string_dtype_with_pd_na():
     X = encoder.fit_transform(df)
     assert (X.isna().sum() == 0).all(axis=None)
     # The categories will include "<NA>" or the string version of it
-    assert (
-        "<NA>" in encoder.encoder_dict_["var_A"]
-        or "" in encoder.encoder_dict_["var_A"]
-    )
+    assert "" in encoder.encoder_dict_["var_A"]
 
 
 def test_string_dtype_with_literal_nan_strings():

From 1f526cf471ead71e29eb274a8f7cbff0824ddea4 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 17:52:42 -0500
Subject: [PATCH 10/24] refactor match columns update

---
 feature_engine/preprocessing/match_columns.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/feature_engine/preprocessing/match_columns.py b/feature_engine/preprocessing/match_columns.py
index da34f5e9c..41bd70660 100644
--- a/feature_engine/preprocessing/match_columns.py
+++ b/feature_engine/preprocessing/match_columns.py
@@ -262,11 +262,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
         X = X.drop(_columns_to_drop, axis=1)
 
-        # Add missing columns one at a time to avoid Pandas 3 StringDtype reindex issue
-        for col in _columns_to_add:
-            X[col] = self.fill_value
-
-        # Reorder columns to match training set, without fill_value to avoid issues
+        # Add missing columns first and then reorder to avoid
+        # Pandas 3 StringDtype reindex issue (before we used reindex)
+        X[_columns_to_add] = self.fill_value
         X = X[self.feature_names_in_]
 
         if self.match_dtypes:

From 9368b17ae257adf3541c581e6dd54bd669d91d50 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 17:57:04 -0500
Subject: [PATCH 11/24] refactor code variable checks

---
 feature_engine/variable_handling/_variable_type_checks.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/feature_engine/variable_handling/_variable_type_checks.py b/feature_engine/variable_handling/_variable_type_checks.py
index 3427c60be..17eb4e41d 100644
--- a/feature_engine/variable_handling/_variable_type_checks.py
+++ b/feature_engine/variable_handling/_variable_type_checks.py
@@ -9,7 +9,6 @@ def is_object(s) -> bool:
 
 
 def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool:
-    is_cat = False
     # check for datetime only if the type of the categories is not numeric
     # because pd.to_datetime throws an error when it is an integer
     if isinstance(column.dtype, pd.CategoricalDtype):
@@ -20,6 +19,9 @@ def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool:
     elif is_object(column):
         is_cat = _is_convertible_to_num(column) or not _is_convertible_to_dt(column)
 
+    else:
+        is_cat = False
+
     return is_cat
 
 
@@ -44,7 +46,6 @@ def _is_convertible_to_num(column: pd.Series) -> bool:
 
 
 def _is_categorical_and_is_datetime(column: pd.Series) -> bool:
-    is_dt = False
     # check for datetime only if the type of the categories is not numeric
     # because pd.to_datetime throws an error when it is an integer
     if isinstance(column.dtype, pd.CategoricalDtype):
@@ -55,4 +56,7 @@ def _is_categorical_and_is_datetime(column: pd.Series) -> bool:
     elif is_object(column):
         is_dt = not _is_convertible_to_num(column) and _is_convertible_to_dt(column)
 
+    else:
+        is_dt = False
+
     return is_dt

From 564a84f3e4c36b37c6f936f40fdea0a165e29a8d Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 18:13:17 -0500
Subject: [PATCH 12/24] refactor inf tests

---
 tests/test_dataframe_checks.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/test_dataframe_checks.py b/tests/test_dataframe_checks.py
index 09cd22ccf..3b43c0b5d 100644
--- a/tests/test_dataframe_checks.py
+++ b/tests/test_dataframe_checks.py
@@ -248,15 +248,19 @@ def test_optional_contains_na(df_na):
     assert str(record.value) == msg
 
 
-def test_contains_inf(df_na):
-    # Test numeric column with inf
-    df_num_inf = pd.DataFrame({"A": [1.1, np.inf, 3.3]})
-    with pytest.raises(ValueError):
-        _check_contains_inf(df_num_inf, ["A"])
+def test_contains_inf_raises_on_inf():
+    msg = (
+        "Some of the variables to transform contain inf values. Check and "
+        "remove those before using this transformer."
+    )
+    df = pd.DataFrame({"A": [1.1, np.inf, 3.3]})
+    with pytest.raises(ValueError, match=msg):
+        _check_contains_inf(df, ["A"])
+
 
-    # Test numeric column WITHOUT inf
-    df_num_no_inf = pd.DataFrame({"A": [1.1, 2.2, 3.3]})
-    _check_contains_inf(df_num_no_inf, ["A"])
+def test_contains_inf_passes_without_inf():
+    df = pd.DataFrame({"A": [1.1, 2.2, 3.3]})
+    assert _check_contains_inf(df, ["A"]) is None
 
 
 def test_check_X_raises_error_on_duplicated_column_names():

From 25414101baf10d7548c561df5f503e6d3d9f765f Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 18:31:30 -0500
Subject: [PATCH 13/24] split test by pandas version

---
 .../test_encoding/test_similarity_encoder.py  | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/tests/test_encoding/test_similarity_encoder.py b/tests/test_encoding/test_similarity_encoder.py
index f31889dfe..672efe0b7 100644
--- a/tests/test_encoding/test_similarity_encoder.py
+++ b/tests/test_encoding/test_similarity_encoder.py
@@ -159,19 +159,27 @@ def test_nan_behaviour_ignore(df_enc_big, nan_value):
     encoder = StringSimilarityEncoder(missing_values="ignore")
     X = encoder.fit_transform(df_enc_big_na)
     assert (X.isna().any(axis=1) == df_enc_big_na.isna().any(axis=1)).all()
-    if nan_value is not None:
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    if pd.__version__ >= "3":
         assert encoder.encoder_dict_ == {
             "var_A": ["B", "D", "G", "A", "C", "E", "F"],
             "var_B": ["A", "D", "B", "G", "C", "E", "F"],
             "var_C": ["C", "D", "B", "G", "A", "E", "F"],
         }
     else:
-        # Note that None is converted to a string and not treated as nan value
-        assert encoder.encoder_dict_ == {
-            "var_A": ["B", "D", "G", "A", "C", "E", "F", "None"],
-            "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-            "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-        }
+        if nan_value is not None:
+            assert encoder.encoder_dict_ == {
+                "var_A": ["B", "D", "G", "A", "C", "E", "F"],
+                "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+                "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+            }
+        else:
+            # Note that None is converted to a string and not treated as nan value
+            assert encoder.encoder_dict_ == {
+                "var_A": ["B", "D", "G", "A", "C", "E", "F", "None"],
+                "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+                "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+            }
 
 
 def test_string_dtype_with_pd_na():

From b57a60060354447f3ac69bdd379e99dbf11a4202 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 18:41:24 -0500
Subject: [PATCH 14/24] solve additional errors specific to pandas 3

---
 tests/test_creation/test_math_features.py      | 7 +++++++
 tests/test_encoding/test_similarity_encoder.py | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/tests/test_creation/test_math_features.py b/tests/test_creation/test_math_features.py
index 6a5590019..6e16821be 100644
--- a/tests/test_creation/test_math_features.py
+++ b/tests/test_creation/test_math_features.py
@@ -136,6 +136,13 @@ def test_aggregations_with_functions(df_vartypes):
         }
     )
 
+    # TODO: Remove pandas < 3 support when dropping older pandas versions
+    # In pandas >=3, when the user passes np.std, agg calls numpy (ddof=0).
+    # In pandas <3, when the user passes np.std, agg uses pandas' std (ddof=1).
+    # Hence the difference in results
+    if pd.__version__ >= "3":
+        ref["std_Age_Marks"] = np.std(df_vartypes[["Age", "Marks"]], axis=1)
+
     # transform params
     pd.testing.assert_frame_equal(X, ref)
 
diff --git a/tests/test_encoding/test_similarity_encoder.py b/tests/test_encoding/test_similarity_encoder.py
index 672efe0b7..49f809038 100644
--- a/tests/test_encoding/test_similarity_encoder.py
+++ b/tests/test_encoding/test_similarity_encoder.py
@@ -174,6 +174,11 @@ def test_nan_behaviour_ignore(df_enc_big, nan_value):
                 "var_C": ["C", "D", "B", "G", "A", "E", "F"],
             }
         else:
+            assert encoder.encoder_dict_ == {
+                "var_A": ["B", "D", "G", "A", "C", "E", "F"],
+                "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+                "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+            }
             # Note that None is converted to a string and not treated as nan value
             assert encoder.encoder_dict_ == {
                 "var_A": ["B", "D", "G", "A", "C", "E", "F", "None"],

From 75fde70e5354af5fa4ab76b0dbc2ec5a12c82785 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 20:06:56 -0500
Subject: [PATCH 15/24] add pandas version tests to tox.ini

---
 tox.ini | 87 ++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 74 insertions(+), 13 deletions(-)

diff --git a/tox.ini b/tox.ini
index e55e03a47..096cf88f4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,16 +1,36 @@
 [tox]
-envlist = py39, py310, py311-sklearn150, py311-sklearn160, py311-sklearn170, py312, py313, codecov, docs, stylechecks, typechecks
+envlist =
+    py39
+    py310
+    py311-sklearn150
+    py311-sklearn160
+    py311-sklearn170
+    py311-pandas230
+    py312
+    py313
+    codecov
+    docs
+    stylechecks
+    typechecks
 skipsdist = true
 
+
 [testenv]
-install_command = pip install {opts} {packages}
 envdir = {toxworkdir}/unit_tests
+install_command = pip install {opts} {packages}
+
 setenv =
-    PYTHONPATH=.
+    PYTHONPATH = .
     COVERAGE_RCFILE = {envtmpdir}/coveragerc
+
 commands =
     pytest tests
 
+
+# -------------------------
+# Python versions
+# -------------------------
+
 [testenv:py39]
 deps =
     .[tests]
@@ -19,6 +39,19 @@ deps =
 deps =
     .[tests]
 
+[testenv:py312]
+deps =
+    .[tests]
+
+[testenv:py313]
+deps =
+    .[tests]
+
+
+# -------------------------
+# scikit-learn matrix
+# -------------------------
+
 [testenv:py311-sklearn150]
 deps =
     .[tests]
@@ -34,45 +67,73 @@ deps =
     .[tests]
     scikit-learn==1.7.1
 
-[testenv:py312]
-deps =
-    .[tests]
 
-[testenv:py313]
+[testenv:py311-pandas230]
 deps =
     .[tests]
+    pandas==2.3.0
+
+
+# -------------------------
+# Coverage
+# -------------------------
 
 [testenv:codecov]
 deps =
     .[tests]
+
 commands_pre =
     {envpython} -c 'from pathlib import Path; Path(r"{env:COVERAGE_RCFILE}").write_text(Path(".coveragerc").read_text())'
+
 commands =
     coverage run -m pytest -v
     coverage report
 
+
+# -------------------------
+# Docs
+# -------------------------
+
 [testenv:docs]
 deps =
     .[docs]
+
 commands =
     sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html
 
+
+# -------------------------
+# Linting & typing
+# -------------------------
+
 [testenv:stylechecks]
 deps =
     flake8
-commands = {posargs:flake8 feature_engine tests}
+
+commands =
+    {posargs:flake8 feature_engine tests}
 
 [testenv:typechecks]
 deps =
-     mypy
-commands = {posargs:mypy feature_engine}
+    mypy
+
+commands =
+    {posargs:mypy feature_engine}
+
+
+# -------------------------
+# flake8 configuration
+# -------------------------
 
 [flake8]
-exclude = .git, env
-# match black code formatter
+exclude =
+    .git
+    env
+
+# Match Black
 max-line-length = 88
 
 profile = black
 line_length = 88
 lines_between_sections = 1
-known_first_party = "sentry"
\ No newline at end of file
+known_first_party = sentry

From 3488426b60572b0a543bcc304bd8405def7e93ad Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 20:10:37 -0500
Subject: [PATCH 16/24] move versioned tests to Python 3.12

---
 tox.ini | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tox.ini b/tox.ini
index 096cf88f4..b09a57c6c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,8 +5,8 @@ envlist =
     py311-sklearn150
     py311-sklearn160
     py311-sklearn170
-    py311-pandas230
-    py312
+    py312-pandas230
+    py312-pandas300
     py313
     codecov
     docs
@@ -39,10 +39,6 @@ deps =
 deps =
     .[tests]
 
-[testenv:py312]
-deps =
-    .[tests]
-
 [testenv:py313]
 deps =
     .[tests]
@@ -68,11 +64,16 @@ deps =
     scikit-learn==1.7.1
 
 
-[testenv:py311-pandas230]
+[testenv:py312-pandas230]
 deps =
     .[tests]
     pandas==2.3.0
 
+[testenv:py312-pandas300]
+deps =
+    .[tests]
+    pandas==3.0.0
+
 
 # -------------------------
 # Coverage

From 8758cdf3991fe43ff8249421d7b184f7f6ee7262 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 20:14:51 -0500
Subject: [PATCH 17/24] add tests to circleci config

---
 .circleci/config.yml | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index cbc578a99..11335e92d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -90,7 +90,7 @@ jobs:
           command: |
             tox -e py311-sklearn170
 
-  test_feature_engine_py312:
+  test_feature_engine_py312_pandas230:
     docker:
       - image: cimg/python:3.12.1
     working_directory: ~/project
@@ -101,7 +101,20 @@ jobs:
       - run:
           name: Run tests with Python 3.12
           command: |
-            tox -e py312
+            tox -e py312-pandas230
+
+  test_feature_engine_py312_pandas300:
+    docker:
+      - image: cimg/python:3.12.1
+    working_directory: ~/project
+    steps:
+      - checkout:
+          path: ~/project
+      - *prepare_tox
+      - run:
+          name: Run tests with Python 3.12
+          command: |
+            tox -e py312-pandas300
 
   test_feature_engine_py313:
     docker:
@@ -197,7 +210,8 @@ workflows:
       - test_feature_engine_py311_sklearn150
       - test_feature_engine_py311_sklearn160
       - test_feature_engine_py311_sklearn170
-      - test_feature_engine_py312
+      - test_feature_engine_py312_pandas230
+      - test_feature_engine_py312_pandas300
       - test_feature_engine_py313
       - test_style
       - test_docs
@@ -214,7 +228,8 @@ workflows:
             - test_feature_engine_py311_sklearn150
             - test_feature_engine_py311_sklearn160
             - test_feature_engine_py311_sklearn170
-            - test_feature_engine_py312
+            - test_feature_engine_py312_pandas230
+            - test_feature_engine_py312_pandas300
             - test_feature_engine_py313
             - test_style
             - test_docs

From a7ab1a1309de0efbab49a01e6d468dca28221ed4 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 20:23:31 -0500
Subject: [PATCH 18/24] reformat circleci config

---
 .circleci/config.yml | 67 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 14 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 11335e92d..4df23b66b 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,7 +1,13 @@
 version: 2.1
+
 orbs:
   codecov: codecov/codecov@3.2.3
 
+
+# --------------------------------------------------
+# Anchors & defaults
+# --------------------------------------------------
+
 defaults: &defaults
   docker:
     - image: cimg/python:3.10.0
@@ -17,14 +23,24 @@ prepare_tox: &prepare_tox
 
 init_pypirc: &init_pypirc
   run:
-    name: init .pypirc
+    name: Init .pypirc
     command: |
       echo -e "[pypi]" >> ~/.pypirc
       echo -e "repository = $FE_PYPI_URL" >> ~/.pypirc
       echo -e "username = $FE_PYPI_USER" >> ~/.pypirc
       echo -e "password = $FE_PYPI_API_KEY" >> ~/.pypirc
 
+
+# --------------------------------------------------
+# Jobs
+# --------------------------------------------------
+
 jobs:
+
+  # ------------------------
+  # Test matrix
+  # ------------------------
+
   test_feature_engine_py39:
     docker:
       - image: cimg/python:3.9.0
@@ -34,7 +50,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.9
+          name: Run tests (Python 3.9)
           command: |
             tox -e py39
 
@@ -47,7 +63,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.10
+          name: Run tests (Python 3.10)
           command: |
             tox -e py310
 
@@ -60,7 +76,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.11 and scikit-learn 1.5.0
+          name: Run tests (Python 3.11, scikit-learn 1.5)
           command: |
             tox -e py311-sklearn150
 
@@ -73,7 +89,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.11 and scikit-learn 1.6.0
+          name: Run tests (Python 3.11, scikit-learn 1.6)
           command: |
             tox -e py311-sklearn160
 
@@ -86,7 +102,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.11 and scikit-learn 1.7.0
+          name: Run tests (Python 3.11, scikit-learn 1.7)
           command: |
             tox -e py311-sklearn170
 
@@ -99,7 +115,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.12
+          name: Run tests (Python 3.12, pandas 2.3)
           command: |
             tox -e py312-pandas230
 
@@ -112,7 +128,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.12
+          name: Run tests (Python 3.12, pandas 3.0)
           command: |
             tox -e py312-pandas300
 
@@ -125,10 +141,15 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.13
+          name: Run tests (Python 3.13)
           command: |
             tox -e py313
 
+
+  # ------------------------
+  # Quality checks
+  # ------------------------
+
   test_style:
     docker:
       - image: cimg/python:3.10.0
@@ -138,7 +159,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run stylechecks
+          name: Run style checks
           command: |
             tox -e stylechecks
 
@@ -151,7 +172,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run doc build
+          name: Build documentation
           command: |
             tox -e docs
 
@@ -164,10 +185,15 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run typechecks
+          name: Run type checks
           command: |
             tox -e typechecks
 
+
+  # ------------------------
+  # Coverage
+  # ------------------------
+
   upload_codecov:
     docker:
       - image: cimg/python:3.10.0
@@ -184,13 +210,18 @@ jobs:
             coverage report
       - codecov/upload
 
+
+  # ------------------------
+  # Release
+  # ------------------------
+
   package_and_upload_to_pypi:
     <<: *defaults
     steps:
       - checkout
       - *init_pypirc
       - run:
-          name: upload to pypi
+          name: Build and upload package
           command: |
             python -m venv env
             source env/bin/activate
@@ -201,8 +232,14 @@ jobs:
             ls -l dist
             twine upload dist/*
 
+
+# --------------------------------------------------
+# Workflows
+# --------------------------------------------------
+
 workflows:
   version: 2
+
   test-all:
     jobs:
       - test_feature_engine_py39
@@ -216,11 +253,13 @@ workflows:
       - test_style
       - test_docs
       - test_type
+
       - upload_codecov:
           filters:
             branches:
               ignore:
                 - 1.9.X
+
       - package_and_upload_to_pypi:
           requires:
             - test_feature_engine_py39
@@ -237,4 +276,4 @@ workflows:
           filters:
             branches:
               only:
-                - 1.9.X
\ No newline at end of file
+                - 1.9.X

From c272f86fc82e348da18e0d834fb5ab0ddf34ee57 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 20:24:02 -0500
Subject: [PATCH 19/24] test reverting pandas version on None

---
 .../test_encoding/test_similarity_encoder.py  | 24 +++++--------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/tests/test_encoding/test_similarity_encoder.py b/tests/test_encoding/test_similarity_encoder.py
index 49f809038..b33ef4f00 100644
--- a/tests/test_encoding/test_similarity_encoder.py
+++ b/tests/test_encoding/test_similarity_encoder.py
@@ -167,24 +167,12 @@ def test_nan_behaviour_ignore(df_enc_big, nan_value):
             "var_C": ["C", "D", "B", "G", "A", "E", "F"],
         }
     else:
-        if nan_value is not None:
-            assert encoder.encoder_dict_ == {
-                "var_A": ["B", "D", "G", "A", "C", "E", "F"],
-                "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-                "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-            }
-        else:
-            assert encoder.encoder_dict_ == {
-                "var_A": ["B", "D", "G", "A", "C", "E", "F"],
-                "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-                "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-            }
-            # Note that None is converted to a string and not treated as nan value
-            assert encoder.encoder_dict_ == {
-                "var_A": ["B", "D", "G", "A", "C", "E", "F", "None"],
-                "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-                "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-            }
+
+        assert encoder.encoder_dict_ == {
+            "var_A": ["B", "D", "G", "A", "C", "E", "F"],
+            "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+            "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+        }
 
 
 def test_string_dtype_with_pd_na():

From 2c0c51c7bc9813cdd9d18ba03014be47393f8668 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Fri, 6 Feb 2026 20:42:11 -0500
Subject: [PATCH 20/24] move dropna a level up when missing values ignore in
 string similarity

---
 feature_engine/encoding/similarity_encoder.py |  2 +-
 .../test_encoding/test_similarity_encoder.py  | 19 +++++--------------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/feature_engine/encoding/similarity_encoder.py b/feature_engine/encoding/similarity_encoder.py
index b4dd91f99..b6aa1b249 100644
--- a/feature_engine/encoding/similarity_encoder.py
+++ b/feature_engine/encoding/similarity_encoder.py
@@ -275,9 +275,9 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
             for var in cols_to_iterate:
                 self.encoder_dict_[var] = (
                     X[var]
+                    .dropna()
                     .astype(str)
                     .value_counts(dropna=True)
-                    .drop(["nan", "<NA>"], errors="ignore")
                     .head(self.top_categories)
                     .index.tolist()
                 )
diff --git a/tests/test_encoding/test_similarity_encoder.py b/tests/test_encoding/test_similarity_encoder.py
index b33ef4f00..09c17443b 100644
--- a/tests/test_encoding/test_similarity_encoder.py
+++ b/tests/test_encoding/test_similarity_encoder.py
@@ -159,20 +159,11 @@ def test_nan_behaviour_ignore(df_enc_big, nan_value):
     encoder = StringSimilarityEncoder(missing_values="ignore")
     X = encoder.fit_transform(df_enc_big_na)
     assert (X.isna().any(axis=1) == df_enc_big_na.isna().any(axis=1)).all()
-    # TODO: Remove pandas < 3 support when dropping older pandas versions
-    if pd.__version__ >= "3":
-        assert encoder.encoder_dict_ == {
-            "var_A": ["B", "D", "G", "A", "C", "E", "F"],
-            "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-            "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-        }
-    else:
-
-        assert encoder.encoder_dict_ == {
-            "var_A": ["B", "D", "G", "A", "C", "E", "F"],
-            "var_B": ["A", "D", "B", "G", "C", "E", "F"],
-            "var_C": ["C", "D", "B", "G", "A", "E", "F"],
-        }
+    assert encoder.encoder_dict_ == {
+        "var_A": ["B", "D", "G", "A", "C", "E", "F"],
+        "var_B": ["A", "D", "B", "G", "C", "E", "F"],
+        "var_C": ["C", "D", "B", "G", "A", "E", "F"],
+    }
 
 
 def test_string_dtype_with_pd_na():

From 1f8e4e64066dbe24efc03c057a9af715a2ee9eba Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Sat, 7 Feb 2026 06:48:36 -0500
Subject: [PATCH 21/24] refactor condition logic for code coverage

---
 feature_engine/creation/math_features.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/feature_engine/creation/math_features.py b/feature_engine/creation/math_features.py
index 19bfded91..368ac6a73 100644
--- a/feature_engine/creation/math_features.py
+++ b/feature_engine/creation/math_features.py
@@ -167,13 +167,12 @@ def __init__(
             )
 
         if new_variables_names is not None:
-            if isinstance(func, list):
-                if len(new_variables_names) != len(func):
-                    raise ValueError(
-                        "The number of new feature names must coincide with the number "
-                        "of functions."
-                    )
-            elif len(new_variables_names) != 1:
+            if isinstance(new_variables_names, list):
+                expected = len(func)
+            else:
+                expected = 1
+
+            if len(new_variables_names) != expected:
                 raise ValueError(
                     "The number of new feature names must coincide with the number "
                     "of functions."

From 9406e1b04cdcd80ef4ef7d700d23fb6c0829e184 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Sat, 7 Feb 2026 07:15:24 -0500
Subject: [PATCH 22/24] fix decreased coverage

---
 .../variable_handling/find_variables.py           | 15 +++++++--------
 tests/test_creation/test_math_features.py         |  6 ++++++
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/feature_engine/variable_handling/find_variables.py b/feature_engine/variable_handling/find_variables.py
index 72e17d9ef..6162fdb43 100644
--- a/feature_engine/variable_handling/find_variables.py
+++ b/feature_engine/variable_handling/find_variables.py
@@ -253,14 +253,13 @@ def find_categorical_and_numerical_variables(
     # If user leaves default None parameter.
     elif variables is None:
         # find categorical variables
-        if variables is None:
-            variables_cat = [
-                column
-                for column in X.select_dtypes(
-                    include=["O", "category", "string"]
-                ).columns
-                if _is_categorical_and_is_not_datetime(X[column])
-            ]
+        variables_cat = [
+            column
+            for column in X.select_dtypes(
+                include=["O", "category", "string"]
+            ).columns
+            if _is_categorical_and_is_not_datetime(X[column])
+        ]
         # find numerical variables in dataset
         variables_num = list(X.select_dtypes(include="number").columns)
 
diff --git a/tests/test_creation/test_math_features.py b/tests/test_creation/test_math_features.py
index 6e16821be..332fcfea8 100644
--- a/tests/test_creation/test_math_features.py
+++ b/tests/test_creation/test_math_features.py
@@ -79,6 +79,12 @@ def test_error_new_variable_names_not_permitted():
             func=["sum", "mean"],
             new_variables_names=["sum_of_two_vars", "sum_of_two_vars"],
         )
+    with pytest.raises(ValueError):
+        MathFeatures(
+            variables=variables,
+            func=["sum", "mean"],
+            new_variables_names="sum_of_two_vars"
+        )
 
 
 def test_aggregations_with_strings(df_vartypes):

From c516db2de308988c2df357a08b1db6eb9e5676b1 Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Sat, 7 Feb 2026 07:36:51 -0500
Subject: [PATCH 23/24] revert math features to main

---
 feature_engine/creation/math_features.py | 40 +++++++++++++-----------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/feature_engine/creation/math_features.py b/feature_engine/creation/math_features.py
index 368ac6a73..35cbe73aa 100644
--- a/feature_engine/creation/math_features.py
+++ b/feature_engine/creation/math_features.py
@@ -140,6 +140,7 @@ def __init__(
         missing_values: str = "raise",
         drop_original: bool = False,
     ) -> None:
+
         if (
             not isinstance(variables, list)
             or not all(isinstance(var, (int, str)) for var in variables)
@@ -156,28 +157,31 @@ def __init__(
                 "func does not work with dictionaries in this transformer."
             )
 
-        if new_variables_names is not None and (
-            not isinstance(new_variables_names, list)
-            or not all(isinstance(var, str) for var in new_variables_names)
-            or len(set(new_variables_names)) != len(new_variables_names)
-        ):
-            raise ValueError(
-                "new_variable_names should be None or a list of unique strings. "
-                f"Got {new_variables_names} instead."
-            )
-
         if new_variables_names is not None:
-            if isinstance(new_variables_names, list):
-                expected = len(func)
-            else:
-                expected = 1
-
-            if len(new_variables_names) != expected:
+            if (
+                not isinstance(new_variables_names, list)
+                or not all(isinstance(var, str) for var in new_variables_names)
+                or len(set(new_variables_names)) != len(new_variables_names)
+            ):
                 raise ValueError(
-                    "The number of new feature names must coincide with the number "
-                    "of functions."
+                    "new_variable_names should be None or a list of unique strings. "
+                    f"Got {new_variables_names} instead."
                 )
 
+        if new_variables_names is not None:
+            if isinstance(func, list):
+                if len(new_variables_names) != len(func):
+                    raise ValueError(
+                        "The number of new feature names must coincide with the number "
+                        "of functions."
+                    )
+            else:
+                if len(new_variables_names) != 1:
+                    raise ValueError(
+                        "The number of new feature names must coincide with the number "
+                        "of functions."
+                    )
+
         super().__init__(missing_values, drop_original)
 
         self.variables = variables

From 5e6338c709b9390bff561b54d159b3d39c2d441a Mon Sep 17 00:00:00 2001
From: solegalli <solegalli@protonmail.com>
Date: Sat, 7 Feb 2026 07:38:23 -0500
Subject: [PATCH 24/24] remove test from math features

---
 tests/test_creation/test_math_features.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/test_creation/test_math_features.py b/tests/test_creation/test_math_features.py
index 332fcfea8..6e16821be 100644
--- a/tests/test_creation/test_math_features.py
+++ b/tests/test_creation/test_math_features.py
@@ -79,12 +79,6 @@ def test_error_new_variable_names_not_permitted():
             func=["sum", "mean"],
             new_variables_names=["sum_of_two_vars", "sum_of_two_vars"],
         )
-    with pytest.raises(ValueError):
-        MathFeatures(
-            variables=variables,
-            func=["sum", "mean"],
-            new_variables_names="sum_of_two_vars"
-        )
 
 
 def test_aggregations_with_strings(df_vartypes):