feature-engine · solegalli · Feb 7, 2026 · Jan 28, 2026 · Jan 28, 2026 · Jan 28, 2026
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -1,7 +1,13 @@
 version: 2.1
+
 orbs:
   codecov: codecov/codecov@3.2.3
 
+
+# --------------------------------------------------
+# Anchors & defaults
+# --------------------------------------------------
+
 defaults: &defaults
   docker:
     - image: cimg/python:3.10.0
@@ -17,14 +23,24 @@ prepare_tox: &prepare_tox
 
 init_pypirc: &init_pypirc
   run:
-    name: init .pypirc
+    name: Init .pypirc
     command: |
       echo -e "[pypi]" >> ~/.pypirc
       echo -e "repository = $FE_PYPI_URL" >> ~/.pypirc
       echo -e "username = $FE_PYPI_USER" >> ~/.pypirc
       echo -e "password = $FE_PYPI_API_KEY" >> ~/.pypirc
 
+
+# --------------------------------------------------
+# Jobs
+# --------------------------------------------------
+
 jobs:
+
+  # ------------------------
+  # Test matrix
+  # ------------------------
+
   test_feature_engine_py39:
     docker:
       - image: cimg/python:3.9.0
@@ -34,7 +50,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.9
+          name: Run tests (Python 3.9)
           command: |
             tox -e py39
 
@@ -47,7 +63,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.10
+          name: Run tests (Python 3.10)
           command: |
             tox -e py310
 
@@ -60,7 +76,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.11 and scikit-learn 1.5.0
+          name: Run tests (Python 3.11, scikit-learn 1.5)
           command: |
             tox -e py311-sklearn150
 
@@ -73,7 +89,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.11 and scikit-learn 1.6.0
+          name: Run tests (Python 3.11, scikit-learn 1.6)
           command: |
             tox -e py311-sklearn160
 
@@ -86,11 +102,11 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.11 and scikit-learn 1.7.0
+          name: Run tests (Python 3.11, scikit-learn 1.7)
           command: |
             tox -e py311-sklearn170
 
-  test_feature_engine_py312:
+  test_feature_engine_py312_pandas230:
     docker:
       - image: cimg/python:3.12.1
     working_directory: ~/project
@@ -99,9 +115,22 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.12
+          name: Run tests (Python 3.12, pandas 2.3)
           command: |
-            tox -e py312
+            tox -e py312-pandas230
+
+  test_feature_engine_py312_pandas300:
+    docker:
+      - image: cimg/python:3.12.1
+    working_directory: ~/project
+    steps:
+      - checkout:
+          path: ~/project
+      - *prepare_tox
+      - run:
+          name: Run tests (Python 3.12, pandas 3.0)
+          command: |
+            tox -e py312-pandas300
 
   test_feature_engine_py313:
     docker:
@@ -112,10 +141,15 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run tests with Python 3.13
+          name: Run tests (Python 3.13)
           command: |
             tox -e py313
 
+
+  # ------------------------
+  # Quality checks
+  # ------------------------
+
   test_style:
     docker:
       - image: cimg/python:3.10.0
@@ -125,7 +159,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run stylechecks
+          name: Run style checks
           command: |
             tox -e stylechecks
 
@@ -138,7 +172,7 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run doc build
+          name: Build documentation
           command: |
             tox -e docs
 
@@ -151,10 +185,15 @@ jobs:
           path: ~/project
       - *prepare_tox
       - run:
-          name: Run typechecks
+          name: Run type checks
           command: |
             tox -e typechecks
 
+
+  # ------------------------
+  # Coverage
+  # ------------------------
+
   upload_codecov:
     docker:
       - image: cimg/python:3.10.0
@@ -171,13 +210,18 @@ jobs:
             coverage report
       - codecov/upload
 
+
+  # ------------------------
+  # Release
+  # ------------------------
+
   package_and_upload_to_pypi:
     <<: *defaults
     steps:
       - checkout
       - *init_pypirc
       - run:
-          name: upload to pypi
+          name: Build and upload package
           command: |
             python -m venv env
             source env/bin/activate
@@ -188,38 +232,48 @@ jobs:
             ls -l dist
             twine upload dist/*
 
+
+# --------------------------------------------------
+# Workflows
+# --------------------------------------------------
+
 workflows:
   version: 2
+
   test-all:
     jobs:
       - test_feature_engine_py39
       - test_feature_engine_py310
       - test_feature_engine_py311_sklearn150
       - test_feature_engine_py311_sklearn160
       - test_feature_engine_py311_sklearn170
-      - test_feature_engine_py312
+      - test_feature_engine_py312_pandas230
+      - test_feature_engine_py312_pandas300
       - test_feature_engine_py313
       - test_style
       - test_docs
       - test_type
+
       - upload_codecov:
           filters:
             branches:
               ignore:
                 - 1.9.X
+
       - package_and_upload_to_pypi:
           requires:
             - test_feature_engine_py39
             - test_feature_engine_py310
             - test_feature_engine_py311_sklearn150
             - test_feature_engine_py311_sklearn160
             - test_feature_engine_py311_sklearn170
-            - test_feature_engine_py312
+            - test_feature_engine_py312_pandas230
+            - test_feature_engine_py312_pandas300
             - test_feature_engine_py313
             - test_style
             - test_docs
             - test_type
           filters:
             branches:
               only:
-                - 1.9.X
+                - 1.9.X
diff --git a/feature_engine/dataframe_checks.py b/feature_engine/dataframe_checks.py
@@ -9,6 +9,8 @@
 from scipy.sparse import issparse
 from sklearn.utils.validation import _check_y, check_consistent_length, column_or_1d
 
+from feature_engine.variable_handling._variable_type_checks import is_object
+
 
 def check_X(X: Union[np.generic, np.ndarray, pd.DataFrame]) -> pd.DataFrame:
     """
@@ -121,10 +123,10 @@ def check_y(
     elif isinstance(y, pd.Series):
         if y.isnull().any():
             raise ValueError("y contains NaN values.")
-        if y.dtype != "O" and not np.isfinite(y).all():
+        if not is_object(y) and not np.isfinite(y).all():
             raise ValueError("y contains infinity values.")
-        if y_numeric and y.dtype == "O":
-            y = y.astype("float")
+        if y_numeric and is_object(y):
+            y = y.astype("float64")
         y = y.copy()
 
     elif isinstance(y, pd.DataFrame):

diff --git a/feature_engine/encoding/similarity_encoder.py b/feature_engine/encoding/similarity_encoder.py
@@ -232,12 +232,13 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         X = check_X(X)
         variables_ = self._check_or_select_variables(X)
 
-        if self.keywords:
-            if not all(item in variables_ for item in self.keywords.keys()):
-                raise ValueError(
-                    "There are variables in keywords that are not present "
-                    "in the dataset."
-                )
+        if self.keywords and not all(
+            item in variables_ for item in self.keywords.keys()
+        ):
+            raise ValueError(
+                "There are variables in keywords that are not present "
+                "in the dataset."
+            )
 
         # if data contains nan, fail before running any logic
         if self.missing_values == "raise":
@@ -262,10 +263,10 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                 )
         elif self.missing_values == "impute":
             for var in cols_to_iterate:
+                series = X[var]
                 self.encoder_dict_[var] = (
-                    X[var]
-                    .astype(str)
-                    .replace("nan", "")
+                    series.astype(str)
+                    .mask(series.isna(), "")
                     .value_counts()
                     .head(self.top_categories)
                     .index.tolist()
@@ -274,9 +275,9 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
             for var in cols_to_iterate:
                 self.encoder_dict_[var] = (
                     X[var]
+                    .dropna()
                     .astype(str)
                     .value_counts(dropna=True)
-                    .drop("nan", errors="ignore")
                     .head(self.top_categories)
                     .index.tolist()
                 )
@@ -316,13 +317,31 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         new_values = []
         for var in self.variables_:
             if self.missing_values == "impute":
-                X[var] = X[var].astype(str).replace("nan", "")
-            categories = X[var].dropna().astype(str).unique()
+                series = X[var]
+                series = series.astype(str).mask(series.isna(), "")
+            else:
+                series = X[var].astype(str)
+
+            categories = series.unique()
             column_encoder_dict = {
                 x: _gpm_fast_vec(x, self.encoder_dict_[var]) for x in categories
             }
-            column_encoder_dict["nan"] = [np.nan] * len(self.encoder_dict_[var])
-            encoded = np.vstack(X[var].astype(str).map(column_encoder_dict).values)
+            # Ensure map result is always an array of the correct size.
+            # Missing values in categories or unknown categories will map to NaN.
+            default_nan = np.full(len(self.encoder_dict_[var]), np.nan)
+            if "nan" not in column_encoder_dict:
+                column_encoder_dict["nan"] = default_nan
+            if "<NA>" not in column_encoder_dict:
+                column_encoder_dict["<NA>"] = default_nan
+
+            encoded_series = series.map(column_encoder_dict)
+
+            # Robust stacking: swap scalar NaNs (unmapped values) for all-NaN rows
+            encoded_list = [
+                v if isinstance(v, (list, np.ndarray)) else default_nan
+                for v in encoded_series
+            ]
+            encoded = np.vstack(encoded_list)
             if self.missing_values == "ignore":
                 encoded[X[var].isna(), :] = np.nan
             new_values.append(encoded)

diff --git a/feature_engine/preprocessing/match_columns.py b/feature_engine/preprocessing/match_columns.py
@@ -175,7 +175,7 @@ def __init__(
 
         if not isinstance(verbose, bool):
             raise ValueError(
-                "verbose takes only booleans True and False." f"Got '{verbose} instead."
+                f"verbose takes only booleans True and False. Got '{verbose}' instead."
             )
 
         # note: np.nan is an instance of float!!!
@@ -262,7 +262,10 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
         X = X.drop(_columns_to_drop, axis=1)
 
-        X = X.reindex(columns=self.feature_names_in_, fill_value=self.fill_value)
+        # Add the missing columns first, then reorder. This replaces the former
+        # reindex(fill_value=...) call to avoid a pandas 3 StringDtype reindex issue.
+        X[_columns_to_add] = self.fill_value
+        X = X[self.feature_names_in_]
 
         if self.match_dtypes:
             _current_dtypes = X.dtypes.to_dict()

diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
@@ -201,7 +201,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                         axis=0,
                     )
                     df_ls.append(tmp)
-                tmp = pd.concat(df_ls, axis=1)
+                tmp = pd.concat(df_ls, axis=1, sort=False)
 
             else:
                 tmp = X[self.variables_].shift(
@@ -219,7 +219,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                         axis=0,
                     )
                     df_ls.append(tmp)
-                tmp = pd.concat(df_ls, axis=1)
+                tmp = pd.concat(df_ls, axis=1, sort=False)
 
             else:
                 tmp = X[self.variables_].shift(

diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
@@ -219,7 +219,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                     .shift(periods=self.periods, freq=self.freq)
                 )
                 df_ls.append(tmp)
-            tmp = pd.concat(df_ls, axis=1)
+            tmp = pd.concat(df_ls, axis=1, sort=False)
 
         else:
             tmp = (