From 2a1ba3061c8f2dc02f337cd82cfcabd9d2cdba25 Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Wed, 14 Jan 2026 22:37:25 +0800
Subject: [PATCH 01/13] Support saving a DataFrame to TsFile.

---
 python/tests/test_dataframe.py       | 251 +++++++++++++++++++
 python/tests/test_to_tsfile.py       | 345 +++++++++++++++++++++++++++
 python/tests/test_write_and_read.py  | 238 +-----------------
 python/tsfile/__init__.py            |   2 +-
 python/tsfile/constants.py           |  80 ++++++-
 python/tsfile/tsfile_cpp.pxd         |   3 +-
 python/tsfile/tsfile_py_cpp.pxd      |   1 +
 python/tsfile/tsfile_py_cpp.pyx      | 184 +++++++++++++-
 python/tsfile/tsfile_table_writer.py |  75 +++++-
 python/tsfile/tsfile_writer.pyx      |  30 ++-
 python/tsfile/utils.py               | 106 ++++++++
 11 files changed, 1052 insertions(+), 263 deletions(-)
 create mode 100644 python/tests/test_dataframe.py
 create mode 100644 python/tests/test_to_tsfile.py

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
new file mode 100644
index 000000000..5138968a2
--- /dev/null
+++ b/python/tests/test_dataframe.py
@@ -0,0 +1,251 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import os
+
+import numpy as np
+import pandas as pd
+import pytest
+from pandas.core.dtypes.common import is_integer_dtype
+
+from tsfile import ColumnSchema, TableSchema, TSDataType
+from tsfile import TsFileTableWriter, ColumnCategory
+from tsfile import to_dataframe
+from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
+
+
+def convert_to_nullable_types(df):
+    """
+    Cast plain int/float/bool columns of a copy of ``df`` to nullable dtypes.
+    Frames returned by to_dataframe use nullable types (Int64, Float64, etc.)
+    to support Null values, so expected frames must be cast the same way.
+    """
+    nullable_by_dtype = {
+        'int64': 'Int64',
+        'int32': 'Int32',
+        'float64': 'Float64',
+        'float32': 'Float32',
+        'bool': 'boolean',
+    }
+    converted = df.copy()
+    for name in converted.columns:
+        for plain, nullable in nullable_by_dtype.items():
+            if converted[name].dtype == plain:
+                converted[name] = converted[name].astype(nullable)
+                break
+    return converted
+
+
+def test_write_dataframe_basic():
+    """Write a frame with an explicit 'time' column and read it back unchanged."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG),
+                          ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD),
+                          ColumnSchema("value2", TSDataType.INT64, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_basic.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        # 100 rows, each with its own device tag value.
+        frame = pd.DataFrame({
+            'time': list(range(100)),
+            'device': [f"device{i}" for i in range(100)],
+            'value': [i * 1.5 for i in range(100)],
+            'value2': [i * 10 for i in range(100)]
+        })
+        with TsFileTableWriter(path, schema) as writer:
+            writer.write_dataframe(frame)
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('time').reset_index(drop=True))
+        assert actual.shape == (100, 4)
+        # Every column must round-trip through the file unchanged.
+        for column in ("time", "device", "value", "value2"):
+            assert actual[column].equals(expected[column])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_write_dataframe_with_index():
+    """Without a time column, the integer DataFrame index is read back as 'time'."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG),
+                          ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_index.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'device': [f"device{i}" for i in range(50)],
+            'value': [i * 2.5 for i in range(50)]
+        })
+        frame.index = [i * 10 for i in range(50)]  # Index values act as timestamps
+        with TsFileTableWriter(path, schema) as writer:
+            writer.write_dataframe(frame)
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        ordered = frame.sort_index()
+        expected = convert_to_nullable_types(ordered.reset_index(drop=True))
+        expected_time = pd.Series(ordered.index.values, dtype='Int64')
+        assert actual.shape == (50, 3)
+        assert actual["time"].equals(expected_time)
+        for column in ("device", "value"):
+            assert actual[column].equals(expected[column])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_write_dataframe_case_insensitive():
+    """DataFrame column names are matched to schema columns ignoring case."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG),
+                          ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_case.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'Time': list(range(30)),  # Capital T
+            'Device': [f"device{i}" for i in range(30)],  # Capital D
+            'VALUE': [i * 3.0 for i in range(30)]  # All caps
+        })
+        with TsFileTableWriter(path, schema) as writer:
+            writer.write_dataframe(frame)
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('Time').reset_index(drop=True))
+        assert actual.shape == (30, 3)
+        # Read-back columns are looked up by their lower-case names.
+        for read_col, written_col in (("time", "Time"), ("device", "Device"), ("value", "VALUE")):
+            assert actual[read_col].equals(expected[written_col])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_write_dataframe_column_not_in_schema():
+    """A DataFrame column missing from the table schema is rejected by name."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG),
+                          ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_extra_col.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(10)),
+            'device': [f"device{i}" for i in range(10)],
+            'value': [i * 1.0 for i in range(10)],
+            'extra_column': list(range(10))  # Not in schema
+        })
+        with TsFileTableWriter(path, schema) as writer:
+            with pytest.raises(ColumnNotExistError) as exc_info:
+                writer.write_dataframe(frame)
+            assert "extra_column" in str(exc_info.value)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_write_dataframe_type_mismatch():
+    """Integer data written to a STRING schema column raises TypeMismatchError."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("value", TSDataType.STRING, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_type_mismatch.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(10)),
+            'value': list(range(10))  # INT64, but schema expects STRING
+        })
+        with TsFileTableWriter(path, schema) as writer:
+            with pytest.raises(TypeMismatchError) as exc_info:
+                writer.write_dataframe(frame)
+            assert "Type mismatches" in str(exc_info.value)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_write_dataframe_all_datatypes():
+    """Round-trip one FIELD column each of BOOLEAN, INT32, INT64, FLOAT, DOUBLE, STRING, BLOB."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("bool_col", TSDataType.BOOLEAN, ColumnCategory.FIELD),
+                          ColumnSchema("int32_col", TSDataType.INT32, ColumnCategory.FIELD),
+                          ColumnSchema("int64_col", TSDataType.INT64, ColumnCategory.FIELD),
+                          ColumnSchema("float_col", TSDataType.FLOAT, ColumnCategory.FIELD),
+                          ColumnSchema("double_col", TSDataType.DOUBLE, ColumnCategory.FIELD),
+                          ColumnSchema("string_col", TSDataType.STRING, ColumnCategory.FIELD),
+                          ColumnSchema("blob_col", TSDataType.BLOB, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_all_types.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(50)),
+            'bool_col': [i % 2 == 0 for i in range(50)],
+            'int32_col': pd.Series(list(range(50)), dtype='int32'),
+            'int64_col': [i * 10 for i in range(50)],
+            'float_col': pd.Series([i * 1.5 for i in range(50)], dtype='float32'),
+            'double_col': [i * 2.5 for i in range(50)],
+            'string_col': [f"str{i}" for i in range(50)],
+            'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)]
+        })
+        with TsFileTableWriter(path, schema) as writer:
+            writer.write_dataframe(frame)
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('time').reset_index(drop=True))
+        assert actual.shape == (50, 8)
+        for column in ("bool_col", "int32_col", "int64_col"):
+            assert actual[column].equals(expected[column])
+        # Floating-point columns are compared with a tolerance.
+        for column in ("float_col", "double_col"):
+            assert np.allclose(actual[column], expected[column])
+        assert actual["string_col"].equals(expected["string_col"])
+        for got, want in zip(actual["blob_col"], expected["blob_col"]):
+            assert got == want
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_write_dataframe_empty():
+    """Writing a DataFrame that has columns but no rows raises ValueError."""
+    schema = TableSchema("test_table",
+                         [ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
+    path = "test_write_dataframe_empty.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': [],
+            'value': []
+        })
+        with TsFileTableWriter(path, schema) as writer:
+            # The raised exception object is not inspected, so it is not bound.
+            with pytest.raises(ValueError):
+                writer.write_dataframe(frame)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
new file mode 100644
index 000000000..0928f1a94
--- /dev/null
+++ b/python/tests/test_to_tsfile.py
@@ -0,0 +1,345 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import os
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from tsfile import to_dataframe
+from tsfile.utils import dataframe_to_tsfile
+
+
+def convert_to_nullable_types(df):
+    """Cast plain int/float/bool columns of a copy of ``df`` to nullable dtypes."""
+    nullable_by_dtype = {
+        'int64': 'Int64', 'int32': 'Int32',
+        'float64': 'Float64', 'float32': 'Float32',
+        'bool': 'boolean',
+    }
+    converted = df.copy()
+    for name in converted.columns:
+        # First matching dtype name wins; columns of any other dtype stay as-is.
+        for plain, nullable in nullable_by_dtype.items():
+            if converted[name].dtype == plain:
+                converted[name] = converted[name].astype(nullable)
+                break
+    return converted
+
+
+def test_dataframe_to_tsfile_basic():
+    """Round-trip a frame with an explicit 'time' column via dataframe_to_tsfile."""
+    path = "test_dataframe_to_tsfile_basic.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        # 100 rows, each with its own 'device' string.
+        frame = pd.DataFrame({
+            'time': list(range(100)),
+            'device': [f"device{i}" for i in range(100)],
+            'value': [i * 1.5 for i in range(100)],
+            'value2': [i * 10 for i in range(100)]
+        })
+
+        dataframe_to_tsfile(frame, path, table_name="test_table")
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('time').reset_index(drop=True))
+
+        assert actual.shape == (100, 4)
+        # Every column must round-trip through the file unchanged.
+        for column in ("time", "device", "value", "value2"):
+            assert actual[column].equals(expected[column])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_with_index():
+    """Without a time column, the integer DataFrame index is read back as 'time'."""
+    path = "test_dataframe_to_tsfile_index.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'device': [f"device{i}" for i in range(50)],
+            'value': [i * 2.5 for i in range(50)]
+        })
+        frame.index = [i * 10 for i in range(50)]  # Index values act as timestamps
+
+        dataframe_to_tsfile(frame, path, table_name="test_table")
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        ordered = frame.sort_index()
+        expected = convert_to_nullable_types(ordered.reset_index(drop=True))
+        expected_time = pd.Series(ordered.index.values, dtype='Int64')
+
+        assert actual.shape == (50, 3)
+        assert actual["time"].equals(expected_time)
+        for column in ("device", "value"):
+            assert actual[column].equals(expected[column])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_custom_time_column():
+    """The column named by time_column is read back under the name 'time'."""
+    path = "test_dataframe_to_tsfile_custom_time.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'timestamp': list(range(30)),
+            'device': [f"device{i}" for i in range(30)],
+            'value': [i * 3.0 for i in range(30)]
+        })
+
+        dataframe_to_tsfile(frame, path, table_name="test_table", time_column="timestamp")
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('timestamp').reset_index(drop=True))
+
+        assert actual.shape == (30, 3)
+        assert actual["time"].equals(expected["timestamp"])
+        for column in ("device", "value"):
+            assert actual[column].equals(expected[column])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_with_tag_columns():
+    """Round-trip a frame whose 'device' and 'location' columns are passed as tag_column."""
+    path = "test_dataframe_to_tsfile_tags.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(20)),
+            'device': [f"device{i}" for i in range(20)],
+            'location': [f"loc{i % 5}" for i in range(20)],
+            'value': [i * 1.5 for i in range(20)]
+        })
+
+        dataframe_to_tsfile(frame, path, table_name="test_table", tag_column=["device", "location"])
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('time').reset_index(drop=True))
+
+        assert actual.shape == (20, 4)
+        # All four columns, including 'time', must round-trip unchanged.
+        for column in ("time", "device", "location", "value"):
+            assert actual[column].equals(expected[column])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_all_datatypes():
+    """Round-trip bool, int32, int64, float32, float64, str and bytes columns."""
+    path = "test_dataframe_to_tsfile_all_types.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(50)),
+            'bool_col': [i % 2 == 0 for i in range(50)],
+            'int32_col': pd.Series(list(range(50)), dtype='int32'),
+            'int64_col': [i * 10 for i in range(50)],
+            'float_col': pd.Series([i * 1.5 for i in range(50)], dtype='float32'),
+            'double_col': [i * 2.5 for i in range(50)],
+            'string_col': [f"str{i}" for i in range(50)],
+            'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)]
+        })
+
+        dataframe_to_tsfile(frame, path, table_name="test_table")
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('time').reset_index(drop=True))
+
+        assert actual.shape == (50, 8)
+        for column in ("bool_col", "int32_col", "int64_col"):
+            assert actual[column].equals(expected[column])
+        # Floating-point columns are compared with a tolerance.
+        for column in ("float_col", "double_col"):
+            assert np.allclose(actual[column], expected[column])
+        assert actual["string_col"].equals(expected["string_col"])
+        for got, want in zip(actual["blob_col"], expected["blob_col"]):
+            assert got == want
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_default_table_name():
+    """With no table_name argument the data can be read from a table named 'table'."""
+    path = "test_dataframe_to_tsfile_default_name.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(10)),
+            'value': [i * 1.0 for i in range(10)]
+        })
+
+        dataframe_to_tsfile(frame, path)
+
+        actual = to_dataframe(path, table_name="table")
+        assert actual.shape == (10, 2)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_case_insensitive_time():
+    """A column named 'Time' (capital T) is still picked up as the time column."""
+    path = "test_dataframe_to_tsfile_case_time.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'Time': list(range(20)),
+            'value': [i * 2.0 for i in range(20)]
+        })
+
+        dataframe_to_tsfile(frame, path, table_name="test_table")
+
+        actual = to_dataframe(path, table_name="test_table")
+        assert actual.shape == (20, 2)
+        assert actual["time"].equals(pd.Series(list(range(20)), dtype='Int64'))
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_empty_dataframe():
+    """A DataFrame with no columns and no rows is rejected with ValueError."""
+    path = "test_dataframe_to_tsfile_empty.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame()
+
+        with pytest.raises(ValueError, match="DataFrame cannot be None or empty"):
+            dataframe_to_tsfile(frame, path)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_no_data_columns():
+    """A frame holding only the time column is rejected with ValueError."""
+    path = "test_dataframe_to_tsfile_no_data.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(10))
+        })
+
+        with pytest.raises(ValueError, match="DataFrame must have at least one data column"):
+            dataframe_to_tsfile(frame, path)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_invalid_time_column():
+    """Naming a time_column that is absent from the frame raises ValueError."""
+    path = "test_dataframe_to_tsfile_invalid_time.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'timestamp': list(range(10)),
+            'value': [i * 1.0 for i in range(10)]
+        })
+
+        with pytest.raises(ValueError, match="Time column 'time' not found"):
+            dataframe_to_tsfile(frame, path, time_column="time")
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_non_integer_time_column():
+    """A time column holding strings instead of integers raises TypeError."""
+    path = "test_dataframe_to_tsfile_non_int_time.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': [f"time{i}" for i in range(10)],
+            'value': [i * 1.0 for i in range(10)]
+        })
+
+        with pytest.raises(TypeError, match="must be integer type"):
+            dataframe_to_tsfile(frame, path)
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_invalid_tag_column():
+    """Naming a tag column that is absent from the frame raises ValueError."""
+    path = "test_dataframe_to_tsfile_invalid_tag.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(10)),
+            'value': [i * 1.0 for i in range(10)]
+        })
+
+        with pytest.raises(ValueError, match="Tag column 'invalid' not found"):
+            dataframe_to_tsfile(frame, path, tag_column=["invalid"])
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def test_dataframe_to_tsfile_string_vs_blob():
+    """A str column and a bytes column both round-trip with their values intact."""
+    path = "test_dataframe_to_tsfile_string_blob.tsfile"
+    try:
+        if os.path.exists(path):
+            os.remove(path)
+        frame = pd.DataFrame({
+            'time': list(range(20)),
+            'string_col': [f"str{i}" for i in range(20)],
+            'blob_col': [f"blob{i}".encode('utf-8') for i in range(20)]
+        })
+
+        dataframe_to_tsfile(frame, path, table_name="test_table")
+
+        actual = to_dataframe(path, table_name="test_table")
+        actual = actual.sort_values('time').reset_index(drop=True)
+        expected = convert_to_nullable_types(frame.sort_values('time').reset_index(drop=True))
+
+        assert actual["string_col"].equals(expected["string_col"])
+        for got, want in zip(actual["blob_col"], expected["blob_col"]):
+            assert got == want
+    finally:
+        if os.path.exists(path):
+            os.remove(path)
diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py
index b327e2d3d..1ffc22b99 100644
--- a/python/tests/test_write_and_read.py
+++ b/python/tests/test_write_and_read.py
@@ -16,11 +16,13 @@
 # under the License.
 #
 
+import os
 from datetime import date
 
 import numpy as np
 import pandas as pd
 import pytest
+from pandas import Float64Dtype
 from pandas.core.dtypes.common import is_integer_dtype
 
 from tsfile import ColumnSchema, TableSchema, TSEncoding
@@ -31,7 +33,7 @@
 from tsfile import TsFileTableWriter
 from tsfile import TsFileWriter, TsFileReader, ColumnCategory
 from tsfile import to_dataframe
-from tsfile.exceptions import TableNotExistError, ColumnNotExistError, NotSupportedError
+from tsfile.exceptions import TableNotExistError, ColumnNotExistError, NotSupportedError, TypeMismatchError
 
 
 def test_row_record_write_and_read():
@@ -544,7 +546,7 @@ def test_tsfile_to_df():
         assert df1.shape == (4097, 4)
         assert df1["value2"].sum() == 100 * (1 + 4096) / 2 * 4096
         assert is_integer_dtype(df1["time"])
-        assert df1["value"].dtype == np.float64
+        assert df1["value"].dtype == Float64Dtype()
         assert is_integer_dtype(df1["value2"])
         df2 = to_dataframe("table_write_to_df.tsfile", column_names=["device", "value2"])
         assert df2.shape == (4097, 3)
@@ -755,237 +757,9 @@ def test_tree_all_datatype_query_to_dataframe_variants():
             pass
 
     finally:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-
-def test_table_all_datatype_query_to_dataframe_variants():
-    tsfile_path = "test_table.tsfile"
-    table = TableSchema(
-        "test_table",
-        [
-            ColumnSchema("Device1", TSDataType.STRING, ColumnCategory.TAG),
-            ColumnSchema("Device2", TSDataType.STRING, ColumnCategory.TAG),
-            ColumnSchema("Value1", TSDataType.BOOLEAN, ColumnCategory.FIELD),
-            ColumnSchema("Value2", TSDataType.INT32, ColumnCategory.FIELD),
-            ColumnSchema("Value3", TSDataType.INT64, ColumnCategory.FIELD),
-            ColumnSchema("Value4", TSDataType.FLOAT, ColumnCategory.FIELD),
-            ColumnSchema("Value5", TSDataType.DOUBLE, ColumnCategory.FIELD),
-            ColumnSchema("Value6", TSDataType.TEXT, ColumnCategory.FIELD),
-            ColumnSchema("Value7", TSDataType.STRING, ColumnCategory.FIELD),
-            ColumnSchema("Value8", TSDataType.BLOB, ColumnCategory.FIELD),
-            ColumnSchema("Value9", TSDataType.TIMESTAMP, ColumnCategory.FIELD),
-            ColumnSchema("Value10", TSDataType.DATE, ColumnCategory.FIELD),
-        ],
-    )
-    dateSet = set()
-    try:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-        max_row_num = 100
-        with TsFileTableWriter(tsfile_path, table) as writer:
-            tablet = Tablet(
-                [
-                    "Device1",
-                    "Device2",
-                    "Value1",
-                    "Value2",
-                    "Value3",
-                    "Value4",
-                    "Value5",
-                    "Value6",
-                    "Value7",
-                    "Value8",
-                    "Value9",
-                    "Value10",
-                ],
-                [
-                    TSDataType.STRING,
-                    TSDataType.STRING,
-                    TSDataType.BOOLEAN,
-                    TSDataType.INT32,
-                    TSDataType.INT64,
-                    TSDataType.FLOAT,
-                    TSDataType.DOUBLE,
-                    TSDataType.TEXT,
-                    TSDataType.STRING,
-                    TSDataType.BLOB,
-                    TSDataType.TIMESTAMP,
-                    TSDataType.DATE,
-                ],
-                max_row_num,
-            )
-            for i in range(max_row_num):
-                tablet.add_timestamp(i, i)
-                tablet.add_value_by_name("Device1", i, "d1_" + str(i))
-                tablet.add_value_by_name("Device2", i, "d2_" + str(i))
-                tablet.add_value_by_name("Value1", i, i % 2 == 0)
-                tablet.add_value_by_name("Value2", i, i * 3)
-                tablet.add_value_by_name("Value3", i, i * 4)
-                tablet.add_value_by_name("Value4", i, i * 5.5)
-                tablet.add_value_by_name("Value5", i, i * 6.6)
-                tablet.add_value_by_name("Value6", i, f"string_value_{i}")
-                tablet.add_value_by_name("Value7", i, f"text_value_{i}")
-                tablet.add_value_by_name("Value8", i, f"blob_data_{i}".encode('utf-8'))
-                tablet.add_value_by_name("Value9", i, i * 9)
-                tablet.add_value_by_name("Value10", i, date(2025, 1, i % 20 + 1))
-                dateSet.add(date(2025, 1, i % 20 + 1))
-            writer.write_table(tablet)
-
-        df1_1 = to_dataframe(tsfile_path)
-        assert df1_1.shape[0] == max_row_num
-        for i in range(max_row_num):
-            assert df1_1.iloc[i, 1] == "d1_" + str(df1_1.iloc[i, 0])
-            assert df1_1.iloc[i, 2] == "d2_" + str(df1_1.iloc[i, 0])
-
-        df2_1 = to_dataframe(tsfile_path, column_names=["Value1"])
-        for i in range(max_row_num):
-            assert df2_1.iloc[i, 1] == np.bool_(df2_1.iloc[i, 0] % 2 == 0)
-        df2_2 = to_dataframe(tsfile_path, column_names=["Value2"])
-        for i in range(max_row_num):
-            assert df2_2.iloc[i, 1] == np.int32(df2_2.iloc[i, 0] * 3)
-        df2_3 = to_dataframe(tsfile_path, column_names=["Value3"])
-        for i in range(max_row_num):
-            assert df2_3.iloc[i, 1] == np.int64(df2_3.iloc[i, 0] * 4)
-        df2_4 = to_dataframe(tsfile_path, column_names=["Value4"])
-        for i in range(max_row_num):
-            assert df2_4.iloc[i, 1] == np.float32(df2_4.iloc[i, 0] * 5.5)
-        df2_5 = to_dataframe(tsfile_path, column_names=["Value5"])
-        for i in range(max_row_num):
-            assert df2_5.iloc[i, 1] == np.float64(df2_5.iloc[i, 0] * 6.6)
-        df2_6 = to_dataframe(tsfile_path, column_names=["Value6"])
-        for i in range(max_row_num):
-            assert df2_6.iloc[i, 1] == f"string_value_{df2_6.iloc[i, 0]}"
-        df2_7 = to_dataframe(tsfile_path, column_names=["Value7"])
-        for i in range(max_row_num):
-            assert df2_7.iloc[i, 1] == f"text_value_{df2_7.iloc[i, 0]}"
-        df2_8 = to_dataframe(tsfile_path, column_names=["Value8"])
-        for i in range(max_row_num):
-            assert df2_8.iloc[i, 1] == f"blob_data_{df2_8.iloc[i, 0]}".encode('utf-8')
-        df2_9 = to_dataframe(tsfile_path, column_names=["Value9"])
-        for i in range(max_row_num):
-            assert df2_9.iloc[i, 1] == np.int64(df2_9.iloc[i, 0] * 9)
-        df2_10 = to_dataframe(tsfile_path, column_names=["Value10"])
-        for i in range(max_row_num):
-            assert df2_10.iloc[i, 1] in dateSet
-        df2_11 = to_dataframe(tsfile_path, column_names=["Device1", "Value1"])
-        for i in range(max_row_num):
-            assert df2_11.iloc[i, 1] == "d1_" + str(df2_11.iloc[i, 0])
-            assert df2_11.iloc[i, 2] == np.bool_(df2_11.iloc[i, 0] % 2 == 0)
-        df2_12 = to_dataframe(
-            tsfile_path,
-            column_names=[
-                "Device1",
-                "Device2",
-                "Value1",
-                "Value2",
-                "Value3",
-                "Value4",
-                "Value5",
-                "Value6",
-                "Value7",
-                "Value8",
-                "Value9",
-                "Value10",
-            ],
-        )
-        for i in range(max_row_num):
-            assert df2_12.iloc[i, 1] == "d1_" + str(df2_12.iloc[i, 0])
-            assert df2_12.iloc[i, 2] == "d2_" + str(df2_12.iloc[i, 0])
-            assert df2_12.iloc[i, 3] == np.bool_(df2_12.iloc[i, 0] % 2 == 0)
-            assert df2_12.iloc[i, 4] == np.int32(df2_12.iloc[i, 0] * 3)
-            assert df2_12.iloc[i, 5] == np.int64(df2_12.iloc[i, 0] * 4)
-            assert df2_12.iloc[i, 6] == np.float32(df2_12.iloc[i, 0] * 5.5)
-            assert df2_12.iloc[i, 7] == np.float64(df2_12.iloc[i, 0] * 6.6)
-            assert df2_12.iloc[i, 8] == f"string_value_{df2_12.iloc[i, 0]}"
-            assert df2_12.iloc[i, 9] == f"text_value_{df2_12.iloc[i, 0]}"
-            assert df2_12.iloc[i, 10] == f"blob_data_{df2_12.iloc[i, 0]}".encode(
-                "utf-8"
-            )
-            assert df2_12.iloc[i, 11] == np.int64(df2_12.iloc[i, 0] * 9)
-            assert df2_12.iloc[i, 12] == date(2025, 1, df2_12.iloc[i, 0] % 20 + 1)
-        df2_13 = to_dataframe(
-            tsfile_path, column_names=["Device1", "Device2", "Value1"]
-        )
-        for i in range(max_row_num):
-            assert df2_13.iloc[i, 1] == "d1_" + str(df2_13.iloc[i, 0])
-            assert df2_13.iloc[i, 2] == "d2_" + str(df2_13.iloc[i, 0])
-            assert df2_13.iloc[i, 3] == np.bool_(df2_13.iloc[i, 0] % 2 == 0)
-
-        df3_1 = to_dataframe(tsfile_path, table_name="test_table")
-        assert df3_1.shape[0] == max_row_num
-        assert df3_1.iloc[0, 0] == 0
-        df3_2 = to_dataframe(tsfile_path, table_name="TEST_TABLE")
-        assert df3_2.shape[0] == max_row_num
-        assert df3_2.iloc[0, 0] == 0
-
-        df4_1 = to_dataframe(tsfile_path, start_time=10)
-        assert df4_1.shape[0] == 90
-        df4_2 = to_dataframe(tsfile_path, start_time=-10)
-        assert df4_2.shape[0] == max_row_num
-        df4_3 = to_dataframe(tsfile_path, end_time=5)
-        assert df4_3.shape[0] == 6
-        df4_4 = to_dataframe(tsfile_path, end_time=-5)
-        assert df4_4.shape[0] == 0
-        df4_5 = to_dataframe(tsfile_path, start_time=5, end_time=5)
-        assert df4_5.shape[0] == 1
-        df4_6 = to_dataframe(tsfile_path, start_time=-5, end_time=-5)
-        assert df4_6.shape[0] == 0
-        df4_7 = to_dataframe(tsfile_path, start_time=10, end_time=-10)
-        assert df4_7.shape[0] == 0
-        df4_8 = to_dataframe(tsfile_path, start_time=-10, end_time=10)
-        assert df4_8.shape[0] == 11
-        df4_8 = to_dataframe(tsfile_path, start_time=-50, end_time=50)
-        assert df4_8.shape[0] == 51
-
-        df5_1 = to_dataframe(tsfile_path, max_row_num=1)
-        assert df5_1.shape[0] == 1
-        df5_2 = to_dataframe(tsfile_path, max_row_num=50)
-        assert df5_2.shape[0] == 50
-        df5_3 = to_dataframe(tsfile_path, max_row_num=100)
-        assert df5_3.shape[0] == 100
-        df5_4 = to_dataframe(tsfile_path, max_row_num=1000)
-        assert df5_4.shape[0] == 100
-        df5_5 = to_dataframe(tsfile_path, max_row_num=0)
-        assert df5_5.shape[0] == 0
-        df5_6 = to_dataframe(tsfile_path, max_row_num=-10)
-        assert df5_6.shape[0] == 0
-
-        for df6_1 in to_dataframe(tsfile_path, max_row_num=20, as_iterator=True):
-            assert df6_1.shape[0] == 20
-        for df6_2 in to_dataframe(tsfile_path, max_row_num=1000, as_iterator=True):
-            assert df6_2.shape[0] == 100
-
-        for df7_1 in to_dataframe(
-                tsfile_path,
-                table_name="test_table",
-                column_names=["Device1", "Value1"],
-                start_time=21,
-                end_time=50,
-                max_row_num=10,
-                as_iterator=True,
-        ):
-            assert df7_1.shape[0] == 10
-            for i in range(30):
-                assert df2_11.iloc[i, 1] == "d1_" + str(df2_11.iloc[i, 0])
-                assert df2_11.iloc[i, 2] == np.bool_(df2_11.iloc[i, 0] % 2 == 0)
-
-        try:
-            to_dataframe(tsfile_path, table_name="non_existent_table")
-        except TableNotExistError as e:
-            assert e.args[0] == "[non_existent_table] Requested table does not exist"
-
-        try:
-            to_dataframe(tsfile_path, column_names=["non_existent_column"])
-        except ColumnNotExistError as e:
-            assert e.args[0] == "[non_existent_column] Column does not exist"
-
-    finally:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
+        if os.path.exists("tablet_write_and_read.tsfile"):
+            os.remove("tablet_write_and_read.tsfile")
 
-import os
 
 if __name__ == "__main__":
     os.chdir(os.path.dirname(os.path.abspath(__file__)))
diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py
index bf755fcef..a9237257b 100644
--- a/python/tsfile/__init__.py
+++ b/python/tsfile/__init__.py
@@ -34,4 +34,4 @@
 from .tsfile_writer import TsFileWriterPy as TsFileWriter
 from .tsfile_py_cpp import get_tsfile_config, set_tsfile_config
 from .tsfile_table_writer import TsFileTableWriter
-from .utils import to_dataframe
\ No newline at end of file
+from .utils import to_dataframe, dataframe_to_tsfile
\ No newline at end of file
diff --git a/python/tsfile/constants.py b/python/tsfile/constants.py
index 7d1f5ff5c..d4f87200d 100644
--- a/python/tsfile/constants.py
+++ b/python/tsfile/constants.py
@@ -16,6 +16,7 @@
 # under the License.
 #
 from enum import unique, IntEnum
+import numpy as np
 
 
 @unique
@@ -62,13 +63,13 @@ def to_pandas_dtype(self):
         elif self == TSDataType.INT64:
             return "Int64"
         elif self == TSDataType.FLOAT:
-            return "float32"
+            return "Float32"
         elif self == TSDataType.DOUBLE:
-            return "float64"
+            return "Float64"
         elif self == TSDataType.TEXT or self == TSDataType.STRING:
             return "object"
         elif self == TSDataType.TIMESTAMP:
-            return "int64"
+            return "Int64"
         elif self == TSDataType.DATE:
             return "object"
         elif self == TSDataType.BLOB:
@@ -76,6 +77,79 @@ def to_pandas_dtype(self):
         else:
             raise ValueError(f"Unknown data type: {self}")
 
+    @classmethod
+    def from_pandas_datatype(cls, dtype):
+        """
+        Map a pandas/numpy dtype (or a dtype name string) to a TSDataType.
+
+        :param dtype: numpy scalar type, numpy dtype, pandas extension dtype or dtype name.
+        :return: the matching TSDataType; STRING when nothing more specific matches.
+        """
+        numpy_type_map = (
+            (np.bool_, cls.BOOLEAN),
+            (np.int32, cls.INT32),
+            (np.int64, cls.INT64),
+            (np.float32, cls.FLOAT),
+            (np.float64, cls.DOUBLE),
+            (np.object_, cls.STRING),
+        )
+        for numpy_type, ts_type in numpy_type_map:
+            if dtype is numpy_type:
+                return ts_type
+
+        try:
+            import pandas as pd
+            if hasattr(pd, 'StringDtype') and isinstance(dtype, pd.StringDtype):
+                return cls.STRING
+        except (ImportError, AttributeError):
+            pass
+
+        # dtype objects expose their scalar class as ``.type``. Keep ``dtype`` itself
+        # untouched: the name-based matching below needs str(dtype) such as
+        # 'datetime64[ns]', not "<class 'numpy.datetime64'>".
+        scalar_type = getattr(dtype, 'type', None)
+        if scalar_type is not None:
+            for numpy_type, ts_type in numpy_type_map:
+                if scalar_type is numpy_type:
+                    return ts_type
+
+        dtype_str = str(dtype)
+        dtype_lower = dtype_str.lower()
+
+        if 'stringdtype' in dtype_lower or dtype_str.startswith('string'):
+            return cls.STRING
+
+        dtype_map = {
+            'bool': cls.BOOLEAN,
+            'boolean': cls.BOOLEAN,
+            'int32': cls.INT32,
+            'Int32': cls.INT32,
+            'int64': cls.INT64,
+            'Int64': cls.INT64,
+            'float32': cls.FLOAT,
+            'float64': cls.DOUBLE,
+            'bytes': cls.BLOB,
+            'object': cls.STRING,
+            'string': cls.STRING,
+        }
+
+        # Exact name first, then case-insensitive (e.g. 'Float64' -> 'float64').
+        for key in (dtype_str, dtype_lower):
+            if key in dtype_map:
+                return dtype_map[key]
+
+        if 'object_' in dtype_lower or dtype_str == "<class 'numpy.object_'>":
+            return cls.STRING
+
+        # Covers 'datetime64[ns]' and timezone-aware 'datetime64[ns, tz]'.
+        if dtype_str.startswith('datetime64'):
+            return cls.TIMESTAMP
+
+        # Fallback: treat any unrecognised dtype as STRING.
+        return cls.STRING
+
+
+
 
 @unique
 class TSEncoding(IntEnum):
diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd
index 40bff4eba..ab915fefe 100644
--- a/python/tsfile/tsfile_cpp.pxd
+++ b/python/tsfile/tsfile_cpp.pxd
@@ -137,7 +137,8 @@ cdef extern from "./tsfile_cwrapper.h":
                                         TSDataType * data_types,
                                         int column_num, int max_rows);
 
-    Tablet tablet_new(const char** column_names, TSDataType * data_types, int column_num);
+    Tablet tablet_new(char** column_name_list, TSDataType* data_types,
+                  uint32_t column_num, uint32_t max_rows);
 
     ErrorCode tablet_add_timestamp(Tablet tablet, uint32_t row_index, int64_t timestamp);
     ErrorCode tablet_add_value_by_index_int64_t(Tablet tablet, uint32_t row_index, uint32_t column_index,
diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd
index e44bb588d..9ce2f90da 100644
--- a/python/tsfile/tsfile_py_cpp.pxd
+++ b/python/tsfile/tsfile_py_cpp.pxd
@@ -33,6 +33,7 @@ cdef public api DeviceSchema* to_c_device_schema(object py_schema)
 cdef public api ColumnSchema* to_c_column_schema(object py_schema)
 cdef public api TableSchema* to_c_table_schema(object py_schema)
 cdef public api Tablet to_c_tablet(object tablet)
+cdef public api Tablet dataframe_to_c_tablet(object target_name, object dataframe)
 cdef public api TsRecord to_c_record(object row_record)
 cdef public api void free_c_table_schema(TableSchema* c_schema)
 cdef public api void free_c_column_schema(ColumnSchema* c_schema)
diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx
index d9924d7ad..851346853 100644
--- a/python/tsfile/tsfile_py_cpp.pyx
+++ b/python/tsfile/tsfile_py_cpp.pyx
@@ -19,6 +19,9 @@
 from .date_utils import parse_date_to_int
 from .tsfile_cpp cimport *
 
+import pandas as pd
+import numpy as np
+
 from libc.stdlib cimport free
 from libc.stdlib cimport malloc
 from libc.string cimport strdup
@@ -220,7 +223,7 @@ cdef Tablet to_c_tablet(object tablet):
     cdef TSDataType * column_types
     cdef bytes row_bytes
     cdef char *raw_str
-    cdef const char* str_ptr
+    cdef const char * str_ptr
     cdef Py_ssize_t raw_len
 
     if tablet.get_target_name() is not None:
@@ -293,7 +296,7 @@ cdef Tablet to_c_tablet(object tablet):
             for row in range(max_row_num):
                 if value[row] is not None:
                     py_value = value[row]
-                    str_ptr =  PyUnicode_AsUTF8AndSize(py_value, &raw_len)
+                    str_ptr = PyUnicode_AsUTF8AndSize(py_value, &raw_len)
                     tablet_add_value_by_index_string_with_len(ctablet, row, col, str_ptr, raw_len)
 
         elif data_type == TS_DATATYPE_BLOB:
@@ -304,13 +307,168 @@ cdef Tablet to_c_tablet(object tablet):
 
     return ctablet
 
+cdef TSDataType pandas_dtype_to_ts_data_type(object dtype):  # pandas/numpy dtype -> C TSDataType
+    return to_c_data_type(TSDataTypePy.from_pandas_datatype(dtype))  # map via TSDataTypePy, then convert with to_c_data_type
+
+cdef TSDataType check_string_or_blob(TSDataType ts_data_type, object dtype, object column_series):
+    # Report BLOB instead of STRING when an object column's first non-null
+    # value is bytes; every other type is returned unchanged.
+    if ts_data_type == TS_DATATYPE_STRING:
+        if dtype == 'object' or str(dtype) == "<class 'numpy.object_'>":
+            # Read by position: a label lookup returns a Series when index labels repeat.
+            non_null = column_series.dropna()
+            if len(non_null) > 0 and isinstance(non_null.iloc[0], bytes):
+                return TS_DATATYPE_BLOB
+    return ts_data_type
+
+cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):  # pandas DataFrame -> C Tablet
+    cdef Tablet ctablet
+    cdef int max_row_num
+    cdef TSDataType data_type
+    cdef int64_t timestamp
+    cdef const char * device_id_c = NULL
+    cdef char** columns_names
+    cdef TSDataType * columns_types
+    cdef char *raw_str
+    cdef const char * str_ptr
+    cdef Py_ssize_t raw_len
+    cdef int column_num
+    cdef int i, row
+    cdef object value
+    cdef object py_value
+    cdef object value_bytes
+    # The target name is lowercased; device_id_c points into device_id_bytes.
+    device_id_bytes = PyUnicode_AsUTF8String(target_name.lower())
+    device_id_c = device_id_bytes
+    df_columns = list(dataframe.columns)
+    use_id_as_time = False
+    time_column_name = None
+    # The first column whose lowercase name is 'time' supplies the timestamps.
+    for col in df_columns:
+        if col.lower() == 'time':
+            time_column_name = col
+            break
+
+    if time_column_name is None:
+        use_id_as_time = True  # no time column: the DataFrame index is used instead
+
+    data_columns = [col for col in df_columns if col.lower() != 'time']
+    column_num = len(data_columns)
+
+    if column_num == 0:
+        raise ValueError("DataFrame must have at least one data column besides 'time'")
+
+    max_row_num = len(dataframe)
+    # Infer one type per data column from its pandas dtype (object columns whose first non-null value is bytes become BLOB).
+    column_types_list = []
+    for col_name in data_columns:
+        pandas_dtype = dataframe[col_name].dtype
+        ds_type = pandas_dtype_to_ts_data_type(pandas_dtype)
+        ds_type = check_string_or_blob(ds_type, pandas_dtype, dataframe[col_name])
+        column_types_list.append(ds_type)
+    # Temporary C arrays describing the columns; they are released right after the tablet is created.
+    columns_names = <char**> malloc(sizeof(char *) * column_num)
+    columns_types = <TSDataType *> malloc(sizeof(TSDataType) * column_num)
+
+    for i in range(column_num):
+        columns_names[i] = strdup(data_columns[i].lower().encode('utf-8'))  # column names are lowercased
+        columns_types[i] = column_types_list[i]
+
+    ctablet = _tablet_new_with_target_name(device_id_c, columns_names, columns_types, column_num,
+                         max_row_num)
+    # NOTE(review): freeing here presumes the tablet copied the names/types - confirm in the C wrapper.
+    free(columns_types)
+    for i in range(column_num):
+        free(columns_names[i])
+    free(columns_names)
+    # Timestamps. NOTE(review): a row whose timestamp is NA gets no timestamp, yet its values are still added below.
+    if use_id_as_time:
+        for row in range(max_row_num):
+            timestamp_py = dataframe.index[row]
+            if pd.isna(timestamp_py):
+                continue
+            timestamp = <int64_t> timestamp_py
+            tablet_add_timestamp(ctablet, row, timestamp)
+    else:
+        time_values = dataframe[time_column_name].values
+        for row in range(max_row_num):
+            timestamp_py = time_values[row]
+            if pd.isna(timestamp_py):
+                continue
+            timestamp = <int64_t> timestamp_py
+            tablet_add_timestamp(ctablet, row, timestamp)
+    # Values, column by column; NA cells are skipped and the tablet_add_* return codes are not checked.
+    for col in range(column_num):
+        col_name = data_columns[col]
+        data_type = column_types_list[col]
+        column_values = dataframe[col_name].values
+
+        # BOOLEAN
+        if data_type == TS_DATATYPE_BOOLEAN:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    tablet_add_value_by_index_bool(ctablet, row, col, <bint> value)
+        # INT32
+        elif data_type == TS_DATATYPE_INT32:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    tablet_add_value_by_index_int32_t(ctablet, row, col, <int32_t> value)
+        # INT64 (TIMESTAMP columns are written through the same int64 call)
+        elif data_type == TS_DATATYPE_INT64 or data_type == TS_DATATYPE_TIMESTAMP:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    tablet_add_value_by_index_int64_t(ctablet, row, col, <int64_t> value)
+        # FLOAT
+        elif data_type == TS_DATATYPE_FLOAT:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    tablet_add_value_by_index_float(ctablet, row, col, <float> value)
+        # DOUBLE
+        elif data_type == TS_DATATYPE_DOUBLE:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    tablet_add_value_by_index_double(ctablet, row, col, <double> value)
+        # DATE: stored as int32 via parse_date_to_int. NOTE(review): dtype inference here never seems to yield DATE - confirm reachability.
+        elif data_type == TS_DATATYPE_DATE:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    tablet_add_value_by_index_int32_t(ctablet, row, col, parse_date_to_int(value))
+        # STRING or TEXT (the value is passed through str() and written as UTF-8 with an explicit length)
+        elif data_type == TS_DATATYPE_STRING or data_type == TS_DATATYPE_TEXT:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    py_value = str(value)
+                    str_ptr = PyUnicode_AsUTF8AndSize(py_value, &raw_len)
+                    tablet_add_value_by_index_string_with_len(ctablet, row, col, str_ptr, raw_len)
+        # BLOB (values that are not bytes are converted with bytes() first)
+        elif data_type == TS_DATATYPE_BLOB:
+            for row in range(max_row_num):
+                value = column_values[row]
+                if not pd.isna(value):
+                    if isinstance(value, bytes):
+                        PyBytes_AsStringAndSize(value, &raw_str, &raw_len)
+                        tablet_add_value_by_index_string_with_len(ctablet, row, col, raw_str, raw_len)
+                    else:
+                        value_bytes = bytes(value)
+                        PyBytes_AsStringAndSize(value_bytes, &raw_str, &raw_len)
+                        tablet_add_value_by_index_string_with_len(ctablet, row, col, raw_str, raw_len)
+
+    return ctablet
+
 cdef TsRecord to_c_record(object row_record):
     cdef int field_num = row_record.get_fields_num()
     cdef int64_t timestamp = <int64_t> row_record.get_timestamp()
     cdef bytes device_id_bytes = PyUnicode_AsUTF8String(row_record.get_device_id())
-    cdef const char* device_id = device_id_bytes
-    cdef const char* str_ptr
-    cdef char* blob_ptr
+    cdef const char * device_id = device_id_bytes
+    cdef const char * str_ptr
+    cdef char * blob_ptr
     cdef Py_ssize_t str_len
     cdef TsRecord record
     cdef int i
@@ -320,9 +478,11 @@ cdef TsRecord to_c_record(object row_record):
         field = row_record.get_fields()[i]
         data_type = to_c_data_type(field.get_data_type())
         if data_type == TS_DATATYPE_BOOLEAN:
-            _insert_data_into_ts_record_by_name_bool(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_bool_value())
+            _insert_data_into_ts_record_by_name_bool(record, PyUnicode_AsUTF8(field.get_field_name()),
+                                                     field.get_bool_value())
         elif data_type == TS_DATATYPE_INT32 or data_type == TS_DATATYPE_DATE:
-            _insert_data_into_ts_record_by_name_int32_t(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_int_value())
+            _insert_data_into_ts_record_by_name_int32_t(record, PyUnicode_AsUTF8(field.get_field_name()),
+                                                        field.get_int_value())
         elif data_type == TS_DATATYPE_INT64:
             _insert_data_into_ts_record_by_name_int64_t(record, PyUnicode_AsUTF8(field.get_field_name()),
                                                         field.get_long_value())
@@ -333,15 +493,17 @@ cdef TsRecord to_c_record(object row_record):
             _insert_data_into_ts_record_by_name_double(record, PyUnicode_AsUTF8(field.get_field_name()),
                                                        field.get_double_value())
         elif data_type == TS_DATATYPE_FLOAT:
-            _insert_data_into_ts_record_by_name_float(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_float_value())
+            _insert_data_into_ts_record_by_name_float(record, PyUnicode_AsUTF8(field.get_field_name()),
+                                                      field.get_float_value())
         elif data_type == TS_DATATYPE_TEXT or data_type == TS_DATATYPE_STRING:
-            str_ptr =  PyUnicode_AsUTF8AndSize(field.get_string_value(), &str_len)
-            _insert_data_into_ts_record_by_name_string_with_len(record, PyUnicode_AsUTF8(field.get_field_name()), str_ptr, str_len)
+            str_ptr = PyUnicode_AsUTF8AndSize(field.get_string_value(), &str_len)
+            _insert_data_into_ts_record_by_name_string_with_len(record, PyUnicode_AsUTF8(field.get_field_name()),
+                                                                str_ptr, str_len)
         elif data_type == TS_DATATYPE_BLOB:
             if PyBytes_AsStringAndSize(field.get_string_value(), &blob_ptr, &str_len) < 0:
                 raise ValueError("blob not legal")
             _insert_data_into_ts_record_by_name_string_with_len(record, PyUnicode_AsUTF8(field.get_field_name()),
-                                                                <const char*> blob_ptr, <uint32_t>str_len)
+                                                                <const char *> blob_ptr, <uint32_t> str_len)
     return record
 
 # Free c structs' space
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index 281933606..56f9c3417 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -15,9 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.
 #
+import pandas as pd
 
 from tsfile import TableSchema, Tablet, TableNotExistError
 from tsfile import TsFileWriter
+from tsfile.constants import TSDataType
+from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
+
+def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
+    """Return BLOB instead of STRING when an object column's first non-null value is bytes."""
+    if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
+        # Read the value by position: a label lookup returns a Series when index labels repeat.
+        non_null = column_series.dropna()
+        if len(non_null) > 0 and isinstance(non_null.iloc[0], bytes):
+            return TSDataType.BLOB
+    return ts_data_type
 
 
 class TsFileTableWriter:
@@ -31,7 +43,7 @@ class TsFileTableWriter:
     according to that schema, and serialize this data into a TsFile.
     """
 
-    def __init__(self, path: str, table_schema: TableSchema, memory_threshold = 128 * 1024 * 1024):
+    def __init__(self, path: str, table_schema: TableSchema, memory_threshold=128 * 1024 * 1024):
         """
         :param path: The path of tsfile, will create if it doesn't exist.
         :param table_schema: describes the schema of the tables they want to write.
@@ -39,7 +51,7 @@ def __init__(self, path: str, table_schema: TableSchema, memory_threshold = 128
         """
         self.writer = TsFileWriter(path, memory_threshold)
         self.writer.register_table(table_schema)
-        self.exclusive_table_name_ = table_schema.get_table_name()
+        self.tableSchema = table_schema
 
     def write_table(self, tablet: Tablet):
         """
@@ -49,11 +61,66 @@ def write_table(self, tablet: Tablet):
         :raise: TableNotExistError if table does not exist or tablet's table_name does not match tableschema.
         """
         if tablet.get_target_name() is None:
-            tablet.set_table_name(self.exclusive_table_name_)
-        elif self.exclusive_table_name_ is not None and tablet.get_target_name() != self.exclusive_table_name_:
+            tablet.set_table_name(self.tableSchema.get_table_name())
+        elif (self.tableSchema.get_table_name() is not None
+              and tablet.get_target_name() != self.tableSchema.get_table_name()):
             raise TableNotExistError
         self.writer.write_table(tablet)
 
+    def write_dataframe(self, dataframe: pd.DataFrame):
+        """
+        Validate a pandas DataFrame against the table schema, then write it into the tsfile.
+        Column names are matched in lowercase; a column named 'time' (any case) is not validated.
+        :param dataframe: pandas DataFrame whose data columns all exist in the schema.
+        :return: no return value.
+        :raise: ValueError if dataframe is None or is empty.
+        :raise: ColumnNotExistError if DataFrame has columns that are not in the schema.
+        :raise: TypeMismatchError if an inferred column type differs from the schema type.
+        """
+        if dataframe is None or dataframe.empty:
+            raise ValueError("DataFrame cannot be None or empty")
+
+        # lowercase name -> name as spelled in the DataFrame ('time' is left out)
+        original_names = {}
+        for column in dataframe.columns:
+            lowered = column.lower()
+            if lowered != 'time':
+                original_names[lowered] = column
+
+        known_names = set(self.tableSchema.get_column_names())
+        unknown_names = sorted(name for name in original_names if name not in known_names)
+        if unknown_names:
+            raise ColumnNotExistError(
+                code=50,
+                context=f"DataFrame has columns not in schema: {', '.join(unknown_names)}"
+            )
+
+        schema_columns = {}
+        for schema_column in self.tableSchema.get_columns():
+            schema_columns[schema_column.get_column_name()] = schema_column
+
+        mismatch_messages = []
+        for lowered, column in original_names.items():
+            series = dataframe[column]
+            inferred_type = check_string_or_blob(
+                TSDataType.from_pandas_datatype(series.dtype), series.dtype, series
+            )
+            expected_type = schema_columns[lowered].get_data_type()
+            if inferred_type == expected_type:
+                continue
+            mismatch_messages.append(
+                f"Column '{lowered}': expected {expected_type.name}, got {inferred_type.name}"
+            )
+
+        if mismatch_messages:
+            raise TypeMismatchError(
+                code=27,
+                context=f"Type mismatches: {'; '.join(mismatch_messages)}"
+            )
+
+        # All checks passed: hand the original DataFrame to the underlying writer.
+        self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe)
+
     def close(self):
         """
         Close TsFileTableWriter and will flush data automatically.
diff --git a/python/tsfile/tsfile_writer.pyx b/python/tsfile/tsfile_writer.pyx
index 201991952..c558984e1 100644
--- a/python/tsfile/tsfile_writer.pyx
+++ b/python/tsfile/tsfile_writer.pyx
@@ -15,21 +15,21 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-
-#cython: language_level=3
-
-from .tsfile_cpp cimport *
-from .tsfile_py_cpp cimport *
+import pandas
 
 from tsfile.row_record import RowRecord
-from tsfile.schema import TimeseriesSchema as TimeseriesSchemaPy, DeviceSchema as DeviceSchemaPy
 from tsfile.schema import TableSchema as TableSchemaPy
+from tsfile.schema import TimeseriesSchema as TimeseriesSchemaPy, DeviceSchema as DeviceSchemaPy
 from tsfile.tablet import Tablet as TabletPy
+from .tsfile_cpp cimport *
+from .tsfile_py_cpp cimport *
+
+#cython: language_level=3
 
 cdef class TsFileWriterPy:
     cdef TsFileWriter writer
 
-    def __init__(self, pathname:str, memory_threshold:int = 128 * 1024 * 1024):
+    def __init__(self, pathname: str, memory_threshold: int = 128 * 1024 * 1024):
         self.writer = tsfile_writer_new_c(pathname, memory_threshold)
 
     def register_timeseries(self, device_name : str, timeseries_schema : TimeseriesSchemaPy):
@@ -38,7 +38,7 @@ cdef class TsFileWriterPy:
         device_name: device name of the timeseries
         timeseries_schema: measurement's name/datatype/encoding/compressor
         """
-        cdef TimeseriesSchema* c_schema = to_c_timeseries_schema(timeseries_schema)
+        cdef TimeseriesSchema * c_schema = to_c_timeseries_schema(timeseries_schema)
         cdef ErrorCode errno
         try:
             errno = tsfile_writer_register_timeseries_py_cpp(self.writer, device_name, c_schema)
@@ -51,7 +51,7 @@ cdef class TsFileWriterPy:
         Register a device with tsfile writer.
         device_schema: the device definition, including device_name, some measurements' schema.
         """
-        cdef DeviceSchema* device_schema_c = to_c_device_schema(device_schema)
+        cdef DeviceSchema * device_schema_c = to_c_device_schema(device_schema)
         cdef ErrorCode errno
         try:
             errno = tsfile_writer_register_device_py_cpp(self.writer, device_schema_c)
@@ -64,7 +64,7 @@ cdef class TsFileWriterPy:
         Register a table with tsfile writer.
         table_schema: the table definition, include table_name, columns' schema.
         """
-        cdef TableSchema* c_schema = to_c_table_schema(table_schema)
+        cdef TableSchema * c_schema = to_c_table_schema(table_schema)
         cdef ErrorCode errno
         try:
             errno = tsfile_writer_register_table_py_cpp(self.writer, c_schema)
@@ -86,6 +86,15 @@ cdef class TsFileWriterPy:
         finally:
             free_c_tablet(ctablet)
 
+    def write_dataframe(self, target_table: str, dataframe: pandas.DataFrame):
+        cdef Tablet ctablet = dataframe_to_c_tablet(target_table, dataframe)
+        cdef ErrorCode errno
+        try:
+            errno = _tsfile_writer_write_table(self.writer, ctablet)
+            check_error(errno)
+        finally:
+            free_c_tablet(ctablet)
+
     def write_row_record(self, record : RowRecord):
         """
         Write a record into tsfile with tsfile writer.
@@ -143,4 +152,3 @@ cdef class TsFileWriterPy:
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
-
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index d27a0fae3..f3c2adc57 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -20,9 +20,12 @@
 
 import numpy as np
 import pandas as pd
+from pandas.core.dtypes.common import is_integer_dtype
 
+from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType
 from tsfile.exceptions import TableNotExistError, ColumnNotExistError
 from tsfile.tsfile_reader import TsFileReaderPy
+from tsfile.tsfile_table_writer import TsFileTableWriter, check_string_or_blob
 
 
 def to_dataframe(file_path: str,
@@ -159,3 +162,106 @@ def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]:
             return df
         else:
             return pd.DataFrame()
+
+
+def dataframe_to_tsfile(dataframe: pd.DataFrame,
+                        file_path: str,
+                        table_name: Optional[str] = None,
+                        time_column: Optional[str] = None,
+                        tag_column: Optional[list[str]] = None,
+                        ):
+    """
+    Write a pandas DataFrame to a TsFile by inferring the table schema from the DataFrame.
+
+    This function automatically infers the table schema based on the DataFrame's column
+    names and data types, then writes the data to a TsFile.
+
+    Parameters
+    ----------
+    dataframe : pd.DataFrame
+        The pandas DataFrame to write to TsFile.
+        - If a 'time' column (case-insensitive) exists, it will be used as the time column.
+        - Otherwise, the DataFrame index will be used as timestamps.
+        - All other columns will be treated as data columns.
+
+    file_path : str
+        Path to the TsFile to write. Will be created if it doesn't exist.
+
+    table_name : Optional[str], default None
+        Name of the table. If None, defaults to "table".
+
+    time_column : Optional[str], default None
+        Name of the time column. If None, will look for a column named 'time' (case-insensitive),
+        or use the DataFrame index if no 'time' column is found.
+
+    tag_column : Optional[list[str]], default None
+        List of column names to be treated as TAG columns. All other columns will be FIELD columns.
+        If None, all columns are treated as FIELD columns.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If the DataFrame is empty or has no data columns.
+    """
+    if dataframe is None or dataframe.empty:
+        raise ValueError("DataFrame cannot be None or empty")
+
+    if table_name is None:
+        table_name = "table"
+
+    time_col_name = None
+    if time_column is not None:
+        if time_column not in dataframe.columns:
+            raise ValueError(f"Time column '{time_column}' not found in DataFrame")
+        if not is_integer_dtype(dataframe[time_column].dtype):
+            raise TypeError(
+                f"Time column '{time_column}' must be integer type (int64 or int), got {dataframe[time_column].dtype}")
+        time_col_name = time_column
+    else:
+        for col in dataframe.columns:
+            if col.lower() == 'time':
+                if is_integer_dtype(dataframe[col].dtype):
+                    time_col_name = col
+                    break
+                else:
+                    raise TypeError(
+                        f"Time column '{col}' must be integer type (int64 or int), got {dataframe[col].dtype}")
+
+    data_columns = [col for col in dataframe.columns if col != time_col_name]
+
+    if len(data_columns) == 0:
+        raise ValueError("DataFrame must have at least one data column besides the time column")
+
+    tag_columns_lower = []
+    if tag_column is not None:
+        for tag_col in tag_column:
+            if tag_col not in dataframe.columns:
+                raise ValueError(f"Tag column '{tag_col}' not found in DataFrame")
+            tag_columns_lower.append(tag_col.lower())
+
+    column_schemas = []
+    for col_name in data_columns:
+        col_dtype = dataframe[col_name].dtype
+        ts_data_type = TSDataType.from_pandas_datatype(col_dtype)
+        ts_data_type = check_string_or_blob(ts_data_type, col_dtype, dataframe[col_name])
+
+        if col_name.lower() in tag_columns_lower:
+            category = ColumnCategory.TAG
+        else:
+            category = ColumnCategory.FIELD
+
+        column_schemas.append(ColumnSchema(col_name, ts_data_type, category))
+
+    table_schema = TableSchema(table_name, column_schemas)
+
+    if time_col_name is not None and time_col_name != 'time':
+        df_to_write = dataframe.rename(columns={time_col_name: 'time'})
+    else:
+        df_to_write = dataframe
+
+    with TsFileTableWriter(file_path, table_schema) as writer:
+        writer.write_dataframe(df_to_write)

From 64e465a361990226c78fcfe5a188fd8c39c93063 Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Fri, 16 Jan 2026 11:26:31 +0800
Subject: [PATCH 02/13] sort dataframe by tag columns and time before writing.

---
 python/tests/test_to_tsfile.py       | 30 +++++++++++++++++++++++++++-
 python/tsfile/tsfile_table_writer.py | 20 ++++++++++++++++++-
 python/tsfile/utils.py               |  6 ++++--
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index 0928f1a94..7c1fb84ca 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -205,7 +205,7 @@ def test_dataframe_to_tsfile_default_table_name():
 
         dataframe_to_tsfile(df, tsfile_path)
 
-        df_read = to_dataframe(tsfile_path, table_name="table")
+        df_read = to_dataframe(tsfile_path, table_name="test_dataframe_to_tsfile_default_name")
         assert df_read.shape == (10, 2)
     finally:
         if os.path.exists(tsfile_path):
@@ -343,3 +343,31 @@ def test_dataframe_to_tsfile_string_vs_blob():
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
+
+
+def test_dataframe_to_tsfile_tag_time_unsorted():
+    tsfile_path = "test_dataframe_to_tsfile_tag_time_unsorted.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        df = pd.DataFrame({
+            'time': [30, 10, 20, 50, 40, 15, 25, 35, 5, 45],
+            'device': ['device1', 'device1', 'device1', 'device2', 'device2', 'device1', 'device1', 'device2',
+                       'device1', 'device2'],
+            'value': [i * 1.5 for i in range(10)]
+        })
+
+        dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device"])
+
+        df_read = to_dataframe(tsfile_path, table_name="test_table")
+        df_expected = df.sort_values(by=['device', 'time']).reset_index(drop=True)
+        df_expected = convert_to_nullable_types(df_expected)
+
+        assert df_read.shape == (10, 3)
+        assert df_read["device"].equals(df_expected["device"])
+        assert df_read["time"].equals(df_expected["time"])
+        assert df_read["value"].equals(df_expected["value"])
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index 56f9c3417..5b33f9b2f 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -18,7 +18,7 @@
 import pandas as pd
 
 from tsfile import TableSchema, Tablet, TableNotExistError
-from tsfile import TsFileWriter
+from tsfile import TsFileWriter, ColumnCategory
 from tsfile.constants import TSDataType
 from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
 
@@ -119,6 +119,24 @@ def write_dataframe(self, dataframe: pd.DataFrame):
                 context=f"Type mismatches: {'; '.join(type_mismatches)}"
             )
 
+        tag_columns = []
+        for col in self.tableSchema.get_columns():
+            if col.get_category() == ColumnCategory.TAG:
+                tag_col_name = col.get_column_name()
+                if tag_col_name in df_column_name_map:
+                    tag_columns.append(df_column_name_map[tag_col_name])
+
+        time_column = None
+        for col in dataframe.columns:
+            if col.lower() == 'time':
+                time_column = col
+                break
+
+        if time_column:
+            sort_by = tag_columns.copy()
+            sort_by.append(time_column)
+            dataframe = dataframe.sort_values(by=sort_by)
+
         self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe)
 
     def close(self):
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index f3c2adc57..567c4fe19 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 #
+from pathlib import Path
 from typing import Iterator, Union
 from typing import Optional
 
@@ -188,7 +189,7 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
         Path to the TsFile to write. Will be created if it doesn't exist.
 
     table_name : Optional[str], default None
-        Name of the table. If None, defaults to "table".
+        Name of the table. If None, defaults to tsfile file name.
 
     time_column : Optional[str], default None
         Name of the time column. If None, will look for a column named 'time' (case-insensitive),
@@ -211,7 +212,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
         raise ValueError("DataFrame cannot be None or empty")
 
     if table_name is None:
-        table_name = "table"
+        filename = Path(file_path).stem
+        table_name = filename
 
     time_col_name = None
     if time_column is not None:

From e405cb35a2d8b99c19e9e1bc127f4712cc3aa52e Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Mon, 9 Feb 2026 02:31:13 +0800
Subject: [PATCH 03/13] add TIME/ATTRIBUTE column categories and schema time column.

---
 cpp/src/cwrapper/tsfile_cwrapper.h   |   7 +-
 python/tests/test_dataframe.py       | 101 +++++++++++++++++---
 python/tests/test_write_and_read.py  |   2 +-
 python/tsfile/constants.py           |  22 ++++-
 python/tsfile/exceptions.py          |   2 +-
 python/tsfile/schema.py              |  44 ++++++++-
 python/tsfile/tsfile_cpp.pxd         |   5 +-
 python/tsfile/tsfile_py_cpp.pxd      |   2 +-
 python/tsfile/tsfile_py_cpp.pyx      |  63 ++++++++----
 python/tsfile/tsfile_reader.pyx      |  12 ++-
 python/tsfile/tsfile_table_writer.py | 137 ++++++++++++++++-----------
 python/tsfile/tsfile_writer.pyx      |   4 +-
 12 files changed, 300 insertions(+), 101 deletions(-)

diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h
index d9fe6bb85..643b4e52b 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.h
+++ b/cpp/src/cwrapper/tsfile_cwrapper.h
@@ -71,7 +71,12 @@ typedef enum {
     TS_COMPRESSION_INVALID = 255
 } CompressionType;
 
-typedef enum column_category { TAG = 0, FIELD = 1 } ColumnCategory;
+typedef enum column_category {
+    TAG = 0,
+    FIELD = 1,
+    ATTRIBUTE = 2,
+    TIME = 3
+} ColumnCategory;
 
 typedef struct column_schema {
     char* column_name;
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 5138968a2..e3c923a3c 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -16,11 +16,11 @@
 # under the License.
 #
 import os
+from datetime import date
 
 import numpy as np
 import pandas as pd
 import pytest
-from pandas.core.dtypes.common import is_integer_dtype
 
 from tsfile import ColumnSchema, TableSchema, TSDataType
 from tsfile import TsFileTableWriter, ColumnCategory
@@ -59,7 +59,7 @@ def test_write_dataframe_basic():
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'time': [i for i in range(100)],
@@ -90,7 +90,7 @@ def test_write_dataframe_with_index():
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'device': [f"device{i}" for i in range(50)],
@@ -120,7 +120,7 @@ def test_write_dataframe_case_insensitive():
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'Time': [i for i in range(30)],  # Capital T
@@ -149,7 +149,7 @@ def test_write_dataframe_column_not_in_schema():
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'time': [i for i in range(10)],
@@ -157,9 +157,8 @@ def test_write_dataframe_column_not_in_schema():
                 'value': [i * 1.0 for i in range(10)],
                 'extra_column': [i for i in range(10)]  # Not in schema
             })
-            with pytest.raises(ColumnNotExistError) as exc_info:
+            with pytest.raises(ColumnNotExistError):
                 writer.write_dataframe(df)
-            assert "extra_column" in str(exc_info.value)
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
@@ -172,15 +171,14 @@ def test_write_dataframe_type_mismatch():
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'time': [i for i in range(10)],
-                'value': [i for i in range(10)]  # INT64, but schema expects STRING
+                'value': [i for i in range(10)]
             })
             with pytest.raises(TypeMismatchError) as exc_info:
                 writer.write_dataframe(df)
-            assert "Type mismatches" in str(exc_info.value)
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
@@ -194,12 +192,15 @@ def test_write_dataframe_all_datatypes():
                          ColumnSchema("float_col", TSDataType.FLOAT, ColumnCategory.FIELD),
                          ColumnSchema("double_col", TSDataType.DOUBLE, ColumnCategory.FIELD),
                          ColumnSchema("string_col", TSDataType.STRING, ColumnCategory.FIELD),
-                         ColumnSchema("blob_col", TSDataType.BLOB, ColumnCategory.FIELD)])
+                         ColumnSchema("blob_col", TSDataType.BLOB, ColumnCategory.FIELD),
+                         ColumnSchema("text_col", TSDataType.TEXT, ColumnCategory.FIELD),
+                         ColumnSchema("date_col", TSDataType.DATE, ColumnCategory.FIELD),
+                         ColumnSchema("timestamp_col", TSDataType.TIMESTAMP, ColumnCategory.FIELD)])
     tsfile_path = "test_write_dataframe_all_types.tsfile"
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'time': [i for i in range(50)],
@@ -209,20 +210,27 @@ def test_write_dataframe_all_datatypes():
                 'float_col': pd.Series([i * 1.5 for i in range(50)], dtype='float32'),
                 'double_col': [i * 2.5 for i in range(50)],
                 'string_col': [f"str{i}" for i in range(50)],
-                'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)]
+                'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)],
+                'text_col': [f"text{i}" for i in range(50)],
+                'date_col': [date(2025, i % 11 + 1, i % 20 + 1) for i in range(50)],
+                'timestamp_col': [i for i in range(50)]
             })
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
         df_read = df_read.sort_values('time').reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
-        assert df_read.shape == (50, 8)
+        assert df_read.shape == (50, 11)
         assert df_read["bool_col"].equals(df_sorted["bool_col"])
         assert df_read["int32_col"].equals(df_sorted["int32_col"])
         assert df_read["int64_col"].equals(df_sorted["int64_col"])
         assert np.allclose(df_read["float_col"], df_sorted["float_col"])
         assert np.allclose(df_read["double_col"], df_sorted["double_col"])
         assert df_read["string_col"].equals(df_sorted["string_col"])
+        assert df_read["blob_col"].equals(df_sorted["blob_col"])
+        assert df_read["text_col"].equals(df_sorted["text_col"])
+        assert df_read["date_col"].equals(df_sorted["date_col"])
+        assert df_read["timestamp_col"].equals(df_sorted["timestamp_col"])
         for i in range(50):
             assert df_read["blob_col"].iloc[i] == df_sorted["blob_col"].iloc[i]
     finally:
@@ -230,6 +238,67 @@ def test_write_dataframe_all_datatypes():
             os.remove(tsfile_path)
 
 
+def test_write_dataframe_schema_time_column():
+    table = TableSchema("test_table",
+                        [ColumnSchema("time", TSDataType.TIMESTAMP, ColumnCategory.TIME),
+                         ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG),
+                         ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
+    tsfile_path = "test_write_dataframe_schema_time.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        with TsFileTableWriter(tsfile_path, table) as writer:
+            df = pd.DataFrame({
+                'time': [i * 100 for i in range(50)],
+                'device': [f"device{i}" for i in range(50)],
+                'value': [i * 1.5 for i in range(50)]
+            })
+            writer.write_dataframe(df)
+
+        df_read = to_dataframe(tsfile_path, table_name="test_table")
+        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
+        assert df_read.shape == (50, 3)
+        assert df_read["time"].equals(df_sorted["time"])
+        assert df_read["device"].equals(df_sorted["device"])
+        assert df_read["value"].equals(df_sorted["value"])
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+
+def test_write_dataframe_schema_time_and_dataframe_time():
+    table = TableSchema("test_table",
+                        [ColumnSchema("device", TSDataType.STRING, ColumnCategory.TAG),
+                         ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
+    tsfile_path = "test_write_dataframe_schema_and_df_time.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        with TsFileTableWriter(tsfile_path, table) as writer:
+            df = pd.DataFrame({
+                'Time': [i for i in range(30)],
+                'device': [f"dev{i}" for i in range(30)],
+                'value': [float(i) for i in range(30)]
+            })
+            writer.write_dataframe(df)
+
+        df_read = to_dataframe(tsfile_path, table_name="test_table")
+        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_sorted = convert_to_nullable_types(
+            df.sort_values('Time').rename(columns=str.lower).reset_index(drop=True)
+        )
+        assert df_read.shape == (30, 3)
+        assert df_read["time"].equals(df_sorted["time"])
+        assert df_read["device"].equals(df_sorted["device"])
+        assert df_read["value"].equals(df_sorted["value"])
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+
 def test_write_dataframe_empty():
     table = TableSchema("test_table",
                         [ColumnSchema("value", TSDataType.DOUBLE, ColumnCategory.FIELD)])
@@ -237,7 +306,7 @@ def test_write_dataframe_empty():
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-        
+
         with TsFileTableWriter(tsfile_path, table) as writer:
             df = pd.DataFrame({
                 'time': [],
@@ -249,3 +318,5 @@ def test_write_dataframe_empty():
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
+
+
diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py
index 1ffc22b99..3cef99c4a 100644
--- a/python/tests/test_write_and_read.py
+++ b/python/tests/test_write_and_read.py
@@ -84,7 +84,7 @@ def test_row_record_write_and_read():
             assert result.get_value_by_index(4) == row_num * 2
             assert result.get_value_by_index(5) == f"string_value_{row_num}"
             assert result.get_value_by_index(6) == f"text_value_{row_num}"
-            assert result.get_value_by_index(7) == f"blob_data_{row_num}"
+            assert result.get_value_by_index(7) == f"blob_data_{row_num}".encode('utf-8')
             assert result.get_value_by_index(8) == date(2025, 1, row_num % 20 + 1)
             assert result.get_value_by_index(9) == row_num
 
diff --git a/python/tsfile/constants.py b/python/tsfile/constants.py
index d4f87200d..6f233e271 100644
--- a/python/tsfile/constants.py
+++ b/python/tsfile/constants.py
@@ -15,10 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 #
+from datetime import datetime
 from enum import unique, IntEnum
 import numpy as np
 
-
 @unique
 class TSDataType(IntEnum):
     BOOLEAN = 0
@@ -32,6 +32,11 @@ class TSDataType(IntEnum):
     BLOB = 10
     STRING = 11
 
+    def is_compatible_with(self, other: 'TSDataType') -> bool:
+        if self == other:
+            return True
+        return other in _TSDATATYPE_COMPATIBLE_SOURCES.get(self, ())
+
     def to_py_type(self):
         if self == TSDataType.BOOLEAN:
             return bool
@@ -73,7 +78,7 @@ def to_pandas_dtype(self):
         elif self == TSDataType.DATE:
             return "object"
         elif self == TSDataType.BLOB:
-            return "bytes"
+            return "object"
         else:
             raise ValueError(f"Unknown data type: {self}")
 
@@ -145,10 +150,19 @@ def from_pandas_datatype(cls, dtype):
         
         if dtype_str.startswith('datetime64'):
             return cls.TIMESTAMP
-        
+
         return cls.STRING
 
 
+_TSDATATYPE_COMPATIBLE_SOURCES = {
+    TSDataType.INT64: (TSDataType.INT32, TSDataType.TIMESTAMP),
+    TSDataType.STRING: (TSDataType.TEXT,),
+    TSDataType.TEXT: (TSDataType.STRING,),
+    TSDataType.DOUBLE: (TSDataType.FLOAT,),
+    TSDataType.TIMESTAMP: (TSDataType.INT64, TSDataType.INT32)
+}
+
+
 
 
 @unique
@@ -186,3 +200,5 @@ class Compressor(IntEnum):
 class ColumnCategory(IntEnum):
     TAG = 0
     FIELD = 1
+    ATTRIBUTE = 2
+    TIME = 3
diff --git a/python/tsfile/exceptions.py b/python/tsfile/exceptions.py
index 2a3df283a..a02f392ce 100644
--- a/python/tsfile/exceptions.py
+++ b/python/tsfile/exceptions.py
@@ -23,7 +23,7 @@ class LibraryError(Exception):
     def __init__(self, code=None, context=None):
         self.code = code if code is not None else self._default_code
         self.message = context if context is not None else self._default_message
-        super().__init__(f"[{code}] {self.message}")
+        super().__init__(f"[{self.code}] {self.message}")
 
     def __str__(self):
         return f"{self.code}: {self.message}"
diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index 3aa1313cd..379307da5 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -53,7 +53,6 @@ def __repr__(self):
         return f"TimeseriesSchema({self.timeseries_name}, {self.data_type.name}, {self.encoding_type.name}, {self.compression_type.name})"
 
 
-
 class DeviceSchema:
     """Represents a device entity containing multiple time series."""
 
@@ -73,6 +72,7 @@ def get_timeseries_list(self):
     def __repr__(self):
         return f"DeviceSchema({self.device_name}, {self.timeseries_list})"
 
+
 class ColumnSchema:
     """Defines schema for a table column (name, datatype, category)."""
 
@@ -85,6 +85,9 @@ def __init__(self, column_name: str, data_type: TSDataType, category: ColumnCate
         self.column_name = column_name.lower()
         if data_type is None:
             raise ValueError("Data type cannot be None")
+        if category == ColumnCategory.TIME and data_type not in [TSDataType.INT64, TSDataType.TIMESTAMP]:
+            raise TypeError(f"Time Column should have type : INT64/Timestamp,"
+                            f" but got {data_type}")
         self.data_type = data_type
         self.category = category
 
@@ -105,6 +108,7 @@ class TableSchema:
     """Schema definition for a table structure."""
     table_name = None
     columns = None
+    time_column = None
 
     def __init__(self, table_name: str, columns: List[ColumnSchema]):
         if table_name is None or len(table_name) == 0:
@@ -113,6 +117,14 @@ def __init__(self, table_name: str, columns: List[ColumnSchema]):
         if len(columns) == 0:
             raise ValueError("Columns cannot be empty")
         self.columns = columns
+        for column in self.columns:  # a table may declare at most one TIME column
+            if column.get_category() == ColumnCategory.TIME:
+                if self.time_column is not None:
+                    raise ValueError(
+                        f"Table '{self.table_name}' cannot have multiple time columns: "
+                        f"'{self.time_column.get_column_name()}' and '{column.get_column_name()}'"
+                    )
+                self.time_column = column
 
     def get_table_name(self):
         return self.table_name
@@ -120,9 +132,39 @@ def get_table_name(self):
     def get_columns(self):
         return self.columns
 
+    def get_column(self, column_name: str):
+        name_lower = column_name.lower()
+        for col in self.columns:
+            if col.get_column_name() == name_lower:
+                return col
+        return None
+
+    def get_time_column(self):
+        return self.time_column
+
     def get_column_names(self):
         return [name.get_column_name() for name in self.columns]
 
+    def get_field_columns(self):
+        return [
+            column
+            for column in self.columns
+            if column.get_category() == ColumnCategory.FIELD
+        ]
+
+    def get_tag_columns(self):
+        return [
+            column
+            for column in self.columns
+            if column.get_category() == ColumnCategory.TAG
+        ]
+
+
+    def add_column(self, column: ColumnSchema):
+        if column.get_category() == ColumnCategory.TIME:
+            self.time_column = column  # remember which column carries the timestamps
+        self.columns.append(column)  # every category is stored, not only TIME
+
     def __repr__(self) -> str:
         return f"TableSchema({self.table_name}, {self.columns})"
 
diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd
index ab915fefe..9c65fb26f 100644
--- a/python/tsfile/tsfile_cpp.pxd
+++ b/python/tsfile/tsfile_cpp.pxd
@@ -76,7 +76,10 @@ cdef extern from "./tsfile_cwrapper.h":
 
     ctypedef enum ColumnCategory:
         TAG = 0,
-        FIELD = 1
+        FIELD = 1,
+        ATTRIBUTE = 2,
+        TIME = 3
+
 
     # struct types
     ctypedef struct ColumnSchema:
diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd
index 9ce2f90da..2389aa9a6 100644
--- a/python/tsfile/tsfile_py_cpp.pxd
+++ b/python/tsfile/tsfile_py_cpp.pxd
@@ -33,7 +33,7 @@ cdef public api DeviceSchema* to_c_device_schema(object py_schema)
 cdef public api ColumnSchema* to_c_column_schema(object py_schema)
 cdef public api TableSchema* to_c_table_schema(object py_schema)
 cdef public api Tablet to_c_tablet(object tablet)
-cdef public api Tablet dataframe_to_c_tablet(object target_name, object dataframe)
+cdef public api Tablet dataframe_to_c_tablet(object target_name, object dataframe, object table_schema)
 cdef public api TsRecord to_c_record(object row_record)
 cdef public api void free_c_table_schema(TableSchema* c_schema)
 cdef public api void free_c_column_schema(ColumnSchema* c_schema)
diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx
index 851346853..b8bd73d0b 100644
--- a/python/tsfile/tsfile_py_cpp.pyx
+++ b/python/tsfile/tsfile_py_cpp.pyx
@@ -16,6 +16,7 @@
 # under the License.
 #
 #cython: language_level=3
+from datetime import date as date_type
 from .date_utils import parse_date_to_int
 from .tsfile_cpp cimport *
 
@@ -29,7 +30,7 @@ from cpython.exc cimport PyErr_SetObject
 from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsUTF8, PyUnicode_AsUTF8AndSize
 from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize
 
-from tsfile.exceptions import ERROR_MAPPING
+from tsfile.exceptions import ERROR_MAPPING, TypeMismatchError
 from tsfile.schema import ResultSetMetaData as ResultSetMetaDataPy
 from tsfile.schema import TSDataType as TSDataTypePy, TSEncoding as TSEncodingPy
 from tsfile.schema import Compressor as CompressorPy, ColumnCategory as CategoryPy
@@ -133,7 +134,9 @@ cdef dict COMPRESSION_TYPE_MAP = {
 
 cdef dict CATEGORY_MAP = {
     CategoryPy.TAG: ColumnCategory.TAG,
-    CategoryPy.FIELD: ColumnCategory.FIELD
+    CategoryPy.FIELD: ColumnCategory.FIELD,
+    CategoryPy.ATTRIBUTE: ColumnCategory.ATTRIBUTE,
+    CategoryPy.TIME: ColumnCategory.TIME
 }
 
 cdef TSDataType to_c_data_type(object data_type):
@@ -321,7 +324,7 @@ cdef TSDataType check_string_or_blob(TSDataType ts_data_type, object dtype, obje
                     return TS_DATATYPE_BLOB
     return ts_data_type
 
-cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
+cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe, object table_schema):
     cdef Tablet ctablet
     cdef int max_row_num
     cdef TSDataType data_type
@@ -342,17 +345,12 @@ cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
     device_id_c = device_id_bytes
     df_columns = list(dataframe.columns)
     use_id_as_time = False
-    time_column_name = None
 
-    for col in df_columns:
-        if col.lower() == 'time':
-            time_column_name = col
-            break
+    time_column = table_schema.get_time_column()
+    use_id_as_time = time_column is None
+    time_column_name = None if time_column is None else time_column.get_column_name()
 
-    if time_column_name is None:
-        use_id_as_time = True
-
-    data_columns = [col for col in df_columns if col.lower() != 'time']
+    data_columns = [col for col in df_columns if col != time_column_name]
     column_num = len(data_columns)
 
     if column_num == 0:
@@ -361,11 +359,9 @@ cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
     max_row_num = len(dataframe)
 
     column_types_list = []
-    for col_name in data_columns:
-        pandas_dtype = dataframe[col_name].dtype
-        ds_type = pandas_dtype_to_ts_data_type(pandas_dtype)
-        ds_type = check_string_or_blob(ds_type, pandas_dtype, dataframe[col_name])
-        column_types_list.append(ds_type)
+    for column in data_columns:
+        data_type = table_schema.get_column(column).get_data_type()
+        column_types_list.append(data_type)
 
     columns_names = <char**> malloc(sizeof(char *) * column_num)
     columns_types = <TSDataType *> malloc(sizeof(TSDataType) * column_num)
@@ -390,7 +386,7 @@ cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
             timestamp = <int64_t> timestamp_py
             tablet_add_timestamp(ctablet, row, timestamp)
     else:
-        time_values = dataframe[time_column_name].values
+        time_values = dataframe[time_column.get_column_name()].values
         for row in range(max_row_num):
             timestamp_py = time_values[row]
             if pd.isna(timestamp_py):
@@ -403,6 +399,31 @@ cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
         data_type = column_types_list[col]
         column_values = dataframe[col_name].values
 
+        # Per-column validation for object types (check first non-null value only)
+        if data_type in (TS_DATATYPE_DATE, TS_DATATYPE_STRING, TS_DATATYPE_TEXT, TS_DATATYPE_BLOB):
+            # Positional access: a label lookup yields a Series when index labels repeat.
+            non_null_values = dataframe[col_name].dropna()
+            if not non_null_values.empty:
+                value = non_null_values.iloc[0]
+                if data_type == TS_DATATYPE_DATE:
+                    if not isinstance(value, date_type):
+                        raise TypeMismatchError(context=
+                            f"Column '{col_name}': expected DATE (datetime.date), "
+                            f"got {type(value).__name__}: {value!r}"
+                        )
+                elif data_type in (TS_DATATYPE_STRING, TS_DATATYPE_TEXT):
+                    if not isinstance(value, str):
+                        raise TypeMismatchError(context=
+                            f"Column '{col_name}': expected STRING/TEXT, "
+                            f"got {type(value).__name__}: {value!r}"
+                        )
+                elif data_type == TS_DATATYPE_BLOB:
+                    if not isinstance(value, bytes):
+                        raise TypeMismatchError(context=
+                            f"Column '{col_name}': expected BLOB (bytes), "
+                            f"got {type(value).__name__}: {value!r}"
+                        )
+
         # BOOLEAN
         if data_type == TS_DATATYPE_BOOLEAN:
             for row in range(max_row_num):
@@ -433,13 +454,13 @@ cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
                 value = column_values[row]
                 if not pd.isna(value):
                     tablet_add_value_by_index_double(ctablet, row, col, <double> value)
-        # DATE
+        # DATE (validated per-column above)
         elif data_type == TS_DATATYPE_DATE:
             for row in range(max_row_num):
                 value = column_values[row]
                 if not pd.isna(value):
                     tablet_add_value_by_index_int32_t(ctablet, row, col, parse_date_to_int(value))
-        # STRING or TEXT
+        # STRING or TEXT (validated per-column above)
         elif data_type == TS_DATATYPE_STRING or data_type == TS_DATATYPE_TEXT:
             for row in range(max_row_num):
                 value = column_values[row]
@@ -447,7 +468,7 @@ cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe):
                     py_value = str(value)
                     str_ptr = PyUnicode_AsUTF8AndSize(py_value, &raw_len)
                     tablet_add_value_by_index_string_with_len(ctablet, row, col, str_ptr, raw_len)
-        # BLOB
+        # BLOB (validated per-column above)
         elif data_type == TS_DATATYPE_BLOB:
             for row in range(max_row_num):
                 value = column_values[row]
diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx
index 359492d6f..041764f91 100644
--- a/python/tsfile/tsfile_reader.pyx
+++ b/python/tsfile/tsfile_reader.pyx
@@ -24,6 +24,8 @@ from typing import List
 
 import pandas as pd
 from libc.stdint cimport INT64_MIN, INT64_MAX
+from libc.string cimport strlen
+from cpython.bytes cimport PyBytes_FromStringAndSize
 
 from tsfile.schema import TSDataType as TSDataTypePy
 from .date_utils import parse_int_to_date
@@ -166,7 +168,7 @@ cdef class ResultSetPy:
             return tsfile_result_set_get_value_by_index_double(self.result, index)
         elif data_type == TSDataTypePy.BOOLEAN:
             return tsfile_result_set_get_value_by_index_bool(self.result, index)
-        elif data_type == TSDataTypePy.STRING or data_type == TSDataTypePy.TEXT or data_type == TSDataTypePy.BLOB:
+        elif data_type == TSDataTypePy.STRING or data_type == TSDataTypePy.TEXT:
             try:
                 string = tsfile_result_set_get_value_by_index_string(self.result, index)
                 if string == NULL:
@@ -174,6 +176,14 @@ cdef class ResultSetPy:
                 return string.decode('utf-8')
             finally:
                 pass
+        elif data_type == TSDataTypePy.BLOB:
+            try:
+                string = tsfile_result_set_get_value_by_index_string(self.result, index)
+                if string == NULL:
+                    return None
+                return PyBytes_FromStringAndSize(string, strlen(string))
+            finally:
+                pass
 
     def get_value_by_name(self, column_name : str):
         """
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index 5b33f9b2f..c11b78594 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -17,10 +17,44 @@
 #
 import pandas as pd
 
-from tsfile import TableSchema, Tablet, TableNotExistError
-from tsfile import TsFileWriter, ColumnCategory
+from tsfile import TableSchema, Tablet, TableNotExistError, ColumnCategory
+from tsfile import TsFileWriter, ColumnSchema
 from tsfile.constants import TSDataType
-from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
+from tsfile.exceptions import TypeMismatchError, ColumnNotExistError
+
+
+def validate_dataframe_for_tsfile(df: pd.DataFrame) -> None:
+    """Raise ValueError for a None/empty frame, case-insensitive duplicate column names, or unsupported dtypes."""
+    if df is None or df.empty:
+        raise ValueError("DataFrame cannot be None or empty")
+
+    columns = list(df.columns)
+    seen = set()
+    duplicates = []
+    for c in columns:
+        lower = c.lower()
+        if lower in seen:
+            duplicates.append(c)
+        seen.add(lower)
+    if duplicates:
+        raise ValueError(
+            f"Column names must be unique (case-insensitive). Duplicate columns: {duplicates}"
+        )
+
+    unsupported = []
+    for col in columns:
+        dtype = df[col].dtype
+        try:
+            TSDataType.from_pandas_datatype(dtype)
+        except (ValueError, TypeError) as e:
+            unsupported.append((col, str(dtype), str(e)))  # (column, dtype, reason)
+
+    if unsupported:
+        msg_parts = [f"  - {col}: dtype={dtype}" for col, dtype, _reason in unsupported]
+        raise ValueError(
+            "Data types not supported by tsfile:\n" + "\n".join(msg_parts)
+        )
+
 
 def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
     if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
@@ -76,68 +110,65 @@ def write_dataframe(self, dataframe: pd.DataFrame):
         :raise: ColumnNotExistError if DataFrame columns don't match schema.
         :raise: TypeMismatchError if DataFrame column types are incompatible with schema.
         """
-        if dataframe is None or dataframe.empty:
-            raise ValueError("DataFrame cannot be None or empty")
-
-        # Create mapping from lowercase column name to original column name
-        df_column_name_map = {col.lower(): col for col in dataframe.columns if col.lower() != 'time'}
-        df_columns = list(df_column_name_map.keys())
 
-        schema_column_names = set(self.tableSchema.get_column_names())
-        df_columns_set = set(df_columns)
+        validate_dataframe_for_tsfile(dataframe)
+
+        # rename columns to lowercase
+        dataframe = dataframe.rename(columns=str.lower)
+        time_column = self.tableSchema.get_time_column()
+        # tag columns used for sorting
+        tag_columns = self.tableSchema.get_tag_columns()
+        if time_column is None:
+            if 'time' in dataframe.columns:
+                dtype = TSDataType.from_pandas_datatype(dataframe['time'].dtype)
+                if not TSDataType.TIMESTAMP.is_compatible_with(dtype):
+                    raise TypeMismatchError(
+                        code=27,
+                        context=f"time column require INT/Timestamp"
+                    )
+
+                self.tableSchema.add_column(ColumnSchema("time",
+                                                         TSDataType.TIMESTAMP,
+                                                         ColumnCategory.TIME))
+                time_column = self.tableSchema.get_time_column()
 
-        extra_columns = df_columns_set - schema_column_names
-        if extra_columns:
-            raise ColumnNotExistError(
-                code=50,
-                context=f"DataFrame has columns not in schema: {', '.join(sorted(extra_columns))}"
-            )
-
-        schema_column_map = {
-            col.get_column_name(): col for col in self.tableSchema.get_columns()
-        }
-        
         type_mismatches = []
-        for col_name in df_columns:
-            df_col_name_original = df_column_name_map[col_name]
-                
-            df_dtype = dataframe[df_col_name_original].dtype
-            df_ts_type = TSDataType.from_pandas_datatype(df_dtype)
-            df_ts_type = check_string_or_blob(df_ts_type, df_dtype, dataframe[df_col_name_original])
-
-            schema_col = schema_column_map[col_name]
-            expected_ts_type = schema_col.get_data_type()
-
-            if df_ts_type != expected_ts_type:
-                type_mismatches.append(
-                    f"Column '{col_name}': expected {expected_ts_type.name}, got {df_ts_type.name}"
-                )
-        
+        for col_name in dataframe.columns:
+            if time_column is not None and col_name == time_column.get_column_name():
+                continue
+            schema_col = self.tableSchema.get_column(col_name)
+            if schema_col is None:
+                raise ColumnNotExistError(context=f"{col_name} is not defined in table schema")
+            # Object dtype can represent STRING, DATE, TEXT, BLOB; validation will be performed during insert, skip here
+            if schema_col.get_data_type() in [TSDataType.INT64, TSDataType.INT32, TSDataType.DOUBLE, TSDataType.FLOAT,
+                                              TSDataType.BOOLEAN, TSDataType.TIMESTAMP]:
+                df_dtype = dataframe[col_name].dtype
+                df_ts_type = TSDataType.from_pandas_datatype(df_dtype)
+                expected_ts_type = schema_col.get_data_type()
+
+                if not expected_ts_type.is_compatible_with(df_ts_type):
+                    type_mismatches.append(
+                        f"Column '{col_name}': expected {expected_ts_type.name}, got {df_ts_type.name}"
+                    )
+
         if type_mismatches:
             raise TypeMismatchError(
                 code=27,
                 context=f"Type mismatches: {'; '.join(type_mismatches)}"
             )
 
-        tag_columns = []
-        for col in self.tableSchema.get_columns():
-            if col.get_category() == ColumnCategory.TAG:
-                tag_col_name = col.get_column_name()
-                if tag_col_name in df_column_name_map:
-                    tag_columns.append(df_column_name_map[tag_col_name])
-
-        time_column = None
-        for col in dataframe.columns:
-            if col.lower() == 'time':
-                time_column = col
-                break
-
         if time_column:
-            sort_by = tag_columns.copy()
-            sort_by.append(time_column)
+            time_column_name = time_column.get_column_name()
+            time_series = dataframe[time_column_name]
+            if time_series.isna().any():  # reject null timestamps before sorting
+                raise ValueError(
+                    f"Time column '{time_column_name}' must not contain null/NaN values"
+                )
+            sort_by = [column.get_column_name() for column in tag_columns]  # tag columns first, then time
+            sort_by.append(time_column_name)
             dataframe = dataframe.sort_values(by=sort_by)
 
-        self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe)
+        self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe, self.tableSchema)
 
     def close(self):
         """
diff --git a/python/tsfile/tsfile_writer.pyx b/python/tsfile/tsfile_writer.pyx
index c558984e1..4826ef72d 100644
--- a/python/tsfile/tsfile_writer.pyx
+++ b/python/tsfile/tsfile_writer.pyx
@@ -86,8 +86,8 @@ cdef class TsFileWriterPy:
         finally:
             free_c_tablet(ctablet)
 
-    def write_dataframe(self, target_table: str, dataframe: pandas.DataFrame):
-        cdef Tablet ctablet = dataframe_to_c_tablet(target_table, dataframe)
+    def write_dataframe(self, target_table: str, dataframe: pandas.DataFrame, tableschema: TableSchemaPy):
+        cdef Tablet ctablet = dataframe_to_c_tablet(target_table, dataframe, tableschema)
         cdef ErrorCode errno
         try:
             errno = _tsfile_writer_write_table(self.writer, ctablet)

From 18971c85b460f888bda793be649c4e9479328ddd Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Mon, 9 Feb 2026 03:03:10 +0800
Subject: [PATCH 04/13] tmp code.

---
 cpp/src/cwrapper/tsfile_cwrapper.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index 7c22ccd5c..5bba87b9a 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -116,6 +116,10 @@ TsFileWriter tsfile_writer_new(WriteFile file, TableSchema* schema,
             *err_code = common::E_INVALID_SCHEMA;
             return nullptr;
         }
+        // Ignore time column definition.
+        if (cur_schema.column_category == TIME) {
+            continue;
+        }
 
         column_schemas.emplace_back(
             cur_schema.column_name,

From 1b3c27552605016f989c4292381221059131769a Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Mon, 9 Feb 2026 04:01:16 +0800
Subject: [PATCH 05/13] tmp code.

---
 cpp/src/cwrapper/tsfile_cwrapper.cc  |   3 +
 python/tests/test_dataframe.py       |   2 -
 python/tests/test_to_tsfile.py       | 213 ++++++++++++++-------------
 python/tsfile/tsfile_py_cpp.pyx      |  11 --
 python/tsfile/tsfile_table_writer.py |  25 ++--
 python/tsfile/utils.py               |  86 +++++------
 6 files changed, 166 insertions(+), 174 deletions(-)

diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index 5bba87b9a..f384698ba 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -691,6 +691,9 @@ ERRNO _tsfile_writer_register_table(TsFileWriter writer, TableSchema* schema) {
     measurement_schemas.resize(schema->column_num);
     for (int i = 0; i < schema->column_num; i++) {
         ColumnSchema* cur_schema = schema->column_schemas + i;
+        if (cur_schema->column_category == TIME) {
+            continue;
+        }
         measurement_schemas[i] = new storage::MeasurementSchema(
             cur_schema->column_name,
             static_cast<common::TSDataType>(cur_schema->data_type));
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index e3c923a3c..09d0001ba 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -318,5 +318,3 @@ def test_write_dataframe_empty():
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
-
-
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index 7c1fb84ca..c3a970e3c 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -16,12 +16,13 @@
 # under the License.
 #
 import os
+from datetime import date
 
 import numpy as np
 import pandas as pd
 import pytest
 
-from tsfile import to_dataframe
+from tsfile import to_dataframe, TsFileReader, ColumnCategory
 from tsfile.utils import dataframe_to_tsfile
 
 
@@ -71,6 +72,22 @@ def test_dataframe_to_tsfile_basic():
             os.remove(tsfile_path)
 
 
+def test_dataframe_to_tsfile_default_table_name():
+    tsfile_path = "test_dataframe_to_tsfile_default.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        df = pd.DataFrame({'time': [0, 1], 'value': [1.0, 2.0]})
+        dataframe_to_tsfile(df, tsfile_path)
+
+        df_read = to_dataframe(tsfile_path, table_name="default_table")
+        assert len(df_read) == 2
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+
 def test_dataframe_to_tsfile_with_index():
     tsfile_path = "test_dataframe_to_tsfile_index.tsfile"
     try:
@@ -78,23 +95,23 @@ def test_dataframe_to_tsfile_with_index():
             os.remove(tsfile_path)
 
         df = pd.DataFrame({
-            'device': [f"device{i}" for i in range(50)],
-            'value': [i * 2.5 for i in range(50)]
+            'device': [f"device{i}" for i in range(30)],
+            'value': [i * 2.0 for i in range(30)]
         })
-        df.index = [i * 10 for i in range(50)]
-
+        df.index = [i * 100 for i in range(30)]
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
         df_read = df_read.sort_values('time').reset_index(drop=True)
-        df_sorted = df.sort_index()
-        df_sorted = convert_to_nullable_types(df_sorted.reset_index(drop=True))
-        time_series = pd.Series(df.sort_index().index.values, dtype='Int64')
+        time_expected = pd.Series(df.index.values, dtype='Int64')
+        assert df_read.shape == (30, 3)
+        assert df_read["time"].equals(time_expected)
 
-        assert df_read.shape == (50, 3)
-        assert df_read["time"].equals(time_series)
-        assert df_read["device"].equals(df_sorted["device"])
-        assert df_read["value"].equals(df_sorted["value"])
+        with TsFileReader(tsfile_path) as reader:
+            table_schema = reader.get_table_schema("test_table")
+            device_col = table_schema.get_column("device")
+            assert device_col is not None
+            assert device_col.get_category() == ColumnCategory.FIELD
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
@@ -127,6 +144,27 @@ def test_dataframe_to_tsfile_custom_time_column():
             os.remove(tsfile_path)
 
 
+def test_dataframe_to_tsfile_case_insensitive_time():
+    tsfile_path = "test_dataframe_to_tsfile_case_time.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        df = pd.DataFrame({
+            'Time': [i for i in range(20)],
+            'value': [i * 2.0 for i in range(20)]
+        })
+
+        dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
+
+        df_read = to_dataframe(tsfile_path, table_name="test_table")
+        assert df_read.shape == (20, 2)
+        assert df_read["time"].equals(pd.Series([i for i in range(20)], dtype='Int64'))
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+
 def test_dataframe_to_tsfile_with_tag_columns():
     tsfile_path = "test_dataframe_to_tsfile_tags.tsfile"
     try:
@@ -155,6 +193,34 @@ def test_dataframe_to_tsfile_with_tag_columns():
             os.remove(tsfile_path)
 
 
+def test_dataframe_to_tsfile_tag_time_unsorted():
+    tsfile_path = "test_dataframe_to_tsfile_tag_time_unsorted.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        df = pd.DataFrame({
+            'time': [30, 10, 20, 50, 40, 15, 25, 35, 5, 45],
+            'device': ['device1', 'device1', 'device1', 'device2', 'device2', 'device1', 'device1', 'device2',
+                       'device1', 'device2'],
+            'value': [i * 1.5 for i in range(10)]
+        })
+
+        dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device"])
+
+        df_read = to_dataframe(tsfile_path, table_name="test_table")
+        df_expected = df.sort_values(by=['device', 'time']).reset_index(drop=True)
+        df_expected = convert_to_nullable_types(df_expected)
+
+        assert df_read.shape == (10, 3)
+        assert df_read["device"].equals(df_expected["device"])
+        assert df_read["time"].equals(df_expected["time"])
+        assert df_read["value"].equals(df_expected["value"])
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+
 def test_dataframe_to_tsfile_all_datatypes():
     tsfile_path = "test_dataframe_to_tsfile_all_types.tsfile"
     try:
@@ -169,7 +235,10 @@ def test_dataframe_to_tsfile_all_datatypes():
             'float_col': pd.Series([i * 1.5 for i in range(50)], dtype='float32'),
             'double_col': [i * 2.5 for i in range(50)],
             'string_col': [f"str{i}" for i in range(50)],
-            'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)]
+            'blob_col': [f"blob{i}".encode('utf-8') for i in range(50)],
+            'text_col': [f"text{i}" for i in range(50)],
+            'date_col': [date(2025, i % 11 + 1, i % 20 + 1) for i in range(50)],
+            'timestamp_col': [i for i in range(50)]
         })
 
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
@@ -178,13 +247,16 @@ def test_dataframe_to_tsfile_all_datatypes():
         df_read = df_read.sort_values('time').reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
 
-        assert df_read.shape == (50, 8)
+        assert df_read.shape == (50, 11)
         assert df_read["bool_col"].equals(df_sorted["bool_col"])
         assert df_read["int32_col"].equals(df_sorted["int32_col"])
         assert df_read["int64_col"].equals(df_sorted["int64_col"])
         assert np.allclose(df_read["float_col"], df_sorted["float_col"])
         assert np.allclose(df_read["double_col"], df_sorted["double_col"])
         assert df_read["string_col"].equals(df_sorted["string_col"])
+        assert df_read["text_col"].equals(df_sorted["text_col"])
+        assert df_read["date_col"].equals(df_sorted["date_col"])
+        assert df_read["timestamp_col"].equals(df_sorted["timestamp_col"])
         for i in range(50):
             assert df_read["blob_col"].iloc[i] == df_sorted["blob_col"].iloc[i]
     finally:
@@ -192,47 +264,6 @@ def test_dataframe_to_tsfile_all_datatypes():
             os.remove(tsfile_path)
 
 
-def test_dataframe_to_tsfile_default_table_name():
-    tsfile_path = "test_dataframe_to_tsfile_default_name.tsfile"
-    try:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-        df = pd.DataFrame({
-            'time': [i for i in range(10)],
-            'value': [i * 1.0 for i in range(10)]
-        })
-
-        dataframe_to_tsfile(df, tsfile_path)
-
-        df_read = to_dataframe(tsfile_path, table_name="test_dataframe_to_tsfile_default_name")
-        assert df_read.shape == (10, 2)
-    finally:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-
-def test_dataframe_to_tsfile_case_insensitive_time():
-    tsfile_path = "test_dataframe_to_tsfile_case_time.tsfile"
-    try:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-        df = pd.DataFrame({
-            'Time': [i for i in range(20)],
-            'value': [i * 2.0 for i in range(20)]
-        })
-
-        dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
-
-        df_read = to_dataframe(tsfile_path, table_name="test_table")
-        assert df_read.shape == (20, 2)
-        assert df_read["time"].equals(pd.Series([i for i in range(20)], dtype='Int64'))
-    finally:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-
 def test_dataframe_to_tsfile_empty_dataframe():
     tsfile_path = "test_dataframe_to_tsfile_empty.tsfile"
     try:
@@ -265,6 +296,20 @@ def test_dataframe_to_tsfile_no_data_columns():
             os.remove(tsfile_path)
 
 
+def test_dataframe_to_tsfile_time_column_not_found():
+    tsfile_path = "test_dataframe_to_tsfile_time_err.tsfile"
+    try:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+        df = pd.DataFrame({'time': [0, 1], 'value': [1.0, 2.0]})
+        with pytest.raises(ValueError, match="Time column 'timestamp' not found"):
+            dataframe_to_tsfile(df, tsfile_path, time_column="timestamp")
+    finally:
+        if os.path.exists(tsfile_path):
+            os.remove(tsfile_path)
+
+
 def test_dataframe_to_tsfile_invalid_time_column():
     tsfile_path = "test_dataframe_to_tsfile_invalid_time.tsfile"
     try:
@@ -301,17 +346,13 @@ def test_dataframe_to_tsfile_non_integer_time_column():
             os.remove(tsfile_path)
 
 
-def test_dataframe_to_tsfile_invalid_tag_column():
-    tsfile_path = "test_dataframe_to_tsfile_invalid_tag.tsfile"
+def test_dataframe_to_tsfile_tag_column_not_found():
+    tsfile_path = "test_dataframe_to_tsfile_tag_err.tsfile"
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
 
-        df = pd.DataFrame({
-            'time': [i for i in range(10)],
-            'value': [i * 1.0 for i in range(10)]
-        })
-
+        df = pd.DataFrame({'time': [0, 1], 'device': ['a', 'b'], 'value': [1.0, 2.0]})
         with pytest.raises(ValueError, match="Tag column 'invalid' not found"):
             dataframe_to_tsfile(df, tsfile_path, tag_column=["invalid"])
     finally:
@@ -319,55 +360,19 @@ def test_dataframe_to_tsfile_invalid_tag_column():
             os.remove(tsfile_path)
 
 
-def test_dataframe_to_tsfile_string_vs_blob():
-    tsfile_path = "test_dataframe_to_tsfile_string_blob.tsfile"
-    try:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-        df = pd.DataFrame({
-            'time': [i for i in range(20)],
-            'string_col': [f"str{i}" for i in range(20)],
-            'blob_col': [f"blob{i}".encode('utf-8') for i in range(20)]
-        })
-
-        dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
-
-        df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
-        df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
-
-        assert df_read["string_col"].equals(df_sorted["string_col"])
-        for i in range(20):
-            assert df_read["blob_col"].iloc[i] == df_sorted["blob_col"].iloc[i]
-    finally:
-        if os.path.exists(tsfile_path):
-            os.remove(tsfile_path)
-
-
-def test_dataframe_to_tsfile_tag_time_unsorted():
-    tsfile_path = "test_dataframe_to_tsfile_tag_time_unsorted.tsfile"
+def test_dataframe_to_tsfile_invalid_tag_column():
+    tsfile_path = "test_dataframe_to_tsfile_invalid_tag.tsfile"
     try:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
 
         df = pd.DataFrame({
-            'time': [30, 10, 20, 50, 40, 15, 25, 35, 5, 45],
-            'device': ['device1', 'device1', 'device1', 'device2', 'device2', 'device1', 'device1', 'device2',
-                       'device1', 'device2'],
-            'value': [i * 1.5 for i in range(10)]
+            'time': [i for i in range(10)],
+            'value': [i * 1.0 for i in range(10)]
         })
 
-        dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device"])
-
-        df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_expected = df.sort_values(by=['device', 'time']).reset_index(drop=True)
-        df_expected = convert_to_nullable_types(df_expected)
-
-        assert df_read.shape == (10, 3)
-        assert df_read["device"].equals(df_expected["device"])
-        assert df_read["time"].equals(df_expected["time"])
-        assert df_read["value"].equals(df_expected["value"])
+        with pytest.raises(ValueError, match="Tag column 'invalid' not found"):
+            dataframe_to_tsfile(df, tsfile_path, tag_column=["invalid"])
     finally:
         if os.path.exists(tsfile_path):
             os.remove(tsfile_path)
diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx
index b8bd73d0b..98b28673c 100644
--- a/python/tsfile/tsfile_py_cpp.pyx
+++ b/python/tsfile/tsfile_py_cpp.pyx
@@ -313,17 +313,6 @@ cdef Tablet to_c_tablet(object tablet):
 cdef TSDataType pandas_dtype_to_ts_data_type(object dtype):
     return to_c_data_type(TSDataTypePy.from_pandas_datatype(dtype))
 
-cdef TSDataType check_string_or_blob(TSDataType ts_data_type, object dtype, object column_series):
-    if ts_data_type == TS_DATATYPE_STRING:
-        dtype_str = str(dtype)
-        if dtype == 'object' or dtype_str == "<class 'numpy.object_'>":
-            first_valid_idx = column_series.first_valid_index()
-            if first_valid_idx is not None:
-                first_value = column_series[first_valid_idx]
-                if isinstance(first_value, bytes):
-                    return TS_DATATYPE_BLOB
-    return ts_data_type
-
 cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe, object table_schema):
     cdef Tablet ctablet
     cdef int max_row_num
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index c11b78594..1561d7c61 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 #
+from datetime import date, datetime
+
 import pandas as pd
 
 from tsfile import TableSchema, Tablet, TableNotExistError, ColumnCategory
@@ -56,15 +58,20 @@ def validate_dataframe_for_tsfile(df: pd.DataFrame) -> None:
         )
 
 
-def check_string_or_blob(ts_data_type: TSDataType, dtype, column_series: pd.Series) -> TSDataType:
-    if ts_data_type == TSDataType.STRING and (dtype == 'object' or str(dtype) == "<class 'numpy.object_'>"):
-        first_valid_idx = column_series.first_valid_index()
-        if first_valid_idx is not None:
-            first_value = column_series[first_valid_idx]
-            if isinstance(first_value, bytes):
-                return TSDataType.BLOB
-    return ts_data_type
-
+def infer_object_column_type(column_series: pd.Series) -> TSDataType:
+    non_null = column_series.dropna()  # inspect the first non-null value only
+    if non_null.empty:
+        return TSDataType.STRING
+    value = non_null.iloc[0]  # positional: a label lookup returns a Series when index labels repeat
+    if isinstance(value, (bytes, bytearray)):
+        return TSDataType.BLOB
+    if isinstance(value, (date, datetime)):
+        return TSDataType.DATE
+    if isinstance(value, str):
+        return TSDataType.STRING
+    raise TypeError(
+        f"Cannot infer type from object column: expected str/bytes/date, got {type(value).__name__}: {value!r}"
+    )
 
 class TsFileTableWriter:
     """
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index 567c4fe19..1bb7aa1d3 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -15,18 +15,17 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-from pathlib import Path
 from typing import Iterator, Union
 from typing import Optional
 
 import numpy as np
 import pandas as pd
-from pandas.core.dtypes.common import is_integer_dtype
+from pandas.core.dtypes.common import is_integer_dtype, is_object_dtype
 
 from tsfile import ColumnSchema, TableSchema, ColumnCategory, TSDataType
 from tsfile.exceptions import TableNotExistError, ColumnNotExistError
 from tsfile.tsfile_reader import TsFileReaderPy
-from tsfile.tsfile_table_writer import TsFileTableWriter, check_string_or_blob
+from tsfile.tsfile_table_writer import TsFileTableWriter, infer_object_column_type, validate_dataframe_for_tsfile
 
 
 def to_dataframe(file_path: str,
@@ -189,7 +188,7 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
         Path to the TsFile to write. Will be created if it doesn't exist.
 
     table_name : Optional[str], default None
-        Name of the table. If None, defaults to tsfile file name.
+        Name of the table. If None, defaults to tsfile file name (without extension).
 
     time_column : Optional[str], default None
         Name of the time column. If None, will look for a column named 'time' (case-insensitive),
@@ -208,62 +207,53 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
     ValueError
         If the DataFrame is empty or has no data columns.
     """
-    if dataframe is None or dataframe.empty:
-        raise ValueError("DataFrame cannot be None or empty")
+    validate_dataframe_for_tsfile(dataframe)
+    df = dataframe.rename(columns=str.lower)
 
-    if table_name is None:
-        filename = Path(file_path).stem
-        table_name = filename
+    if not table_name:
+        table_name = "default_table"
 
-    time_col_name = None
     if time_column is not None:
-        if time_column not in dataframe.columns:
+        if time_column.lower() not in df.columns:
             raise ValueError(f"Time column '{time_column}' not found in DataFrame")
-        if not is_integer_dtype(dataframe[time_column].dtype):
-            raise TypeError(
-                f"Time column '{time_column}' must be integer type (int64 or int), got {dataframe[time_column].dtype}")
-        time_col_name = time_column
-    else:
-        for col in dataframe.columns:
-            if col.lower() == 'time':
-                if is_integer_dtype(dataframe[col].dtype):
-                    time_col_name = col
-                    break
-                else:
-                    raise TypeError(
-                        f"Time column '{col}' must be integer type (int64 or int), got {dataframe[col].dtype}")
-
-    data_columns = [col for col in dataframe.columns if col != time_col_name]
-
-    if len(data_columns) == 0:
-        raise ValueError("DataFrame must have at least one data column besides the time column")
-
-    tag_columns_lower = []
     if tag_column is not None:
         for tag_col in tag_column:
-            if tag_col not in dataframe.columns:
+            if tag_col.lower() not in df.columns:
                 raise ValueError(f"Tag column '{tag_col}' not found in DataFrame")
-            tag_columns_lower.append(tag_col.lower())
+    tag_columns_lower = {t.lower() for t in (tag_column or [])}
 
-    column_schemas = []
-    for col_name in data_columns:
-        col_dtype = dataframe[col_name].dtype
-        ts_data_type = TSDataType.from_pandas_datatype(col_dtype)
-        ts_data_type = check_string_or_blob(ts_data_type, col_dtype, dataframe[col_name])
+    if time_column is not None:
+        time_col_name = time_column.lower()
+    elif 'time' in df.columns:
+        time_col_name = 'time'
+    else:
+        time_col_name = None
+
+    if time_col_name is not None:
+        if not is_integer_dtype(df[time_col_name].dtype):
+            raise TypeError(
+                f"Time column '{time_col_name}' must be integer type (int64 or int), got {df[time_col_name].dtype}")
 
-        if col_name.lower() in tag_columns_lower:
-            category = ColumnCategory.TAG
+    column_schemas = []
+    if time_col_name is not None:
+        column_schemas.append(ColumnSchema(time_col_name, TSDataType.TIMESTAMP, ColumnCategory.TIME))
+
+    for col in df.columns:
+        if col == time_col_name:
+            continue
+        col_dtype = df[col].dtype
+        if is_object_dtype(col_dtype):
+            ts_data_type = infer_object_column_type(df[col])
         else:
-            category = ColumnCategory.FIELD
+            ts_data_type = TSDataType.from_pandas_datatype(col_dtype)
 
-        column_schemas.append(ColumnSchema(col_name, ts_data_type, category))
+        category = ColumnCategory.TAG if col in tag_columns_lower else ColumnCategory.FIELD
+        column_schemas.append(ColumnSchema(col, ts_data_type, category))
 
-    table_schema = TableSchema(table_name, column_schemas)
+    if len(column_schemas) == 0:
+        raise ValueError("DataFrame must have at least one data column besides the time column")
 
-    if time_col_name is not None and time_col_name != 'time':
-        df_to_write = dataframe.rename(columns={time_col_name: 'time'})
-    else:
-        df_to_write = dataframe
+    table_schema = TableSchema(table_name, column_schemas)
 
     with TsFileTableWriter(file_path, table_schema) as writer:
-        writer.write_dataframe(df_to_write)
+        writer.write_dataframe(df)

From 0f91284c5c7491d5b3f13bace3fd81ed40b1e3ea Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Mon, 9 Feb 2026 09:07:05 +0800
Subject: [PATCH 06/13] validate tag column types and reject duplicate columns in schema.

---
 python/tsfile/schema.py              | 17 +++++++++++++++--
 python/tsfile/tsfile_py_cpp.pyx      |  3 ---
 python/tsfile/tsfile_table_writer.py |  3 ++-
 python/tsfile/utils.py               |  2 +-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index 379307da5..298f5eecc 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -17,6 +17,7 @@
 #
 from typing import List
 
+from . import TypeMismatchError
 from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor
 
 
@@ -88,6 +89,8 @@ def __init__(self, column_name: str, data_type: TSDataType, category: ColumnCate
         if category == ColumnCategory.TIME and data_type not in [TSDataType.INT64, TSDataType.TIMESTAMP]:
             raise TypeError(f"Time Column should have type : INT64/Timestamp,"
                             f" but got {data_type}")
+        elif category == ColumnCategory.TAG and data_type not in [TSDataType.STRING, TSDataType.TEXT]:
+            raise TypeMismatchError(context="Tag column should be string or text")
         self.data_type = data_type
         self.category = category
 
@@ -159,11 +162,21 @@ def get_tag_columns(self):
             if column.get_category() == ColumnCategory.TAG
         ]
 
-
     def add_column(self, column: ColumnSchema):
         if column.get_category() == ColumnCategory.TIME:
+            if self.time_column is not None:
+                raise ValueError(
+                    f"Table '{self.table_name}' cannot have multiple time columns: "
+                    f"'{self.time_column.get_column_name()}' and '{column.get_column_name()}'"
+                )
             self.time_column = column
-            self.columns.append(column)
+        else:
+            for col in self.columns:
+                if col.get_column_name() == column.get_column_name():
+                    raise ValueError(
+                        f"Duplicate column name {col.get_column_name()}"
+                    )
+        self.columns.append(column)
 
     def __repr__(self) -> str:
         return f"TableSchema({self.table_name}, {self.columns})"
diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx
index 98b28673c..3ca79a2a1 100644
--- a/python/tsfile/tsfile_py_cpp.pyx
+++ b/python/tsfile/tsfile_py_cpp.pyx
@@ -310,9 +310,6 @@ cdef Tablet to_c_tablet(object tablet):
 
     return ctablet
 
-cdef TSDataType pandas_dtype_to_ts_data_type(object dtype):
-    return to_c_data_type(TSDataTypePy.from_pandas_datatype(dtype))
-
 cdef Tablet dataframe_to_c_tablet(object target_name, object dataframe, object table_schema):
     cdef Tablet ctablet
     cdef int max_row_num
diff --git a/python/tsfile/tsfile_table_writer.py b/python/tsfile/tsfile_table_writer.py
index 1561d7c61..0346fd522 100644
--- a/python/tsfile/tsfile_table_writer.py
+++ b/python/tsfile/tsfile_table_writer.py
@@ -73,6 +73,7 @@ def infer_object_column_type(column_series: pd.Series) -> TSDataType:
         f"Cannot infer type from object column: expected str/bytes/date, got {type(value).__name__}: {value!r}"
     )
 
+
 class TsFileTableWriter:
     """
     Facilitates writing structured table data into a TsFile with a specified schema.
@@ -84,7 +85,7 @@ class TsFileTableWriter:
     according to that schema, and serialize this data into a TsFile.
     """
 
-    def __init__(self, path: str, table_schema: TableSchema, memory_threshold=128 * 1024 * 1024):
+    def __init__(self, path: str, table_schema: TableSchema, memory_threshold = 128 * 1024 * 1024):
         """
         :param path: The path of tsfile, will create if it doesn't exist.
         :param table_schema: describes the schema of the tables they want to write.
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index 1bb7aa1d3..71e213462 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -188,7 +188,7 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
         Path to the TsFile to write. Will be created if it doesn't exist.
 
     table_name : Optional[str], default None
-        Name of the table. If None, defaults to tsfile file name (without extension).
+        Name of the table. If None, defaults to "default_table".
 
     time_column : Optional[str], default None
         Name of the time column. If None, will look for a column named 'time' (case-insensitive),

From e625b76fc9a7a897577659b42968da629f14c690 Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Mon, 9 Feb 2026 09:14:23 +0800
Subject: [PATCH 07/13] fix import error.

---
 python/tests/test_basic.py | 2 +-
 python/tsfile/schema.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tests/test_basic.py b/python/tests/test_basic.py
index 842a8fb44..675ef837f 100644
--- a/python/tests/test_basic.py
+++ b/python/tests/test_basic.py
@@ -17,7 +17,7 @@
 #
 import numpy as np
 import pytest
-from tsfile import schema, Field
+from tsfile import Field
 from tsfile import Tablet
 from tsfile.constants import *
 from tsfile.schema import *
diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index 298f5eecc..91732eee3 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -17,7 +17,7 @@
 #
 from typing import List
 
-from . import TypeMismatchError
+from .exceptions import TypeMismatchError
 from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor
 
 

From 9e48483996ee585a64802fcdc5e65cc8442507ae Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Wed, 11 Feb 2026 00:25:38 +0800
Subject: [PATCH 08/13] add TIME_COLUMN constant and keep TIME columns in the C wrapper schema.

---
 cpp/src/common/constant/tsfile_constant.h     |   4 +--
 cpp/src/common/global.cc                      |   2 +-
 cpp/src/cwrapper/tsfile_cwrapper.cc           |   5 ----
 cpp/src/utils/db_utils.h                      |   6 +++--
 .../resources/table_with_time_column.tsfile   | Bin 0 -> 644 bytes
 python/tests/test_dataframe.py                |  24 +++++++++---------
 python/tests/test_load_tsfile_from_iotdb.py   |  23 ++++++++++++-----
 python/tests/test_to_tsfile.py                |  12 ++++-----
 python/tests/test_write_and_read.py           |  16 ++++++------
 python/tsfile/constants.py                    |  18 ++++++-------
 python/tsfile/schema.py                       |   8 +++---
 11 files changed, 63 insertions(+), 55 deletions(-)
 create mode 100644 python/tests/resources/table_with_time_column.tsfile

diff --git a/cpp/src/common/constant/tsfile_constant.h b/cpp/src/common/constant/tsfile_constant.h
index d3f4dec1c..096c645ab 100644
--- a/cpp/src/common/constant/tsfile_constant.h
+++ b/cpp/src/common/constant/tsfile_constant.h
@@ -37,15 +37,15 @@ static const std::string BACK_QUOTE_STRING = "`";
 static const std::string DOUBLE_BACK_QUOTE_STRING = "``";
 
 static const unsigned char TIME_COLUMN_MASK = 0x80;
+static const std::string TIME_COLUMN_NAME = "time";
 static const unsigned char VALUE_COLUMN_MASK = 0x40;
-
-static const std::string TIME_COLUMN_ID = "";
 static const int NO_STR_TO_READ = -1;
 
 static const std::regex IDENTIFIER_PATTERN("([a-zA-Z0-9_\\u2E80-\\u9FFF]+)");
 static const std::regex NODE_NAME_PATTERN(
     "(\\*{0,2}[a-zA-Z0-9_\\u2E80-\\u9FFF]+\\*{0,2})");
 static const int DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME = 3;
+
 }  // namespace storage
 
 #endif
diff --git a/cpp/src/common/global.cc b/cpp/src/common/global.cc
index 37b8c1bb8..fd1d0132d 100644
--- a/cpp/src/common/global.cc
+++ b/cpp/src/common/global.cc
@@ -122,7 +122,7 @@ int init_common() {
     g_time_column_schema.data_type_ = INT64;
     g_time_column_schema.encoding_ = PLAIN;
     g_time_column_schema.compression_ = UNCOMPRESSED;
-    g_time_column_schema.column_name_ = std::string("time");
+    g_time_column_schema.column_name_ = storage::TIME_COLUMN_NAME;
     return ret;
 }
 
diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index f384698ba..539d5b968 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -116,11 +116,6 @@ TsFileWriter tsfile_writer_new(WriteFile file, TableSchema* schema,
             *err_code = common::E_INVALID_SCHEMA;
             return nullptr;
         }
-        // Ignore time column definition.
-        if (cur_schema.column_category == TIME) {
-            continue;
-        }
-
         column_schemas.emplace_back(
             cur_schema.column_name,
             static_cast<common::TSDataType>(cur_schema.data_type),
diff --git a/cpp/src/utils/db_utils.h b/cpp/src/utils/db_utils.h
index 85d99b1a3..5a1dea8db 100644
--- a/cpp/src/utils/db_utils.h
+++ b/cpp/src/utils/db_utils.h
@@ -37,6 +37,7 @@ namespace common {
 extern TSEncoding get_value_encoder(TSDataType data_type);
 extern CompressionType get_default_compressor();
 
+// TODO: remove this.
 typedef struct FileID {
     int64_t seq_;  // timestamp when create
     int32_t version_;
@@ -64,13 +65,14 @@ typedef struct FileID {
 #endif
 } FileID;
 
+// TODO: remove this.
 typedef uint16_t NodeID;
 struct TsID {
     NodeID db_nid_;
     NodeID device_nid_;
     NodeID measurement_nid_;
 
-    TsID() : db_nid_(0), device_nid_(0), measurement_nid_(0){};
+    TsID() : db_nid_(0), device_nid_(0), measurement_nid_(0) {};
 
     TsID(NodeID db_nid, NodeID device_nid, NodeID measurement_nid)
         : db_nid_(db_nid),
@@ -157,7 +159,7 @@ struct TsID {
  * This enumeration class defines the supported categories for columns within a
  * table schema, distinguishing between tag and field columns.
  */
-enum class ColumnCategory { TAG = 0, FIELD = 1 };
+enum class ColumnCategory { TAG = 0, FIELD = 1, ATTRIBUTE = 2, TIME = 3 };
 
 /**
  * @brief Represents the schema information for a single column.
diff --git a/python/tests/resources/table_with_time_column.tsfile b/python/tests/resources/table_with_time_column.tsfile
new file mode 100644
index 0000000000000000000000000000000000000000..66be782aee311538fd7e485c467ee22827c3cb84
GIT binary patch
literal 644
zcmWG3cFW93Wnp0ADM?JqNi|~2$xm)&FlS?DF*BXSz`!5@#9R!F3?C$}F)%RBabJ7r
z!UkW51TF^0h8PAeMuQwd)`kf+0<4DwuBfuL9<)8>Dpr!3Taa3mSW;S)D#^^wAu9fX
z4QPnu|Ns9RY|orI3uGUD;W%^V0#|{I(%j6H%#uoZ7IqF<*$psN_B&@XGCY_C<R1LS
z#B&A&F0j-C^@BhIP#6T58W`9(Bw=oX(D&v-<QPO8;AXR6Dgv4h0<&<@4lEEGpoUdD
zAbW`!(>#!`kTFO<D^xR(3S@+_cR?78Da?$};ATV)cbF`cE@y<P0=b7NGlheN9U{fR
z45T@WQqwc@^Wrm8xEWyLAZc!-@Mq?L$$;Fz0Sj*y6k(92oeDr{5Tk*K3q%3!6NJ!g
z3{4FT4Gb&|Oss4Ysd>pol?5d}sb#4-Oa>xQu6JrBqZpJOQdyAdm0y&umspUPoRO+m
fQk(`%UwTkky-+__Kj)yxfDl&~V3aY3fKwj;m+iBw

literal 0
HcmV?d00001

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 09d0001ba..de49bc1ca 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -22,7 +22,7 @@
 import pandas as pd
 import pytest
 
-from tsfile import ColumnSchema, TableSchema, TSDataType
+from tsfile import ColumnSchema, TableSchema, TSDataType, TIME_COLUMN
 from tsfile import TsFileTableWriter, ColumnCategory
 from tsfile import to_dataframe
 from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
@@ -70,10 +70,10 @@ def test_write_dataframe_basic():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
         assert df_read.shape == (100, 4)
-        assert df_read["time"].equals(df_sorted["time"])
+        assert df_read[TIME_COLUMN].equals(df_sorted["time"])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
         assert df_read["value2"].equals(df_sorted["value2"])
@@ -99,12 +99,12 @@ def test_write_dataframe_with_index():
             df.index = [i * 10 for i in range(50)]  # Set index as timestamps
             writer.write_dataframe(df)
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = df.sort_index()
         df_sorted = convert_to_nullable_types(df_sorted.reset_index(drop=True))
         time_series = pd.Series(df.sort_index().index.values, dtype='Int64')
         assert df_read.shape == (50, 3)
-        assert df_read["time"].equals(time_series)
+        assert df_read[TIME_COLUMN].equals(time_series)
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
@@ -130,10 +130,10 @@ def test_write_dataframe_case_insensitive():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('Time').reset_index(drop=True))
         assert df_read.shape == (30, 3)
-        assert df_read["time"].equals(df_sorted["Time"])
+        assert df_read[TIME_COLUMN].equals(df_sorted["Time"])
         assert df_read["device"].equals(df_sorted["Device"])
         assert df_read["value"].equals(df_sorted["VALUE"])
     finally:
@@ -218,7 +218,7 @@ def test_write_dataframe_all_datatypes():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
         assert df_read.shape == (50, 11)
         assert df_read["bool_col"].equals(df_sorted["bool_col"])
@@ -257,10 +257,10 @@ def test_write_dataframe_schema_time_column():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
         assert df_read.shape == (50, 3)
-        assert df_read["time"].equals(df_sorted["time"])
+        assert df_read[TIME_COLUMN].equals(df_sorted[TIME_COLUMN])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
@@ -286,7 +286,7 @@ def test_write_dataframe_schema_time_and_dataframe_time():
             writer.write_dataframe(df)
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(
             df.sort_values('Time').rename(columns=str.lower).reset_index(drop=True)
         )
@@ -312,7 +312,7 @@ def test_write_dataframe_empty():
                 'time': [],
                 'value': []
             })
-            with pytest.raises(ValueError) as err:
+            with pytest.raises(ValueError):
                 writer.write_dataframe(df)
 
     finally:
diff --git a/python/tests/test_load_tsfile_from_iotdb.py b/python/tests/test_load_tsfile_from_iotdb.py
index d865dd357..8dcc0b1c6 100644
--- a/python/tests/test_load_tsfile_from_iotdb.py
+++ b/python/tests/test_load_tsfile_from_iotdb.py
@@ -15,12 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-
+import math
 import os
 
 import numpy as np
 
 import tsfile as ts
+from tsfile import TIME_COLUMN
 
 
 def test_load_tsfile_from_iotdb():
@@ -31,8 +32,8 @@ def test_load_tsfile_from_iotdb():
 
     ## --------
     assert len(df) == 105, "row count mismatch"
-    assert df["time"].isna().sum() == 0
-    assert int(df["time"].sum()) == 15960
+    assert df[TIME_COLUMN].isna().sum() == 0
+    assert int(df[TIME_COLUMN].sum()) == 15960
     assert df["temperature"].isna().sum() == 5
     assert df["status"].isna().sum() == 5
     assert (df["status"] == True).sum() == 50
@@ -44,8 +45,8 @@ def test_load_tsfile_from_iotdb():
     df = ts.to_dataframe(simple_tabl1_path)
     ## ---------
     assert len(df) == 60
-    assert df["time"].isna().sum() == 0
-    assert df["time"].sum() == (
+    assert df[TIME_COLUMN].isna().sum() == 0
+    assert df[TIME_COLUMN].sum() == (
             (1760106020000 + 1760106049000) * 30 // 2 +
             (1760106080000 + 1760106109000) * 30 // 2
     )
@@ -78,8 +79,8 @@ def test_load_tsfile_from_iotdb():
     df = ts.to_dataframe(simple_tabl2_path)
     ## ---------
     assert len(df) == 40
-    assert df["time"].isna().sum() == 0
-    assert int(df["time"].sum()) == 70404242080000
+    assert df[TIME_COLUMN].isna().sum() == 0
+    assert int(df[TIME_COLUMN].sum()) == 70404242080000
 
     assert df["s0"].isna().sum() == 0
     assert df["s1"].isna().sum() == 0
@@ -109,3 +110,11 @@ def test_load_tsfile_from_iotdb():
 
     assert df["s9"].isna().sum() == 5
     ## ---------
+    table_with_time_column_path = os.path.join(dir_path, 'table_with_time_column.tsfile')
+    df = ts.to_dataframe(table_with_time_column_path)
+
+    assert len(df) == 25
+    assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
+    assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
+    assert (df["region_id"] == "loc").sum() == 25
+
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index c3a970e3c..a35d5e890 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -22,7 +22,7 @@
 import pandas as pd
 import pytest
 
-from tsfile import to_dataframe, TsFileReader, ColumnCategory
+from tsfile import to_dataframe, TsFileReader, ColumnCategory, TIME_COLUMN
 from tsfile.utils import dataframe_to_tsfile
 
 
@@ -132,11 +132,11 @@ def test_dataframe_to_tsfile_custom_time_column():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('timestamp').reset_index(drop=True))
 
         assert df_read.shape == (30, 3)
-        assert df_read["time"].equals(df_sorted["timestamp"])
+        assert df_read[TIME_COLUMN].equals(df_sorted["timestamp"])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
@@ -181,7 +181,7 @@ def test_dataframe_to_tsfile_with_tag_columns():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device", "location"])
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
 
         assert df_read.shape == (20, 4)
@@ -214,7 +214,7 @@ def test_dataframe_to_tsfile_tag_time_unsorted():
 
         assert df_read.shape == (10, 3)
         assert df_read["device"].equals(df_expected["device"])
-        assert df_read["time"].equals(df_expected["time"])
+        assert df_read[TIME_COLUMN].equals(df_expected["time"])
         assert df_read["value"].equals(df_expected["value"])
     finally:
         if os.path.exists(tsfile_path):
@@ -244,7 +244,7 @@ def test_dataframe_to_tsfile_all_datatypes():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table")
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values('time').reset_index(drop=True)
+        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('time').reset_index(drop=True))
 
         assert df_read.shape == (50, 11)
diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py
index 3cef99c4a..57294a846 100644
--- a/python/tests/test_write_and_read.py
+++ b/python/tests/test_write_and_read.py
@@ -25,7 +25,7 @@
 from pandas import Float64Dtype
 from pandas.core.dtypes.common import is_integer_dtype
 
-from tsfile import ColumnSchema, TableSchema, TSEncoding
+from tsfile import ColumnSchema, TableSchema, TSEncoding, TIME_COLUMN
 from tsfile import Compressor
 from tsfile import TSDataType
 from tsfile import Tablet, RowRecord, Field
@@ -170,7 +170,7 @@ def _extract_device(row, path_columns):
         assert df_all.shape[0] == total_rows
         for measurement in all_measurements:
             assert measurement in df_all.columns
-        assert "time" in df_all.columns
+        assert TIME_COLUMN in df_all.columns
         path_columns = sorted(
             [col for col in df_all.columns if col.startswith("col_")],
             key=lambda name: int(name.split("_")[1]),
@@ -179,7 +179,7 @@ def _extract_device(row, path_columns):
 
         for _, row in df_all.iterrows():
             device = _extract_device(row, path_columns)
-            timestamp = int(row["time"])
+            timestamp = int(row[TIME_COLUMN])
             assert (device, timestamp) in expected_values
             expected_row = expected_values[(device, timestamp)]
             for measurement in all_measurements:
@@ -201,7 +201,7 @@ def _extract_device(row, path_columns):
                 assert measurement not in df_subset.columns
         for _, row in df_subset.iterrows():
             device = _extract_device(row, path_columns)
-            timestamp = int(row["time"])
+            timestamp = int(row[TIME_COLUMN])
             expected_row = expected_values[(device, timestamp)]
             for measurement in requested_columns:
                 value = row.get(measurement)
@@ -227,7 +227,7 @@ def _extract_device(row, path_columns):
         iter_rows = 0
         for batch in iterator:
             assert isinstance(batch, pd.DataFrame)
-            assert set(batch.columns).issuperset({"time", "level"})
+            assert set(batch.columns).issuperset({TIME_COLUMN, "level"})
             iter_rows += len(batch)
         assert iter_rows == 18
 
@@ -242,7 +242,7 @@ def _extract_device(row, path_columns):
         iter_rows = 0
         for batch in iterator:
             assert isinstance(batch, pd.DataFrame)
-            assert set(batch.columns).issuperset({"time", "level"})
+            assert set(batch.columns).issuperset({TIME_COLUMN, "level"})
             iter_rows += len(batch)
         assert iter_rows == 9
 
@@ -384,7 +384,7 @@ def test_table_writer_and_reader():
                                     0, 10) as result:
                 cur_line = 0
                 while result.next():
-                    cur_time = result.get_value_by_name("time")
+                    cur_time = result.get_value_by_name(TIME_COLUMN)
                     assert result.get_value_by_name("device") == "device" + str(cur_time)
                     assert result.is_null_by_name("device") == False
                     assert result.is_null_by_name("value") == False
@@ -545,7 +545,7 @@ def test_tsfile_to_df():
         df1 = to_dataframe("table_write_to_df.tsfile")
         assert df1.shape == (4097, 4)
         assert df1["value2"].sum() == 100 * (1 + 4096) / 2 * 4096
-        assert is_integer_dtype(df1["time"])
+        assert is_integer_dtype(df1[TIME_COLUMN])
         assert df1["value"].dtype == Float64Dtype()
         assert is_integer_dtype(df1["value2"])
         df2 = to_dataframe("table_write_to_df.tsfile", column_names=["device", "value2"])
diff --git a/python/tsfile/constants.py b/python/tsfile/constants.py
index 6f233e271..18da3aef7 100644
--- a/python/tsfile/constants.py
+++ b/python/tsfile/constants.py
@@ -15,10 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-from datetime import datetime
 from enum import unique, IntEnum
+
 import numpy as np
 
+TIME_COLUMN = "time"
+
 @unique
 class TSDataType(IntEnum):
     BOOLEAN = 0
@@ -103,7 +105,7 @@ def from_pandas_datatype(cls, dtype):
                 return cls.STRING
         except (ImportError, AttributeError):
             pass
-        
+
         if hasattr(dtype, 'type'):
             dtype = dtype.type
             if dtype is np.bool_:
@@ -118,12 +120,12 @@ def from_pandas_datatype(cls, dtype):
                 return cls.DOUBLE
             elif dtype is np.object_:
                 return cls.STRING
-        
+
         dtype_str = str(dtype)
 
         if 'stringdtype' in dtype_str.lower() or dtype_str.startswith('string'):
             return cls.STRING
-        
+
         dtype_map = {
             'bool': cls.BOOLEAN,
             'boolean': cls.BOOLEAN,
@@ -137,17 +139,17 @@ def from_pandas_datatype(cls, dtype):
             'object': cls.STRING,
             'string': cls.STRING,
         }
-        
+
         if dtype_str in dtype_map:
             return dtype_map[dtype_str]
-        
+
         dtype_lower = dtype_str.lower()
         if dtype_lower in dtype_map:
             return dtype_map[dtype_lower]
 
         if 'object_' in dtype_lower or dtype_str == "<class 'numpy.object_'>":
             return cls.STRING
-        
+
         if dtype_str.startswith('datetime64'):
             return cls.TIMESTAMP
 
@@ -163,8 +165,6 @@ def from_pandas_datatype(cls, dtype):
 }
 
 
-
-
 @unique
 class TSEncoding(IntEnum):
     PLAIN = 0
diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index 91732eee3..f0fa39b1f 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -119,15 +119,17 @@ def __init__(self, table_name: str, columns: List[ColumnSchema]):
         self.table_name = table_name.lower()
         if len(columns) == 0:
             raise ValueError("Columns cannot be empty")
-        self.columns = columns
-        for column in self.columns:
+        self.columns = []
+        for column in columns:
             if column.get_category() == ColumnCategory.TIME:
                 if self.time_column is not None:
                     raise ValueError(
                         f"Table '{self.table_name}' cannot have multiple time columns: "
-                        f"'{self.time_column.name}' and '{column.name}'"
+                        f"'{self.time_column.get_column_name()}' and '{column.get_column_name()}'"
                     )
                 self.time_column = column
+            else:
+                self.columns.append(column)
 
     def get_table_name(self):
         return self.table_name

From 6df963f2f7b54c3cbd94230bf2de143a4a4fcbb1 Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Wed, 11 Feb 2026 00:35:03 +0800
Subject: [PATCH 09/13] tidy TsID default constructor formatting.

---
 cpp/src/utils/db_utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/utils/db_utils.h b/cpp/src/utils/db_utils.h
index 5a1dea8db..607144af1 100644
--- a/cpp/src/utils/db_utils.h
+++ b/cpp/src/utils/db_utils.h
@@ -72,7 +72,7 @@ struct TsID {
     NodeID device_nid_;
     NodeID measurement_nid_;
 
-    TsID() : db_nid_(0), device_nid_(0), measurement_nid_(0) {};
+    TsID() : db_nid_(0), device_nid_(0), measurement_nid_(0) {}  // all ids 0
 
     TsID(NodeID db_nid, NodeID device_nid, NodeID measurement_nid)
         : db_nid_(db_nid),

From 1a8f90d53e5fad365044d1e61bff4434ab86ac10 Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Wed, 11 Feb 2026 09:54:10 +0800
Subject: [PATCH 10/13] stop skipping TIME columns when registering a table.

---
 cpp/src/cwrapper/tsfile_cwrapper.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc
index 539d5b968..fbcf4e6f1 100644
--- a/cpp/src/cwrapper/tsfile_cwrapper.cc
+++ b/cpp/src/cwrapper/tsfile_cwrapper.cc
@@ -686,9 +686,6 @@ ERRNO _tsfile_writer_register_table(TsFileWriter writer, TableSchema* schema) {
     measurement_schemas.resize(schema->column_num);
     for (int i = 0; i < schema->column_num; i++) {
         ColumnSchema* cur_schema = schema->column_schemas + i;
-        if (cur_schema->column_category == TIME) {
-            continue;
-        }
         measurement_schemas[i] = new storage::MeasurementSchema(
             cur_schema->column_name,
             static_cast<common::TSDataType>(cur_schema->data_type));

From 260490647ae3db7b4ed9fd5f399bef445845c20d Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Wed, 11 Feb 2026 10:05:05 +0800
Subject: [PATCH 11/13] add table_with_time_column info.

---
 python/tests/resources/README.md | 34 +++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/python/tests/resources/README.md b/python/tests/resources/README.md
index ca80bb430..cd1a2aa04 100644
--- a/python/tests/resources/README.md
+++ b/python/tests/resources/README.md
@@ -282,4 +282,36 @@ IoTDB:test> select * from test;
 |2025-10-10T22:21:19.000+08:00| b| c|1069|7.9|v69|1970-01-01T08:00:01.069+08:00|  79|16.9|2024-12-18|text69|
 +-----------------------------+--+--+----+---+---+-----------------------------+----+----+----------+------+
 Total line number = 40
-```
\ No newline at end of file
+```
+
+In `table_with_time_column.tsfile`
+
+```
+             time region_id  temperature  humidity
+0   1770729095888       loc          0.1       0.1
+1   1770729096807       loc          0.1       0.1
+2   1770729097233       loc          0.1       0.1
+3   1770729097471       loc          0.1       0.1
+4   1770729097695       loc          0.1       0.1
+5   1770729097910       loc          0.1       0.1
+6   1770729098148       loc          0.1       0.1
+7   1770729098385       loc          0.1       0.1
+8   1770729098599       loc          0.1       0.1
+9   1770729098853       loc          0.1       0.1
+10  1770729099086       loc          0.1       0.1
+11  1770729099327       loc          0.1       0.1
+12  1770729099558       loc          0.1       0.1
+13  1770729099794       loc          0.1       0.1
+14  1770729100017       loc          0.1       0.1
+15  1770729100262       loc          0.1       0.1
+16  1770729100492       loc          0.1       0.1
+17  1770729100729       loc          0.1       0.1
+18  1770729100976       loc          0.1       0.1
+19  1770729101243       loc          0.1       0.1
+20  1770729101494       loc          0.1       0.1
+21  1770729101734       loc          0.1       0.1
+22  1770729102040       loc          0.1       0.1
+23  1770729102333       loc          0.1       0.1
+24  1770729103005       loc          0.1       0.1
+```
+

From 77da19ed6f30c121fcaa641156c9577fc4f5c52a Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Thu, 12 Feb 2026 21:15:40 +0800
Subject: [PATCH 12/13] expose schema time column in table query results.

---
 cpp/src/common/tsblock/tuple_desc.h           |  9 +++
 .../block/single_device_tsblock_reader.cc     |  8 ++
 cpp/src/reader/column_mapping.h               | 11 ++-
 cpp/src/reader/table_query_executor.cc        | 11 +--
 .../table_view/tsfile_reader_table_test.cc    | 81 +++++++++++++++++++
 .../table_view/tsfile_writer_table_test.cc    |  2 +-
 python/tsfile/schema.py                       |  7 ++
 python/tsfile/tsfile_reader.pyx               |  4 +-
 python/tsfile/utils.py                        | 36 +++++++--
 9 files changed, 151 insertions(+), 18 deletions(-)

diff --git a/cpp/src/common/tsblock/tuple_desc.h b/cpp/src/common/tsblock/tuple_desc.h
index 6010d677b..3cd26b3f6 100644
--- a/cpp/src/common/tsblock/tuple_desc.h
+++ b/cpp/src/common/tsblock/tuple_desc.h
@@ -76,6 +76,15 @@ class TupleDesc {
         return column_list_[index].column_category_;
     }
 
+    // Returns the index of the first TIME-category column, or -1 if none.
+    FORCE_INLINE int get_time_column_index() const {
+        for (uint32_t i = 0; i < column_list_.size(); i++) {
+            if (column_list_[i].get_column_category() == ColumnCategory::TIME)
+                return static_cast<int>(i);
+        }
+        return -1;
+    }
+
     FORCE_INLINE std::string get_column_name(uint32_t index) {
         return column_list_[index].column_name_;
     }
diff --git a/cpp/src/reader/block/single_device_tsblock_reader.cc b/cpp/src/reader/block/single_device_tsblock_reader.cc
index 0e2b350c7..836ab6956 100644
--- a/cpp/src/reader/block/single_device_tsblock_reader.cc
+++ b/cpp/src/reader/block/single_device_tsblock_reader.cc
@@ -164,6 +164,14 @@ int SingleDeviceTsBlockReader::fill_measurements(
         }
         col_appenders_[time_column_index_]->append((const char*)&next_time_,
                                                    sizeof(next_time_));
+        int time_in_query_index = tuple_desc_.get_time_column_index();
+        if (time_in_query_index != -1) {
+            if (!col_appenders_[time_in_query_index]->add_row()) {
+                assert(false);
+            }
+            col_appenders_[time_in_query_index]->append(
+                (const char*)&next_time_, sizeof(next_time_));
+        }
         for (auto& column_context : column_contexts) {
             column_context->fill_into(col_appenders_);
             if (RET_FAIL(advance_column(column_context))) {
diff --git a/cpp/src/reader/column_mapping.h b/cpp/src/reader/column_mapping.h
index abf9eafba..99e153030 100644
--- a/cpp/src/reader/column_mapping.h
+++ b/cpp/src/reader/column_mapping.h
@@ -36,8 +36,10 @@ class ColumnMapping {
 
         if (column_category == common::ColumnCategory::TAG) {
             tag_columns_.insert(column_name);
-        } else {
+        } else if (column_category == common::ColumnCategory::FIELD) {
             field_columns_.insert(column_name);
+        } else if (column_category == common::ColumnCategory::TIME) {
+            time_column_ = column_name;
         }
 
         return common::E_OK;
@@ -64,6 +66,10 @@ class ColumnMapping {
         return field_columns_.find(column_name) != field_columns_.end();
     }
 
+    bool is_time(const std::string& column_name) const {  // false if unset
+        return !time_column_.empty() && time_column_ == column_name;
+    }
+
     const std::unordered_set<std::string>& get_id_columns() const {
         return tag_columns_;
     }
@@ -72,8 +78,11 @@ class ColumnMapping {
         return field_columns_;
     }
 
+    const std::string& get_time_column() const { return time_column_; }
+
    private:
     std::unordered_map<std::string, std::vector<int>> column_pos_map;
+    std::string time_column_;
     std::unordered_set<std::string> tag_columns_;
     std::unordered_set<std::string> field_columns_;
 };
diff --git a/cpp/src/reader/table_query_executor.cc b/cpp/src/reader/table_query_executor.cc
index 79b636b52..2a01a6d5c 100644
--- a/cpp/src/reader/table_query_executor.cc
+++ b/cpp/src/reader/table_query_executor.cc
@@ -65,9 +65,10 @@ int TableQueryExecutor::query(const std::string& table_name,
     }
     // column_mapping.add(*measurement_filter);
 
-    auto device_task_iterator = std::unique_ptr<DeviceTaskIterator>(
-        new DeviceTaskIterator(columns, table_root, column_mapping,
-                               meta_data_querier_, id_filter, table_schema));
+    auto device_task_iterator =
+        std::unique_ptr<DeviceTaskIterator>(new DeviceTaskIterator(
+            lower_case_column_names, table_root, column_mapping,
+            meta_data_querier_, id_filter, table_schema));
 
     std::unique_ptr<TsBlockReader> tsblock_reader;
     switch (table_query_ordering_) {
@@ -82,8 +83,8 @@ int TableQueryExecutor::query(const std::string& table_name,
             ret = common::E_UNSUPPORTED_ORDER;
     }
     assert(tsblock_reader != nullptr);
-    ret_qds =
-        new TableResultSet(std::move(tsblock_reader), columns, data_types);
+    ret_qds = new TableResultSet(std::move(tsblock_reader),
+                                 lower_case_column_names, data_types);
     return ret;
 }
 
diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
index c281de413..b9f0eb213 100644
--- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
+++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
@@ -707,3 +707,84 @@ TEST_F(TsFileTableReaderTest, TestNullInTable4) {
             ASSERT_EQ(line, max_rows);
         });
 }
+
+TEST_F(TsFileTableReaderTest, TestTimeColumnReader) {
+    std::vector<common::ColumnSchema> column_schemas;
+    column_schemas.emplace_back("s0", TSDataType::INT64,
+                                CompressionType::UNCOMPRESSED,
+                                TSEncoding::PLAIN, ColumnCategory::FIELD);
+    column_schemas.emplace_back("S1", TSDataType::DOUBLE,
+                                CompressionType::UNCOMPRESSED,
+                                TSEncoding::PLAIN, ColumnCategory::FIELD);
+    // TIME column: filled from tablet timestamps, never written as a value.
+    column_schemas.emplace_back("TIME_D", TSDataType::TIMESTAMP,
+                                CompressionType::UNCOMPRESSED,
+                                TSEncoding::PLAIN, ColumnCategory::TIME);
+
+    TableSchema table_schema("testTableTime", column_schemas);
+    auto tsfile_table_writer_ =
+        std::make_shared<TsFileTableWriter>(&write_file_, &table_schema);
+
+    const int num_rows = 20;
+    const int64_t base_time = 1000;
+    storage::Tablet tablet(table_schema.get_table_name(), {"s0", "s1"},
+                           {TSDataType::INT64, TSDataType::DOUBLE},
+                           {ColumnCategory::FIELD, ColumnCategory::FIELD},
+                           num_rows);
+
+    for (int i = 0; i < num_rows; i++) {
+        int64_t t = base_time + i;
+        tablet.add_timestamp(i, t);
+        tablet.add_value(i, 0, static_cast<int64_t>(i * 10));
+        tablet.add_value(i, 1, static_cast<double>(i * 1.5));
+    }
+
+    ASSERT_EQ(tsfile_table_writer_->write_table(tablet), common::E_OK);
+    ASSERT_EQ(tsfile_table_writer_->flush(), common::E_OK);
+    ASSERT_EQ(tsfile_table_writer_->close(), common::E_OK);
+
+    storage::TsFileReader reader;
+    int ret = reader.open(file_name_);
+    ASSERT_EQ(ret, common::E_OK);
+
+    ResultSet* tmp_result_set = nullptr;
+    ret = reader.query(table_schema.get_table_name(), {"s0", "s1", "TIME_D"}, 0,
+                       1000000000000, tmp_result_set);
+    ASSERT_EQ(ret, common::E_OK);
+    ASSERT_NE(tmp_result_set, nullptr);
+
+    auto* table_result_set = dynamic_cast<TableResultSet*>(tmp_result_set);
+    ASSERT_NE(table_result_set, nullptr);
+
+    auto result_set_metadata = table_result_set->get_metadata();
+    ASSERT_EQ(result_set_metadata->get_column_count(),
+              4);  // time + s0 + s1 + TIME_D
+    ASSERT_EQ(result_set_metadata->get_column_name(1), "time");
+    ASSERT_EQ(result_set_metadata->get_column_type(1), TSDataType::INT64);
+    ASSERT_EQ(result_set_metadata->get_column_name(2), "s0");
+    ASSERT_EQ(result_set_metadata->get_column_type(2), TSDataType::INT64);
+    ASSERT_EQ(result_set_metadata->get_column_name(3), "s1");
+    ASSERT_EQ(result_set_metadata->get_column_type(3), TSDataType::DOUBLE);
+    ASSERT_EQ(result_set_metadata->get_column_name(4), "time_d");
+    ASSERT_EQ(result_set_metadata->get_column_type(4), TSDataType::TIMESTAMP);
+
+    bool has_next = false;
+    int row_count = 0;
+    while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
+        int64_t row_time = base_time + row_count;
+        // Column 1 is the built-in "time" column (1-based indexing).
+        ASSERT_EQ(table_result_set->get_value<int64_t>(1), row_time);
+        // Columns 2 and 3: s0 and s1 as written above.
+        ASSERT_EQ(table_result_set->get_value<int64_t>(2), row_count * 10);
+        ASSERT_DOUBLE_EQ(table_result_set->get_value<double>(3),
+                         static_cast<double>(row_count * 1.5));
+        // Schema TIME column: same timestamp, by name and by index.
+        ASSERT_EQ(table_result_set->get_value<int64_t>("TIME_D"), row_time);
+        ASSERT_EQ(table_result_set->get_value<int64_t>(4), row_time);
+        row_count++;
+    }
+    ASSERT_EQ(row_count, num_rows);
+
+    reader.destroy_query_data_set(table_result_set);
+    ASSERT_EQ(reader.close(), common::E_OK);
+}
diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
index d5861ea16..1f8c80ff6 100644
--- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
+++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
@@ -447,7 +447,7 @@ TEST_F(TsFileWriterTableTest, WriteAndReadSimple) {
     ASSERT_EQ(ret_value, 0);
     auto* table_result_set = (TableResultSet*)ret;
     auto metadata = ret->get_metadata();
-    ASSERT_EQ(metadata->get_column_name(column_names.size() + 1), "VALUE");
+    ASSERT_EQ(metadata->get_column_name(column_names.size() + 1), "value");
     bool has_next = false;
     int cur_line = 0;
     while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index f0fa39b1f..d8671a33c 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -197,6 +197,13 @@ def __init__(self, column_list: List[str], data_types: List[TSDataType]):
     def set_table_name(self, table_name: str):
         self.table_name = table_name
 
+    def add_column_at(self, index: int, column_name: str, data_type: TSDataType):
+        """Insert a column and its data type at the given position (0-based index)."""
+        if index < 0 or index > len(self.column_list):
+            raise IndexError(f"column index {index} out of range (0 to {len(self.column_list)})")
+        self.column_list.insert(index, column_name)
+        self.data_types.insert(index, data_type)
+
     def get_data_type(self, column_index: int) -> TSDataType:
         if column_index < 1 or column_index > len(self.column_list):
             raise OverflowError
diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx
index 041764f91..4476d24dc 100644
--- a/python/tsfile/tsfile_reader.pyx
+++ b/python/tsfile/tsfile_reader.pyx
@@ -19,7 +19,6 @@
 #cython: language_level=3
 
 import weakref
-from email.contentmanager import raw_data_manager
 from typing import List
 
 import pandas as pd
@@ -154,7 +153,6 @@ cdef class ResultSetPy:
         # Well when we check is null, id from 0, so there index -1.
         if tsfile_result_set_is_null_by_index(self.result, index):
             return None
-        # data type in metadata is an array, id from 0.
         data_type = self.metadata.get_data_type(index)
         if data_type == TSDataTypePy.INT32:
             return tsfile_result_set_get_value_by_index_int32_t(self.result, index)
@@ -297,7 +295,7 @@ cdef class TsFileReaderPy:
         return pyresult
 
     def query_table_on_tree(self, column_names : List[str],
-                    start_time : int = INT64_MIN, end_time : int = INT64_MAX) -> ResultSetPy:
+                            start_time : int = INT64_MIN, end_time : int = INT64_MAX) -> ResultSetPy:
         """
         Execute a time range query on specified columns on tree structure.
         :return: query result handler.
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index 71e213462..4366ef5be 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -99,6 +99,17 @@ def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]:
         _start_time = start_time if start_time is not None else np.iinfo(np.int64).min
         _end_time = end_time if end_time is not None else np.iinfo(np.int64).max
 
+        ## Time column handling (table model):
+        ## 1. Request has no column list (query all):
+        ##    1.1 TsFile has a time column in schema: query only non-time columns; then rename
+        ##        the first column of the returned DataFrame to the schema time column name.
+        ##    1.2 TsFile has no time column in schema: query as-is; first column is "time".
+        ## 2. Request has a column list but no time column:
+        ##    2.1 TsFile has a time column in schema: query with requested columns; rename the
+        ##        first column to the schema time column name.
+        ##    2.2 TsFile has no time column in schema: first column stays "time"; no rename.
+        ## 3. Request has a column list including the time column:
+        ##    3.1 Query with requested columns (including time); do not rename the first column.
         with TsFileReaderPy(file_path) as reader:
             total_rows = 0
             table_schema = reader.get_all_table_schemas()
@@ -117,11 +128,17 @@ def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]:
                         raise TableNotExistError(_table_name)
                     columns = table_schema[_table_name]
 
-                column_names_in_file = columns.get_column_names()
+                column_names_in_file = []
+                time_column = None
+                for column in columns:
+                    if column.get_category() == ColumnCategory.TIME:
+                        time_column = column.get_column_name()
+                    else:
+                        column_names_in_file.append(column.get_column_name())
 
                 if _column_names is not None:
                     for column in _column_names:
-                        if column.lower() not in column_names_in_file:
+                        if column.lower() not in column_names_in_file and column.lower() != time_column :
                             raise ColumnNotExistError(column)
                 else:
                     _column_names = column_names_in_file
@@ -136,18 +153,21 @@ def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]:
             with query_result as result:
                 while result.next():
                     if max_row_num is None:
-                        df = result.read_data_frame()
+                        dataframe = result.read_data_frame()
                     elif is_iterator:
-                        df = result.read_data_frame(max_row_num)
+                        dataframe = result.read_data_frame(max_row_num)
                     else:
                         remaining_rows = max_row_num - total_rows
                         if remaining_rows <= 0:
                             break
-                        df = result.read_data_frame(remaining_rows)
-                    if df is None or df.empty:
+                        dataframe = result.read_data_frame(remaining_rows)
+                    if dataframe is None or dataframe.empty:
                         continue
-                    total_rows += len(df)
-                    yield df
+                    total_rows += len(dataframe)
+                    if time_column is not None:
+                        if _column_names is None or time_column.lower() not in [c.lower() for c in _column_names]:
+                            dataframe = dataframe.rename(columns={dataframe.columns[0]: time_column})
+                    yield dataframe
                     if (not is_iterator) and max_row_num is not None and total_rows >= max_row_num:
                         break
 

From 56aa10499bdb0ab113dd49390a84853ee1ee1378 Mon Sep 17 00:00:00 2001
From: ColinLee <shuolin_l@163.com>
Date: Thu, 12 Feb 2026 23:16:24 +0800
Subject: [PATCH 13/13] support time column

---
 python/tests/resources/README.md            | 70 +++++++++++++--------
 python/tests/test_load_tsfile_from_iotdb.py | 16 ++++-
 python/tests/test_to_tsfile.py              |  4 +-
 python/tsfile/schema.py                     |  4 +-
 python/tsfile/utils.py                      |  9 ++-
 5 files changed, 66 insertions(+), 37 deletions(-)

diff --git a/python/tests/resources/README.md b/python/tests/resources/README.md
index cd1a2aa04..d5ec82b49 100644
--- a/python/tests/resources/README.md
+++ b/python/tests/resources/README.md
@@ -287,31 +287,49 @@ Total line number = 40
 In `table_with_time_column.tsfile`
 
 ```
-             time region_id  temperature  humidity
-0   1770729095888       loc          0.1       0.1
-1   1770729096807       loc          0.1       0.1
-2   1770729097233       loc          0.1       0.1
-3   1770729097471       loc          0.1       0.1
-4   1770729097695       loc          0.1       0.1
-5   1770729097910       loc          0.1       0.1
-6   1770729098148       loc          0.1       0.1
-7   1770729098385       loc          0.1       0.1
-8   1770729098599       loc          0.1       0.1
-9   1770729098853       loc          0.1       0.1
-10  1770729099086       loc          0.1       0.1
-11  1770729099327       loc          0.1       0.1
-12  1770729099558       loc          0.1       0.1
-13  1770729099794       loc          0.1       0.1
-14  1770729100017       loc          0.1       0.1
-15  1770729100262       loc          0.1       0.1
-16  1770729100492       loc          0.1       0.1
-17  1770729100729       loc          0.1       0.1
-18  1770729100976       loc          0.1       0.1
-19  1770729101243       loc          0.1       0.1
-20  1770729101494       loc          0.1       0.1
-21  1770729101734       loc          0.1       0.1
-22  1770729102040       loc          0.1       0.1
-23  1770729102333       loc          0.1       0.1
-24  1770729103005       loc          0.1       0.1
+IoTDB:mydb> select * from table2;
++-----------------------------+---------+-----------+--------+
+|                           id|region_id|temperature|humidity|
++-----------------------------+---------+-----------+--------+
+|2026-02-10T21:11:35.888+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:36.807+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:37.233+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:37.471+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:37.695+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:37.910+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:38.148+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:38.385+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:38.599+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:38.853+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:39.086+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:39.327+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:39.558+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:39.794+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:40.017+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:40.262+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:40.492+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:40.729+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:40.976+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:41.243+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:41.494+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:41.734+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:42.040+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:42.333+08:00|      loc|        0.1|     0.1|
+|2026-02-10T21:11:43.005+08:00|      loc|        0.1|     0.1|
++-----------------------------+---------+-----------+--------+
+Total line number = 25
+It costs 0.042s
+IoTDB:mydb> describe table2
++-----------+---------+--------+
+| ColumnName| DataType|Category|
++-----------+---------+--------+
+|         id|TIMESTAMP|    TIME|
+|  region_id|   STRING|     TAG|
+|temperature|    FLOAT|   FIELD|
+|   humidity|   DOUBLE|   FIELD|
++-----------+---------+--------+
+Total line number = 4
+It costs 0.065s
+IoTDB:mydb> 
 ```
 
diff --git a/python/tests/test_load_tsfile_from_iotdb.py b/python/tests/test_load_tsfile_from_iotdb.py
index 8dcc0b1c6..50ca0baf4 100644
--- a/python/tests/test_load_tsfile_from_iotdb.py
+++ b/python/tests/test_load_tsfile_from_iotdb.py
@@ -111,10 +111,24 @@ def test_load_tsfile_from_iotdb():
     assert df["s9"].isna().sum() == 5
     ## ---------
     table_with_time_column_path = os.path.join(dir_path, 'table_with_time_column.tsfile')
-    df = ts.to_dataframe(table_with_time_column_path)
 
+    df = ts.to_dataframe(table_with_time_column_path)
+    assert list(df.columns)[0] == "id"
     assert len(df) == 25
     assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
     assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
     assert (df["region_id"] == "loc").sum() == 25
 
+    df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["region_id", "temperature", "humidity"])
+    assert list(df.columns)[0] == "id"
+    assert len(df) == 25
+    assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
+    assert (df["region_id"] == "loc").sum() == 25
+
+    df = ts.to_dataframe(table_with_time_column_path, table_name="table2", column_names=["id", "temperature", "humidity"])
+    assert list(df.columns)[0] == "time"
+    assert df["id"].equals(df["time"])
+    assert len(df) == 25
+    assert math.isclose(df["temperature"].sum(), 2.5, rel_tol=1e-9)
+    assert math.isclose(df["humidity"].sum(), 2.5, rel_tol=1e-9)
+
diff --git a/python/tests/test_to_tsfile.py b/python/tests/test_to_tsfile.py
index a35d5e890..4e0481883 100644
--- a/python/tests/test_to_tsfile.py
+++ b/python/tests/test_to_tsfile.py
@@ -132,11 +132,11 @@ def test_dataframe_to_tsfile_custom_time_column():
         dataframe_to_tsfile(df, tsfile_path, table_name="test_table", time_column="timestamp")
 
         df_read = to_dataframe(tsfile_path, table_name="test_table")
-        df_read = df_read.sort_values(TIME_COLUMN).reset_index(drop=True)
+        df_read = df_read.sort_values("timestamp").reset_index(drop=True)
         df_sorted = convert_to_nullable_types(df.sort_values('timestamp').reset_index(drop=True))
 
         assert df_read.shape == (30, 3)
-        assert df_read[TIME_COLUMN].equals(df_sorted["timestamp"])
+        assert df_read["timestamp"].equals(df_sorted["timestamp"])
         assert df_read["device"].equals(df_sorted["device"])
         assert df_read["value"].equals(df_sorted["value"])
     finally:
diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py
index d8671a33c..c89649bf3 100644
--- a/python/tsfile/schema.py
+++ b/python/tsfile/schema.py
@@ -119,7 +119,7 @@ def __init__(self, table_name: str, columns: List[ColumnSchema]):
         self.table_name = table_name.lower()
         if len(columns) == 0:
             raise ValueError("Columns cannot be empty")
-        self.columns = []
+        self.columns = columns
         for column in columns:
             if column.get_category() == ColumnCategory.TIME:
                 if self.time_column is not None:
@@ -128,8 +128,6 @@ def __init__(self, table_name: str, columns: List[ColumnSchema]):
                         f"'{self.time_column.get_column_name()}' and '{column.get_column_name()}'"
                     )
                 self.time_column = column
-            else:
-                self.columns.append(column)
 
     def get_table_name(self):
         return self.table_name
diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py
index 4366ef5be..6044ddbb6 100644
--- a/python/tsfile/utils.py
+++ b/python/tsfile/utils.py
@@ -115,22 +115,21 @@ def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]:
             table_schema = reader.get_all_table_schemas()
 
             is_tree_model = len(table_schema) == 0
-
+            time_column = None
             if is_tree_model:
                 if _column_names is None:
                     print("columns name is None, return all columns")
             else:
                 if _table_name is None:
-                    _table_name, columns = next(iter(table_schema.items()))
+                    _table_name, table_schema = next(iter(table_schema.items()))
                 else:
                     _table_name = _table_name.lower()
                     if _table_name.lower() not in table_schema:
                         raise TableNotExistError(_table_name)
-                    columns = table_schema[_table_name]
+                    table_schema = table_schema[_table_name]
 
                 column_names_in_file = []
-                time_column = None
-                for column in columns:
+                for column in table_schema.get_columns():
                     if column.get_category() == ColumnCategory.TIME:
                         time_column = column.get_column_name()
                     else: