From 44b4473c25585afa4e77cc9a3e3720b920bdd6d0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 24 Nov 2025 20:55:38 +0000 Subject: [PATCH 01/59] Begin examining how to best add structured array support to Zarr v3 driver --- tensorstore/driver/zarr3/BUILD | 21 +- tensorstore/driver/zarr3/driver.cc | 41 +- tensorstore/driver/zarr3/dtype.cc | 298 +++++++++++++ tensorstore/driver/zarr3/dtype.h | 144 ++++++ tensorstore/driver/zarr3/dtype_test.cc | 293 ++++++++++++ tensorstore/driver/zarr3/metadata.cc | 514 ++++++++++++++++------ tensorstore/driver/zarr3/metadata.h | 51 ++- tensorstore/driver/zarr3/metadata_test.cc | 45 +- 8 files changed, 1251 insertions(+), 156 deletions(-) create mode 100644 tensorstore/driver/zarr3/dtype.cc create mode 100644 tensorstore/driver/zarr3/dtype.h create mode 100644 tensorstore/driver/zarr3/dtype_test.cc diff --git a/tensorstore/driver/zarr3/BUILD b/tensorstore/driver/zarr3/BUILD index 6e0613d5b..d67f58935 100644 --- a/tensorstore/driver/zarr3/BUILD +++ b/tensorstore/driver/zarr3/BUILD @@ -94,8 +94,8 @@ tensorstore_cc_library( tensorstore_cc_library( name = "metadata", - srcs = ["metadata.cc"], - hdrs = ["metadata.h"], + srcs = ["metadata.cc", "dtype.cc"], + hdrs = ["metadata.h", "dtype.h"], deps = [ ":default_nan", ":name_configuration_json_binder", @@ -145,6 +145,23 @@ tensorstore_cc_library( ], ) +tensorstore_cc_test( + name = "dtype_test", + size = "small", + srcs = ["dtype_test.cc"], + deps = [ + ":metadata", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore/internal/testing:json_gtest", + "//tensorstore/util:status_testutil", + "//tensorstore/util:str_cat", + "@abseil-cpp//absl/status", + "@googletest//:gtest_main", + "@nlohmann_json//:json", + ], +) + tensorstore_cc_test( name = "driver_test", size = "small", diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index a516c1a7b..15faced0a 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc 
@@ -121,8 +121,19 @@ class ZarrDriverSpec "metadata", jb::Validate( [](const auto& options, auto* obj) { - TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( - obj->metadata_constraints.data_type.value_or(DataType()))); + if (obj->metadata_constraints.data_type) { + if (auto dtype = GetScalarDataType( + *obj->metadata_constraints.data_type)) { + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set(*dtype)); + } else if (obj->schema.dtype().valid()) { + return absl::InvalidArgumentError( + "schema dtype must be unspecified for structured " + "zarr3 data types"); + } else { + // Leave dtype unspecified; structured dtypes are handled + // at metadata level only. + } + } TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( RankConstraint{obj->metadata_constraints.rank})); return absl::OkStatus(); @@ -146,8 +157,8 @@ class ZarrDriverSpec SharedArray fill_value{schema.fill_value()}; const auto& metadata = metadata_constraints; - if (metadata.fill_value) { - fill_value = *metadata.fill_value; + if (metadata.fill_value && !metadata.fill_value->empty()) { + fill_value = (*metadata.fill_value)[0]; } return fill_value; @@ -274,8 +285,10 @@ class DataCacheBase static internal::ChunkGridSpecification GetChunkGridSpecification( const ZarrMetadata& metadata) { - auto fill_value = - BroadcastArray(metadata.fill_value, BoxView<>(metadata.rank)).value(); + assert(!metadata.fill_value.empty()); + auto fill_value = BroadcastArray(metadata.fill_value[0], + BoxView<>(metadata.rank)) + .value(); internal::ChunkGridSpecification::ComponentList components; auto& component = components.emplace_back( internal::AsyncWriteArray::Spec{ @@ -402,9 +415,16 @@ class DataCacheBase const void* metadata_ptr, size_t component_index) override { const auto& metadata = *static_cast(metadata_ptr); ChunkLayout chunk_layout; + SpecRankAndFieldInfo info; + info.chunked_rank = metadata.rank; + if (!metadata.data_type.fields.empty()) { + info.field = &metadata.data_type.fields[0]; + } + std::optional> chunk_shape_span; + 
chunk_shape_span.emplace(metadata.chunk_shape.data(), + metadata.chunk_shape.size()); TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( - metadata.data_type, metadata.rank, metadata.chunk_shape, - &metadata.codec_specs, chunk_layout)); + info, chunk_shape_span, &metadata.codec_specs, chunk_layout)); TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Finalize()); return chunk_layout; } @@ -470,7 +490,10 @@ class ZarrDriver : public ZarrDriverBase { Result> GetFillValue( IndexTransformView<> transform) override { const auto& metadata = this->metadata(); - return metadata.fill_value; + if (metadata.fill_value.empty()) { + return SharedArray(); + } + return metadata.fill_value[0]; } Future GetStorageStatistics( diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc new file mode 100644 index 000000000..8d1c9d49e --- /dev/null +++ b/tensorstore/driver/zarr3/dtype.cc @@ -0,0 +1,298 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tensorstore/driver/zarr3/dtype.h" + +#include + +#include + +#include "absl/base/optimization.h" +#include "tensorstore/data_type.h" +#include "tensorstore/internal/json_binding/json_binding.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/extents.h" +#include "tensorstore/util/quote_string.h" +#include "tensorstore/util/str_cat.h" + +namespace tensorstore { +namespace internal_zarr3 { + +Result ParseBaseDType(std::string_view dtype) { + using D = ZarrDType::BaseDType; + const auto make_dtype = [&](DataType result_dtype) -> Result { + return D{std::string(dtype), result_dtype, {}}; + }; + + if (dtype == "bool") return make_dtype(dtype_v); + if (dtype == "uint8") return make_dtype(dtype_v); + if (dtype == "uint16") return make_dtype(dtype_v); + if (dtype == "uint32") return make_dtype(dtype_v); + if (dtype == "uint64") return make_dtype(dtype_v); + if (dtype == "int8") return make_dtype(dtype_v); + if (dtype == "int16") return make_dtype(dtype_v); + if (dtype == "int32") return make_dtype(dtype_v); + if (dtype == "int64") return make_dtype(dtype_v); + if (dtype == "bfloat16") + return make_dtype(dtype_v<::tensorstore::dtypes::bfloat16_t>); + if (dtype == "float16") + return make_dtype(dtype_v<::tensorstore::dtypes::float16_t>); + if (dtype == "float32") + return make_dtype(dtype_v<::tensorstore::dtypes::float32_t>); + if (dtype == "float64") + return make_dtype(dtype_v<::tensorstore::dtypes::float64_t>); + if (dtype == "complex64") + return make_dtype(dtype_v<::tensorstore::dtypes::complex64_t>); + if (dtype == "complex128") + return make_dtype(dtype_v<::tensorstore::dtypes::complex128_t>); + + constexpr std::string_view kSupported = + "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " + "bfloat16, float16, float32, float64, complex64, complex128"; + return absl::InvalidArgumentError( + tensorstore::StrCat(dtype, " data type is not one of the supported " + "data types: ", + kSupported)); +} + +namespace { + +/// 
Parses a zarr metadata "dtype" JSON specification, but does not compute any +/// derived values, and does not check for duplicate field names. +/// +/// This is called by `ParseDType`. +/// +/// \param value The zarr metadata "dtype" JSON specification. +/// \param out[out] Must be non-null. Filled with the parsed dtype on success. +/// \error `absl::StatusCode::kInvalidArgument' if `value` is invalid. +Result ParseDTypeNoDerived(const nlohmann::json& value) { + ZarrDType out; + if (value.is_string()) { + // Single field. + out.has_fields = false; + out.fields.resize(1); + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast(out.fields[0]), + ParseBaseDType(value.get())); + return out; + } + out.has_fields = true; + auto parse_result = internal_json::JsonParseArray( + value, + [&](ptrdiff_t size) { + out.fields.resize(size); + return absl::OkStatus(); + }, + [&](const ::nlohmann::json& x, ptrdiff_t field_i) { + auto& field = out.fields[field_i]; + return internal_json::JsonParseArray( + x, + [&](ptrdiff_t size) { + if (size < 2 || size > 3) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected array of size 2 or 3, but received: ", x.dump())); + } + return absl::OkStatus(); + }, + [&](const ::nlohmann::json& v, ptrdiff_t i) { + switch (i) { + case 0: + if (internal_json::JsonRequireValueAs(v, &field.name).ok()) { + if (!field.name.empty()) return absl::OkStatus(); + } + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected non-empty string, but received: ", v.dump())); + case 1: { + std::string dtype_string; + TENSORSTORE_RETURN_IF_ERROR( + internal_json::JsonRequireValueAs(v, &dtype_string)); + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast(field), + ParseBaseDType(dtype_string)); + return absl::OkStatus(); + } + case 2: { + return internal_json::JsonParseArray( + v, + [&](ptrdiff_t size) { + field.outer_shape.resize(size); + return absl::OkStatus(); + }, + [&](const ::nlohmann::json& x, ptrdiff_t j) { + return internal_json::JsonRequireInteger( 
+ x, &field.outer_shape[j], /*strict=*/true, 1, + kInfIndex); + }); + } + default: + ABSL_UNREACHABLE(); // COV_NF_LINE + } + }); + }); + if (!parse_result.ok()) return parse_result; + return out; +} + +} // namespace + +absl::Status ValidateDType(ZarrDType& dtype) { + dtype.bytes_per_outer_element = 0; + for (size_t field_i = 0; field_i < dtype.fields.size(); ++field_i) { + auto& field = dtype.fields[field_i]; + if (std::any_of( + dtype.fields.begin(), dtype.fields.begin() + field_i, + [&](const ZarrDType::Field& f) { return f.name == field.name; })) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Field name ", QuoteString(field.name), " occurs more than once")); + } + field.field_shape.resize(field.flexible_shape.size() + + field.outer_shape.size()); + std::copy(field.flexible_shape.begin(), field.flexible_shape.end(), + std::copy(field.outer_shape.begin(), field.outer_shape.end(), + field.field_shape.begin())); + + field.num_inner_elements = ProductOfExtents(span(field.field_shape)); + if (field.num_inner_elements == std::numeric_limits::max()) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Product of dimensions ", span(field.field_shape), " is too large")); + } + if (internal::MulOverflow(field.num_inner_elements, + static_cast(field.dtype->size), + &field.num_bytes)) { + return absl::InvalidArgumentError("Field size in bytes is too large"); + } + field.byte_offset = dtype.bytes_per_outer_element; + if (internal::AddOverflow(dtype.bytes_per_outer_element, field.num_bytes, + &dtype.bytes_per_outer_element)) { + return absl::InvalidArgumentError( + "Total number of bytes per outer array element is too large"); + } + } + return absl::OkStatus(); +} + +std::optional GetScalarDataType(const ZarrDType& dtype) { + if (!dtype.has_fields && !dtype.fields.empty()) { + return dtype.fields[0].dtype; + } + return std::nullopt; +} + +Result ParseDType(const nlohmann::json& value) { + TENSORSTORE_ASSIGN_OR_RETURN(ZarrDType dtype, 
ParseDTypeNoDerived(value)); + TENSORSTORE_RETURN_IF_ERROR(ValidateDType(dtype)); + return dtype; +} + +bool operator==(const ZarrDType::BaseDType& a, + const ZarrDType::BaseDType& b) { + return a.encoded_dtype == b.encoded_dtype && a.dtype == b.dtype && + a.flexible_shape == b.flexible_shape; +} + +bool operator!=(const ZarrDType::BaseDType& a, + const ZarrDType::BaseDType& b) { + return !(a == b); +} + +bool operator==(const ZarrDType::Field& a, const ZarrDType::Field& b) { + return static_cast(a) == + static_cast(b) && + a.outer_shape == b.outer_shape && a.name == b.name && + a.field_shape == b.field_shape && + a.num_inner_elements == b.num_inner_elements && + a.byte_offset == b.byte_offset && a.num_bytes == b.num_bytes; +} + +bool operator!=(const ZarrDType::Field& a, const ZarrDType::Field& b) { + return !(a == b); +} + +bool operator==(const ZarrDType& a, const ZarrDType& b) { + return a.has_fields == b.has_fields && + a.bytes_per_outer_element == b.bytes_per_outer_element && + a.fields == b.fields; +} + +bool operator!=(const ZarrDType& a, const ZarrDType& b) { return !(a == b); } + +void to_json(::nlohmann::json& out, const ZarrDType::Field& field) { + using array_t = ::nlohmann::json::array_t; + if (field.outer_shape.empty()) { + out = array_t{field.name, field.encoded_dtype}; + } else { + out = array_t{field.name, field.encoded_dtype, field.outer_shape}; + } +} + +void to_json(::nlohmann::json& out, // NOLINT + const ZarrDType& dtype) { + if (!dtype.has_fields) { + out = dtype.fields[0].encoded_dtype; + } else { + out = dtype.fields; + } +} + +TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(ZarrDType, [](auto is_loading, + const auto& options, + auto* obj, auto* j) { + if constexpr (is_loading) { + TENSORSTORE_ASSIGN_OR_RETURN(*obj, ParseDType(*j)); + } else { + to_json(*j, *obj); + } + return absl::OkStatus(); +}) + +namespace { + +Result MakeBaseDType(std::string_view name, + DataType dtype) { + ZarrDType::BaseDType base_dtype; + base_dtype.dtype = dtype; + 
base_dtype.encoded_dtype = std::string(name); + return base_dtype; +} + +} // namespace + +Result ChooseBaseDType(DataType dtype) { + if (dtype == dtype_v) return MakeBaseDType("bool", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint8", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint16", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint32", dtype); + if (dtype == dtype_v) return MakeBaseDType("uint64", dtype); + if (dtype == dtype_v) return MakeBaseDType("int8", dtype); + if (dtype == dtype_v) return MakeBaseDType("int16", dtype); + if (dtype == dtype_v) return MakeBaseDType("int32", dtype); + if (dtype == dtype_v) return MakeBaseDType("int64", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::bfloat16_t>) + return MakeBaseDType("bfloat16", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::float16_t>) + return MakeBaseDType("float16", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::float32_t>) + return MakeBaseDType("float32", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::float64_t>) + return MakeBaseDType("float64", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::complex64_t>) + return MakeBaseDType("complex64", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::complex128_t>) + return MakeBaseDType("complex128", dtype); + return absl::InvalidArgumentError( + tensorstore::StrCat("Data type not supported: ", dtype)); +} + +} // namespace internal_zarr3 +} // namespace tensorstore diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h new file mode 100644 index 000000000..430dd8849 --- /dev/null +++ b/tensorstore/driver/zarr3/dtype.h @@ -0,0 +1,144 @@ +// Copyright 2020 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TENSORSTORE_DRIVER_ZARR3_DTYPE_H_ +#define TENSORSTORE_DRIVER_ZARR3_DTYPE_H_ + +/// \file +/// Support for encoding/decoding zarr "dtype" specifications. +/// See: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#data-type + +#include +#include +#include "tensorstore/data_type.h" +#include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/util/endian.h" +#include "tensorstore/util/result.h" + +namespace tensorstore { +namespace internal_zarr3 { + +/// Decoded representation of a zarr "dtype" specification. +/// +/// A zarr "dtype" is a JSON value that is either: +/// +/// 1. A string, which specifies a single data type (e.g. "int32"). +/// In this case, the zarr array is considered to have a single, unnamed field. +/// +/// 2. An array, where each element of the array is of the form: +/// `[name, type]` or `[name, type, shape]`, where `name` is a JSON +/// string specifying the unique, non-empty field name, `type` is a data type +/// string, and `shape` is an optional "inner" array shape (specified +/// as a JSON array of non-negative integers) which defaults to the rank-0 +/// shape `[]` if not specified. +/// +/// Each field is encoded according to `type` into a fixed-size sequence of +/// bytes. If the optional "inner" array `shape` is specified, the individual +/// elements are encoded in C order. The encoding of each multi-field array +/// element is simply the concatenation of the encodings of each field. +struct ZarrDType { + /// Decoded representation of single value. 
+ struct BaseDType { + /// Data type string. + std::string encoded_dtype; + + /// Corresponding DataType used for in-memory representation. + DataType dtype; + + /// For "flexible" data types that are themselves arrays, this specifies the + /// shape. For regular data types, this is empty. + std::vector flexible_shape; + }; + + /// Decoded representation of a single field. + struct Field : public BaseDType { + /// Optional `shape` dimensions specified by a zarr "dtype" field specified + /// as a JSON array. If the zarr dtype was specified as a single `typestr` + /// value, or as a two-element array, this is empty. + std::vector outer_shape; + + /// Field name. Must be non-empty and unique if the zarr "dtype" was + /// specified as an array. Otherwise, is empty. + std::string name; + + /// The inner array dimensions of this field, equal to the concatenation of + /// `outer_shape` and `flexible_shape` (derived value). + std::vector field_shape; + + /// Product of `field_shape` dimensions (derived value). + Index num_inner_elements; + + /// Byte offset of this field within an "outer" element (derived value). + Index byte_offset; + + /// Number of bytes occupied by this field within an "outer" element + /// (derived value). + Index num_bytes; + }; + + /// Equal to `true` if the zarr "dtype" was specified as an array, in which + /// case all fields must have a unique, non-empty `name`. If `false`, there + /// must be a single field with an empty `name`. + bool has_fields; + + /// Decoded representation of the fields. + std::vector fields; + + /// Bytes per "outer" element (derived value). 
+ Index bytes_per_outer_element;
+
+ TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(ZarrDType,
+ internal_json_binding::NoOptions)
+
+ friend void to_json(::nlohmann::json& out, // NOLINT
+ const ZarrDType& dtype);
+};
+
+bool operator==(const ZarrDType::BaseDType& a,
+ const ZarrDType::BaseDType& b);
+bool operator!=(const ZarrDType::BaseDType& a,
+ const ZarrDType::BaseDType& b);
+bool operator==(const ZarrDType::Field& a, const ZarrDType::Field& b);
+bool operator!=(const ZarrDType::Field& a, const ZarrDType::Field& b);
+bool operator==(const ZarrDType& a, const ZarrDType& b);
+bool operator!=(const ZarrDType& a, const ZarrDType& b);
+
+/// Parses a zarr metadata "dtype" JSON specification.
+///
+/// \error `absl::StatusCode::kInvalidArgument` if `value` is not valid.
+Result ParseDType(const ::nlohmann::json& value);
+
+/// Validates `dtype` and computes derived values.
+///
+/// \error `absl::StatusCode::kInvalidArgument` if two fields have the same
+/// name.
+/// \error `absl::StatusCode::kInvalidArgument` if the field size is too large.
+absl::Status ValidateDType(ZarrDType& dtype);
+
+/// Returns the underlying TensorStore `DataType` if `dtype` represents an
+/// unstructured scalar array, otherwise `std::nullopt`.
+std::optional GetScalarDataType(const ZarrDType& dtype);
+
+ /// Parses a Zarr 3 data type string.
+ ///
+ /// \error `absl::StatusCode::kInvalidArgument` if `dtype` is not valid.
+ Result ParseBaseDType(std::string_view dtype);
+
+ /// Chooses a zarr data type corresponding to `dtype`.
+ Result ChooseBaseDType(DataType dtype); + +} // namespace internal_zarr3 +} // namespace tensorstore + +#endif // TENSORSTORE_DRIVER_ZARR3_DTYPE_H_ diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc new file mode 100644 index 000000000..cbb7acbfb --- /dev/null +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -0,0 +1,293 @@ +// Copyright 2023 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tensorstore/driver/zarr3/dtype.h" + +#include +#include + +#include // for std::byte +#include +#include + +#include +#include +#include "absl/status/status.h" +#include +#include "tensorstore/data_type.h" +#include "tensorstore/index.h" +#include "tensorstore/internal/testing/json_gtest.h" +#include "tensorstore/util/status_testutil.h" +#include "tensorstore/util/str_cat.h" + +namespace { + +using ::tensorstore::DataType; +using ::tensorstore::dtype_v; +using ::tensorstore::Index; +using ::tensorstore::kInfIndex; +using ::tensorstore::StatusIs; +using ::tensorstore::internal_zarr3::ChooseBaseDType; +using ::tensorstore::internal_zarr3::ParseBaseDType; +using ::tensorstore::internal_zarr3::ParseDType; +using ::tensorstore::internal_zarr3::ZarrDType; +using ::testing::HasSubstr; +using ::testing::MatchesRegex; + +void CheckBaseDType(std::string dtype, DataType r, + std::vector flexible_shape) { + EXPECT_THAT(ParseBaseDType(dtype), ::testing::Optional(ZarrDType::BaseDType{ + dtype, r, 
flexible_shape})) + << dtype; +} + +TEST(ParseBaseDType, Success) { + CheckBaseDType("bool", dtype_v, {}); + CheckBaseDType("int8", dtype_v, {}); + CheckBaseDType("uint8", dtype_v, {}); + CheckBaseDType("int16", dtype_v, {}); + CheckBaseDType("uint16", dtype_v, {}); + CheckBaseDType("int32", dtype_v, {}); + CheckBaseDType("uint32", dtype_v, {}); + CheckBaseDType("int64", dtype_v, {}); + CheckBaseDType("uint64", dtype_v, {}); + CheckBaseDType("float16", dtype_v, {}); + CheckBaseDType("bfloat16", dtype_v, {}); + CheckBaseDType("float32", dtype_v, {}); + CheckBaseDType("float64", dtype_v, {}); + CheckBaseDType("complex64", dtype_v, {}); + CheckBaseDType("complex128", dtype_v, {}); +} + +TEST(ParseBaseDType, Failure) { + EXPECT_THAT( + ParseBaseDType(""), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("data type is not one of the supported data types"))); + EXPECT_THAT(ParseBaseDType("float"), + StatusIs(absl::StatusCode::kInvalidArgument)); + EXPECT_THAT(ParseBaseDType("string"), + StatusIs(absl::StatusCode::kInvalidArgument)); + EXPECT_THAT(ParseBaseDType(", + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{}, + /*.name=*/"", + /*.field_shape=*/{}, + /*.num_inner_elements=*/1, + /*.byte_offset=*/0, + /*.num_bytes=*/1}, + }, + /*.bytes_per_outer_element=*/1, + }); +} + +TEST(ParseDType, SingleNamedFieldChar) { + // Zarr 3 doesn't support fixed size strings natively in core, so we use uint8 for testing bytes + CheckDType(::nlohmann::json::array_t{{"x", "uint8"}}, + ZarrDType{ + /*.has_fields=*/true, + /*.fields=*/ + { + {{ + /*.encoded_dtype=*/"uint8", + /*.dtype=*/dtype_v, + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{}, + /*.name=*/"x", + /*.field_shape=*/{}, + /*.num_inner_elements=*/1, + /*.byte_offset=*/0, + /*.num_bytes=*/1}, + }, + /*.bytes_per_outer_element=*/1, + }); +} + +TEST(ParseDType, TwoNamedFields) { + CheckDType( + ::nlohmann::json::array_t{{"x", "int8", {2, 3}}, {"y", "int16", {5}}}, + ZarrDType{ + /*.has_fields=*/true, + 
/*.fields=*/ + { + {{ + /*.encoded_dtype=*/"int8", + /*.dtype=*/dtype_v, + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{2, 3}, + /*.name=*/"x", + /*.field_shape=*/{2, 3}, + /*.num_inner_elements=*/2 * 3, + /*.byte_offset=*/0, + /*.num_bytes=*/1 * 2 * 3}, + {{ + /*.encoded_dtype=*/"int16", + /*.dtype=*/dtype_v, + /*.flexible_shape=*/{}, + }, + /*.outer_shape=*/{5}, + /*.name=*/"y", + /*.field_shape=*/{5}, + /*.num_inner_elements=*/5, + /*.byte_offset=*/1 * 2 * 3, + /*.num_bytes=*/2 * 5}, + }, + /*.bytes_per_outer_element=*/1 * 2 * 3 + 2 * 5, + }); +} + +TEST(ParseDType, FieldSpecTooShort) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x"}}), + StatusIs( + absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Expected array of size 2 or 3, but received: [\"x\"]"))); +} + +TEST(ParseDType, FieldSpecTooLong) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "int16", {2, 3}, 5}}), + StatusIs( + absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Expected array of size 2 or 3, but received: " + "[\"x\",\"int16\",[2,3],5]"))); +} + +TEST(ParseDType, InvalidFieldName) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{3, "int16"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 0: " + "Expected non-empty string, but received: 3"))); +} + +TEST(ParseDType, EmptyFieldName) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"", "int16"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 0: " + "Expected non-empty string, but received: \"\""))); +} + +TEST(ParseDType, DuplicateFieldName) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "int16"}, {"x", "uint16"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Field name \"x\" occurs more than once"))); +} + +TEST(ParseDType, 
NonStringFieldBaseDType) { + EXPECT_THAT(ParseDType(::nlohmann::json::array_t{{"x", 3}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 1: " + "Expected string, but received: 3"))); +} + +TEST(ParseDType, InvalidFieldBaseDType) { + EXPECT_THAT(ParseDType(::nlohmann::json::array_t{{"x", "unknown"}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Error parsing value at position 0: " + "Error parsing value at position 1: " + "unknown data type is not one of the " + "supported data types"))); +} + +TEST(ParseDType, ProductOfDimensionsOverflow) { + EXPECT_THAT( + ParseDType( + ::nlohmann::json::array_t{{"x", "int8", {kInfIndex, kInfIndex}}}), + StatusIs(absl::StatusCode::kInvalidArgument, + MatchesRegex(".*Product of dimensions .* is too large.*"))); +} + +TEST(ParseDType, FieldSizeInBytesOverflow) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "float64", {kInfIndex}}}), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Field size in bytes is too large"))); +} + +TEST(ParseDType, BytesPerOuterElementOverflow) { + EXPECT_THAT( + ParseDType(::nlohmann::json::array_t{{"x", "int16", {kInfIndex}}, + {"y", "int16", {kInfIndex}}}), + StatusIs( + absl::StatusCode::kInvalidArgument, + HasSubstr( + "Total number of bytes per outer array element is too large"))); +} + +TEST(ChooseBaseDTypeTest, RoundTrip) { + constexpr tensorstore::DataType kSupportedDataTypes[] = { + dtype_v, dtype_v, dtype_v, dtype_v, + dtype_v, dtype_v, dtype_v, + dtype_v, dtype_v, + dtype_v, + dtype_v, + dtype_v, + dtype_v, + dtype_v, + dtype_v, + }; + for (auto dtype : kSupportedDataTypes) { + SCOPED_TRACE(tensorstore::StrCat("dtype=", dtype)); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto base_zarr_dtype, + ChooseBaseDType(dtype)); + EXPECT_EQ(dtype, base_zarr_dtype.dtype); + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto parsed, ParseBaseDType(base_zarr_dtype.encoded_dtype)); + EXPECT_EQ(dtype, 
parsed.dtype); + EXPECT_EQ(base_zarr_dtype.flexible_shape, parsed.flexible_shape); + EXPECT_EQ(base_zarr_dtype.encoded_dtype, parsed.encoded_dtype); + } +} + +TEST(ChooseBaseDTypeTest, Invalid) { + struct X {}; + EXPECT_THAT(ChooseBaseDType(dtype_v), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Data type not supported"))); + EXPECT_THAT(ChooseBaseDType(dtype_v<::tensorstore::dtypes::string_t>), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Data type not supported: string"))); +} + +} // namespace diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 528d373ae..c96c31426 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -50,6 +50,7 @@ #include "tensorstore/driver/zarr3/codec/codec_spec.h" #include "tensorstore/driver/zarr3/codec/sharding_indexed.h" #include "tensorstore/driver/zarr3/default_nan.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/driver/zarr3/name_configuration_json_binder.h" #include "tensorstore/index.h" #include "tensorstore/index_space/dimension_units.h" @@ -252,24 +253,110 @@ constexpr std::array } // namespace -absl::Status FillValueJsonBinder::operator()(std::true_type is_loading, - internal_json_binding::NoOptions, - SharedArray* obj, - ::nlohmann::json* j) const { +FillValueJsonBinder::FillValueJsonBinder(ZarrDType dtype, + bool allow_missing_dtype) + : dtype(std::move(dtype)), allow_missing_dtype(allow_missing_dtype) {} + +FillValueJsonBinder::FillValueJsonBinder(DataType data_type, + bool allow_missing_dtype) + : allow_missing_dtype(allow_missing_dtype) { + dtype.has_fields = false; + dtype.fields.resize(1); + auto& field = dtype.fields[0]; + field.name.clear(); + field.outer_shape.clear(); + field.flexible_shape.clear(); + field.field_shape.clear(); + field.num_inner_elements = 1; + field.byte_offset = 0; + field.num_bytes = data_type->size; + field.dtype = data_type; + field.encoded_dtype = 
std::string(data_type.name()); +} + +absl::Status FillValueJsonBinder::operator()( + std::true_type is_loading, internal_json_binding::NoOptions, + std::vector>* obj, ::nlohmann::json* j) const { + obj->resize(dtype.fields.size()); + if (dtype.fields.size() == 1) { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); + } else { + if (!j->is_array()) { + return internal_json::ExpectedError(*j, "array"); + } + if (j->size() != dtype.fields.size()) { + return internal_json::ExpectedError( + *j, tensorstore::StrCat("array of size ", dtype.fields.size())); + } + for (size_t i = 0; i < dtype.fields.size(); ++i) { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle((*j)[i], dtype.fields[i].dtype, (*obj)[i])); + } + } + return absl::OkStatus(); +} + +absl::Status FillValueJsonBinder::operator()( + std::false_type is_loading, internal_json_binding::NoOptions, + const std::vector>* obj, + ::nlohmann::json* j) const { + if (dtype.fields.size() == 1) { + return EncodeSingle((*obj)[0], dtype.fields[0].dtype, *j); + } + // Structured fill value + *j = ::nlohmann::json::array(); + for (size_t i = 0; i < dtype.fields.size(); ++i) { + ::nlohmann::json item; + TENSORSTORE_RETURN_IF_ERROR( + EncodeSingle((*obj)[i], dtype.fields[i].dtype, item)); + j->push_back(std::move(item)); + } + return absl::OkStatus(); +} + +absl::Status FillValueJsonBinder::DecodeSingle(::nlohmann::json& j, + DataType data_type, + SharedArray& out) const { + if (!data_type.valid()) { + if (allow_missing_dtype) { + out = SharedArray(); + return absl::OkStatus(); + } + return absl::InvalidArgumentError( + "data_type must be specified before fill_value"); + } auto arr = AllocateArray(span{}, c_order, default_init, data_type); void* data = arr.data(); - *obj = std::move(arr); - return kFillValueDataTypeFunctions[static_cast(data_type.id())] - .decode(data, *j); + out = std::move(arr); + const auto& functions = + kFillValueDataTypeFunctions[static_cast(data_type.id())]; + if 
(!functions.decode) { + if (allow_missing_dtype) { + out = SharedArray(); + return absl::OkStatus(); + } + return absl::FailedPreconditionError( + "fill_value unsupported for specified data_type"); + } + return functions.decode(data, j); } -absl::Status FillValueJsonBinder::operator()(std::false_type is_loading, - internal_json_binding::NoOptions, - const SharedArray* obj, - ::nlohmann::json* j) const { - return kFillValueDataTypeFunctions[static_cast(data_type.id())] - .encode(obj->data(), *j); +absl::Status FillValueJsonBinder::EncodeSingle( + const SharedArray& arr, DataType data_type, + ::nlohmann::json& j) const { + if (!data_type.valid()) { + return absl::InvalidArgumentError( + "data_type must be specified before fill_value"); + } + const auto& functions = + kFillValueDataTypeFunctions[static_cast(data_type.id())]; + if (!functions.encode) { + return absl::FailedPreconditionError( + "fill_value unsupported for specified data_type"); + } + return functions.encode(arr.data(), j); } TENSORSTORE_DEFINE_JSON_DEFAULT_BINDER(ChunkKeyEncoding, [](auto is_loading, @@ -357,7 +444,7 @@ constexpr auto MetadataJsonBinder = [] { rank = &obj->rank; } - auto ensure_data_type = [&]() -> Result { + auto ensure_data_type = [&]() -> Result { if constexpr (std::is_same_v) { return obj->data_type; } @@ -378,19 +465,18 @@ constexpr auto MetadataJsonBinder = [] { maybe_optional_member("node_type", jb::Constant([] { return "array"; })), jb::Member("data_type", - jb::Projection<&Self::data_type>(maybe_optional(jb::Validate( - [](const auto& options, auto* obj) { - return ValidateDataType(*obj); - }, - jb::DataTypeJsonBinder)))), + jb::Projection<&Self::data_type>(maybe_optional( + jb::DefaultBinder<>))), jb::Member( "fill_value", jb::Projection<&Self::fill_value>(maybe_optional( [&](auto is_loading, const auto& options, auto* obj, auto* j) { TENSORSTORE_ASSIGN_OR_RETURN(auto data_type, ensure_data_type()); - return FillValueJsonBinder{data_type}(is_loading, options, - obj, j); + 
constexpr bool allow_missing_dtype = + std::is_same_v; + return FillValueJsonBinder{data_type, allow_missing_dtype}( + is_loading, options, obj, j); }))), non_compatibility_field( jb::Member("shape", jb::Projection<&Self::shape>( @@ -477,9 +563,28 @@ std::string ZarrMetadata::GetCompatibilityKey() const { absl::Status ValidateMetadata(ZarrMetadata& metadata) { if (!metadata.codecs) { ArrayCodecResolveParameters decoded; - decoded.dtype = metadata.data_type; + if (metadata.data_type.fields.size() == 1 && + metadata.data_type.fields[0].outer_shape.empty()) { + decoded.dtype = metadata.data_type.fields[0].dtype; + } else { + decoded.dtype = dtype_v; + // TODO: Verify this works for structured types. + // Zarr2 uses a "scalar" array concept with byte storage for chunks. + } decoded.rank = metadata.rank; - decoded.fill_value = metadata.fill_value; + // Fill value for codec resolve might be complex. + // Zarr3 codecs usually don't depend on fill value except for some like + // "sharding_indexed"? Sharding uses fill_value for missing chunks. + if (metadata.fill_value.size() == 1) { + decoded.fill_value = metadata.fill_value[0]; + } else { + // How to represent structured fill value for codec? + // Sharding expects a single array. + // If we use structured type, the "array" is bytes. + // We might need to encode the fill value to bytes. + // For now, leave empty if multiple fields. + } + BytesCodecResolveParameters encoded; TENSORSTORE_ASSIGN_OR_RETURN( metadata.codecs, @@ -488,7 +593,14 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { // Get codec chunk layout info. ArrayDataTypeAndShapeInfo array_info; - array_info.dtype = metadata.data_type; + // array_info.dtype used here to validate codec compatibility. 
+ if (metadata.data_type.fields.size() == 1 && + metadata.data_type.fields[0].outer_shape.empty()) { + array_info.dtype = metadata.data_type.fields[0].dtype; + } else { + array_info.dtype = dtype_v; + } + array_info.rank = metadata.rank; std::copy_n(metadata.chunk_shape.begin(), metadata.rank, array_info.shape.emplace().begin()); @@ -512,17 +624,34 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { absl::Status ValidateMetadata(const ZarrMetadata& metadata, const ZarrMetadataConstraints& constraints) { using internal::MetadataMismatchError; - if (constraints.data_type && *constraints.data_type != metadata.data_type) { - return MetadataMismatchError("data_type", constraints.data_type->name(), - metadata.data_type.name()); - } - if (constraints.fill_value && - !AreArraysIdenticallyEqual(*constraints.fill_value, - metadata.fill_value)) { - auto binder = FillValueJsonBinder{metadata.data_type}; - auto constraint_json = jb::ToJson(*constraints.fill_value, binder).value(); - auto metadata_json = jb::ToJson(metadata.fill_value, binder).value(); - return MetadataMismatchError("fill_value", constraint_json, metadata_json); + if (constraints.data_type) { + // Compare ZarrDType + if (::nlohmann::json(*constraints.data_type) != + ::nlohmann::json(metadata.data_type)) { + return MetadataMismatchError( + "data_type", ::nlohmann::json(*constraints.data_type).dump(), + ::nlohmann::json(metadata.data_type).dump()); + } + } + if (constraints.fill_value) { + // Compare vector of arrays + if (constraints.fill_value->size() != metadata.fill_value.size()) { + return MetadataMismatchError("fill_value size", + constraints.fill_value->size(), + metadata.fill_value.size()); + } + for (size_t i = 0; i < metadata.fill_value.size(); ++i) { + if (!AreArraysIdenticallyEqual((*constraints.fill_value)[i], + metadata.fill_value[i])) { + auto binder = FillValueJsonBinder{metadata.data_type}; + auto constraint_json = + jb::ToJson(*constraints.fill_value, binder).value(); + auto metadata_json 
= + jb::ToJson(metadata.fill_value, binder).value(); + return MetadataMismatchError("fill_value", constraint_json, + metadata_json); + } + } } if (constraints.shape && *constraints.shape != metadata.shape) { return MetadataMismatchError("shape", *constraints.shape, metadata.shape); @@ -574,23 +703,64 @@ absl::Status ValidateMetadata(const ZarrMetadata& metadata, metadata.unknown_extension_attributes); } +namespace { +std::string GetFieldNames(const ZarrDType& dtype) { + std::vector field_names; + for (const auto& field : dtype.fields) { + field_names.push_back(field.name); + } + return ::nlohmann::json(field_names).dump(); +} +} // namespace + +Result GetFieldIndex(const ZarrDType& dtype, + std::string_view selected_field) { + if (selected_field.empty()) { + if (dtype.fields.size() != 1) { + return absl::FailedPreconditionError(tensorstore::StrCat( + "Must specify a \"field\" that is one of: ", GetFieldNames(dtype))); + } + return 0; + } + if (!dtype.has_fields) { + return absl::FailedPreconditionError( + tensorstore::StrCat("Requested field ", QuoteString(selected_field), + " but dtype does not have named fields")); + } + for (size_t field_index = 0; field_index < dtype.fields.size(); + ++field_index) { + if (dtype.fields[field_index].name == selected_field) return field_index; + } + return absl::FailedPreconditionError( + tensorstore::StrCat("Requested field ", QuoteString(selected_field), + " is not one of: ", GetFieldNames(dtype))); +} + +SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, + size_t field_index) { + SpecRankAndFieldInfo info; + info.chunked_rank = metadata.rank; + info.field = &metadata.data_type.fields[field_index]; + return info; +} + Result> GetEffectiveDomain( - DimensionIndex rank, std::optional> shape, + const SpecRankAndFieldInfo& info, + std::optional> metadata_shape, std::optional>> dimension_names, - const Schema& schema, bool* dimension_names_used = nullptr) { + const Schema& schema, bool* dimension_names_used) 
{ + const DimensionIndex rank = info.chunked_rank; if (dimension_names_used) *dimension_names_used = false; auto domain = schema.domain(); - if (!shape && !dimension_names && !domain.valid()) { + if (!metadata_shape && !dimension_names && !domain.valid()) { if (schema.rank() == 0) return {std::in_place, 0}; - // No information about the domain available. return {std::in_place}; } - // Rank is already validated by caller. assert(RankConstraint::EqualOrUnspecified(schema.rank(), rank)); IndexDomainBuilder builder(std::max(schema.rank().rank, rank)); - if (shape) { - builder.shape(*shape); + if (metadata_shape) { + builder.shape(*metadata_shape); builder.implicit_upper_bounds(true); } else { builder.origin(GetConstantVector(builder.rank())); @@ -602,12 +772,12 @@ Result> GetEffectiveDomain( normalized_dimension_names[i] = *name; } } - // Use dimension_names as labels if they are valid. - if (internal::ValidateDimensionLabelsAreUnique(normalized_dimension_names) + if (internal::ValidateDimensionLabelsAreUnique( + span(&normalized_dimension_names[0], rank)) .ok()) { - if (dimension_names_used) *dimension_names_used = true; builder.labels( span(&normalized_dimension_names[0], rank)); + if (dimension_names_used) *dimension_names_used = true; } } @@ -618,36 +788,53 @@ Result> GetEffectiveDomain( tensorstore::MaybeAnnotateStatus( _, "Mismatch between metadata and schema"))); return WithImplicitDimensions(domain, false, true); - return domain; } Result> GetEffectiveDomain( const ZarrMetadataConstraints& metadata_constraints, const Schema& schema, bool* dimension_names_used) { - return GetEffectiveDomain( - metadata_constraints.rank, metadata_constraints.shape, - metadata_constraints.dimension_names, schema, dimension_names_used); + SpecRankAndFieldInfo info; + info.chunked_rank = metadata_constraints.rank; + if (info.chunked_rank == dynamic_rank && metadata_constraints.shape) { + info.chunked_rank = metadata_constraints.shape->size(); + } + + std::optional> shape_span; + if 
(metadata_constraints.shape) { + shape_span.emplace(metadata_constraints.shape->data(), + metadata_constraints.shape->size()); + } + std::optional>> names_span; + if (metadata_constraints.dimension_names) { + names_span.emplace(metadata_constraints.dimension_names->data(), + metadata_constraints.dimension_names->size()); + } + + return GetEffectiveDomain(info, shape_span, names_span, schema, + dimension_names_used); } absl::Status SetChunkLayoutFromMetadata( - DataType dtype, DimensionIndex rank, + const SpecRankAndFieldInfo& info, std::optional> chunk_shape, const ZarrCodecChainSpec* codecs, ChunkLayout& chunk_layout) { - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint{rank})); - rank = chunk_layout.rank(); - if (rank == dynamic_rank) return absl::OkStatus(); + const DimensionIndex rank = info.chunked_rank; + if (rank == dynamic_rank) { + return absl::OkStatus(); + } + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set(RankConstraint(rank))); + TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( + ChunkLayout::GridOrigin(GetConstantVector(rank)))); if (chunk_shape) { assert(chunk_shape->size() == rank); TENSORSTORE_RETURN_IF_ERROR( chunk_layout.Set(ChunkLayout::WriteChunkShape(*chunk_shape))); } - TENSORSTORE_RETURN_IF_ERROR(chunk_layout.Set( - ChunkLayout::GridOrigin(GetConstantVector(rank)))); if (codecs) { ArrayDataTypeAndShapeInfo array_info; - array_info.dtype = dtype; + array_info.dtype = info.field ? 
info.field->dtype : dtype_v; array_info.rank = rank; if (chunk_shape) { std::copy_n(chunk_shape->begin(), rank, @@ -669,30 +856,47 @@ absl::Status SetChunkLayoutFromMetadata( span(layout_info.codec_chunk_shape->data(), rank)))); } } + return absl::OkStatus(); } -Result GetEffectiveChunkLayout( +absl::Status SetChunkLayoutFromMetadata( DataType dtype, DimensionIndex rank, std::optional> chunk_shape, - const ZarrCodecChainSpec* codecs, const Schema& schema) { - auto chunk_layout = schema.chunk_layout(); - TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( - dtype, rank, chunk_shape, codecs, chunk_layout)); - return chunk_layout; + const ZarrCodecChainSpec* codecs, ChunkLayout& chunk_layout) { + SpecRankAndFieldInfo info; + info.chunked_rank = rank; + info.field = nullptr; + return SetChunkLayoutFromMetadata(info, chunk_shape, codecs, chunk_layout); } Result GetEffectiveChunkLayout( const ZarrMetadataConstraints& metadata_constraints, const Schema& schema) { - assert(RankConstraint::EqualOrUnspecified(metadata_constraints.rank, - schema.rank())); - return GetEffectiveChunkLayout( - metadata_constraints.data_type.value_or(DataType{}), - std::max(metadata_constraints.rank, schema.rank().rank), - metadata_constraints.chunk_shape, + // Approximation: assume whole array access or simple array + SpecRankAndFieldInfo info; + info.chunked_rank = std::max(metadata_constraints.rank, schema.rank().rank); + if (info.chunked_rank == dynamic_rank && metadata_constraints.shape) { + info.chunked_rank = metadata_constraints.shape->size(); + } + if (info.chunked_rank == dynamic_rank && metadata_constraints.chunk_shape) { + info.chunked_rank = metadata_constraints.chunk_shape->size(); + } + // We can't easily know field info from constraints unless we parse data_type. + // If data_type is present and has 1 field, we can check it. + // For now, basic implementation. 
+ + ChunkLayout chunk_layout = schema.chunk_layout(); + std::optional> chunk_shape_span; + if (metadata_constraints.chunk_shape) { + chunk_shape_span.emplace(metadata_constraints.chunk_shape->data(), + metadata_constraints.chunk_shape->size()); + } + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( + info, chunk_shape_span, metadata_constraints.codec_specs ? &*metadata_constraints.codec_specs : nullptr, - schema); + chunk_layout)); + return chunk_layout; } Result GetDimensionUnits( @@ -732,53 +936,63 @@ CodecSpec GetCodecFromMetadata(const ZarrMetadata& metadata) { } absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, - const Schema& schema) { - if (!RankConstraint::EqualOrUnspecified(metadata.rank, schema.rank())) { + size_t field_index, const Schema& schema) { + auto info = GetSpecRankAndFieldInfo(metadata, field_index); + const auto& field = metadata.data_type.fields[field_index]; + + if (!RankConstraint::EqualOrUnspecified(schema.rank(), info.chunked_rank)) { return absl::FailedPreconditionError(tensorstore::StrCat( "Rank specified by schema (", schema.rank(), - ") does not match rank specified by metadata (", metadata.rank, ")")); + ") does not match rank specified by metadata (", info.chunked_rank, + ")")); } if (schema.domain().valid()) { + std::optional> metadata_shape_span; + metadata_shape_span.emplace(metadata.shape.data(), metadata.shape.size()); + std::optional>> dimension_names_span; + dimension_names_span.emplace(metadata.dimension_names.data(), + metadata.dimension_names.size()); TENSORSTORE_RETURN_IF_ERROR(GetEffectiveDomain( - metadata.rank, metadata.shape, metadata.dimension_names, schema)); + info, metadata_shape_span, dimension_names_span, schema, + /*dimension_names_used=*/nullptr)); } if (auto dtype = schema.dtype(); - !IsPossiblySameDataType(metadata.data_type, dtype)) { + !IsPossiblySameDataType(field.dtype, dtype)) { return absl::FailedPreconditionError( - tensorstore::StrCat("data_type from metadata (", 
metadata.data_type, + tensorstore::StrCat("data_type from metadata (", field.dtype, ") does not match dtype in schema (", dtype, ")")); } if (schema.chunk_layout().rank() != dynamic_rank) { - TENSORSTORE_ASSIGN_OR_RETURN( - auto chunk_layout, - GetEffectiveChunkLayout(metadata.data_type, metadata.rank, - metadata.chunk_shape, &metadata.codec_specs, - schema)); + ChunkLayout chunk_layout = schema.chunk_layout(); + std::optional> chunk_shape_span; + chunk_shape_span.emplace(metadata.chunk_shape.data(), + metadata.chunk_shape.size()); + TENSORSTORE_RETURN_IF_ERROR(SetChunkLayoutFromMetadata( + info, chunk_shape_span, &metadata.codec_specs, chunk_layout)); if (chunk_layout.codec_chunk_shape().hard_constraint) { return absl::InvalidArgumentError("codec_chunk_shape not supported"); } } if (auto schema_fill_value = schema.fill_value(); schema_fill_value.valid()) { - const auto& fill_value = metadata.fill_value; + const auto& fill_value = metadata.fill_value[field_index]; TENSORSTORE_ASSIGN_OR_RETURN( auto broadcast_fill_value, tensorstore::BroadcastArray(schema_fill_value, span{})); TENSORSTORE_ASSIGN_OR_RETURN( SharedArray converted_fill_value, tensorstore::MakeCopy(std::move(broadcast_fill_value), - skip_repeated_elements, metadata.data_type)); + skip_repeated_elements, field.dtype)); if (!AreArraysIdenticallyEqual(converted_fill_value, fill_value)) { auto binder = FillValueJsonBinder{metadata.data_type}; - auto schema_json = jb::ToJson(converted_fill_value, binder).value(); - auto metadata_json = jb::ToJson(metadata.fill_value, binder).value(); + // Error message generation might be tricky with binder return absl::FailedPreconditionError(tensorstore::StrCat( "Invalid fill_value: schema requires fill value of ", - schema_json.dump(), ", but metadata specifies fill value of ", - metadata_json.dump())); + schema_fill_value, ", but metadata specifies fill value of ", + fill_value)); } } @@ -804,8 +1018,14 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, 
return absl::OkStatus(); } +absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, + const Schema& schema) { + return ValidateMetadataSchema(metadata, /*field_index=*/0, schema); +} + Result> GetNewMetadata( - const ZarrMetadataConstraints& metadata_constraints, const Schema& schema) { + const ZarrMetadataConstraints& metadata_constraints, const Schema& schema, + std::string_view selected_field) { auto metadata = std::make_shared(); metadata->zarr_format = metadata_constraints.zarr_format.value_or(3); @@ -813,51 +1033,85 @@ Result> GetNewMetadata( metadata_constraints.chunk_key_encoding.value_or(ChunkKeyEncoding{ /*.kind=*/ChunkKeyEncoding::kDefault, /*.separator=*/'/'}); + // Determine data type first + if (metadata_constraints.data_type) { + metadata->data_type = *metadata_constraints.data_type; + } else if (!selected_field.empty()) { + return absl::InvalidArgumentError( + "\"dtype\" must be specified in \"metadata\" if \"field\" is " + "specified"); + } else if (auto dtype = schema.dtype(); dtype.valid()) { + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast( + metadata->data_type.fields.emplace_back()), + ChooseBaseDType(dtype)); + metadata->data_type.has_fields = false; + TENSORSTORE_RETURN_IF_ERROR(ValidateDType(metadata->data_type)); + } else { + return absl::InvalidArgumentError("dtype must be specified"); + } + + TENSORSTORE_ASSIGN_OR_RETURN( + size_t field_index, GetFieldIndex(metadata->data_type, selected_field)); + SpecRankAndFieldInfo info; + info.field = &metadata->data_type.fields[field_index]; + info.chunked_rank = metadata_constraints.rank; + if (info.chunked_rank == dynamic_rank && metadata_constraints.shape) { + info.chunked_rank = metadata_constraints.shape->size(); + } + if (info.chunked_rank == dynamic_rank && + schema.rank().rank != dynamic_rank) { + info.chunked_rank = schema.rank().rank; + } + // Set domain - bool dimension_names_used; + bool dimension_names_used = false; + std::optional> constraint_shape_span; + if 
(metadata_constraints.shape) { + constraint_shape_span.emplace(metadata_constraints.shape->data(), + metadata_constraints.shape->size()); + } + std::optional>> constraint_names_span; + if (metadata_constraints.dimension_names) { + constraint_names_span.emplace( + metadata_constraints.dimension_names->data(), + metadata_constraints.dimension_names->size()); + } TENSORSTORE_ASSIGN_OR_RETURN( - auto domain, - GetEffectiveDomain(metadata_constraints, schema, &dimension_names_used)); + auto domain, GetEffectiveDomain(info, constraint_shape_span, + constraint_names_span, schema, + &dimension_names_used)); if (!domain.valid() || !IsFinite(domain.box())) { return absl::InvalidArgumentError("domain must be specified"); } - const DimensionIndex rank = metadata->rank = domain.rank(); - metadata->shape.assign(domain.shape().begin(), domain.shape().end()); + const DimensionIndex rank = domain.rank(); + metadata->rank = rank; + info.chunked_rank = rank; + metadata->shape.assign(domain.shape().begin(), + domain.shape().begin() + rank); metadata->dimension_names.assign(domain.labels().begin(), - domain.labels().end()); - // Normalize empty string dimension names to `std::nullopt`. This is more - // consistent with the zarr v3 dimension name semantics, and ensures that the - // `dimension_names` metadata field will be excluded entirely if all dimension - // names are the empty string. - // - // However, if empty string dimension names were specified explicitly in - // `metadata_constraints`, leave them exactly as specified. + domain.labels().begin() + rank); + for (DimensionIndex i = 0; i < rank; ++i) { auto& name = metadata->dimension_names[i]; if (!name || !name->empty()) continue; - // Dimension name equals the empty string. - if (dimension_names_used && (*metadata_constraints.dimension_names)[i]) { - // Empty dimension name was explicitly specified in - // `metadata_constraints`, leave it as is. 
+ if (dimension_names_used && metadata_constraints.dimension_names && + (*metadata_constraints.dimension_names)[i]) { assert((*metadata_constraints.dimension_names)[i]->empty()); continue; } - // Name was not explicitly specified in `metadata_constraints` as an empty - // string. Normalize it to `std::nullopt`. name = std::nullopt; } - // Set dtype - auto dtype = schema.dtype(); - if (!dtype.valid()) { - return absl::InvalidArgumentError("dtype must be specified"); - } - TENSORSTORE_RETURN_IF_ERROR(ValidateDataType(dtype)); - metadata->data_type = dtype; - if (metadata_constraints.fill_value) { metadata->fill_value = *metadata_constraints.fill_value; } else if (auto fill_value = schema.fill_value(); fill_value.valid()) { + // Assuming single field if setting from schema + if (metadata->data_type.fields.size() != 1) { + return absl::InvalidArgumentError( + "Cannot specify fill_value through schema for structured zarr data " + "type"); + } const auto status = [&] { TENSORSTORE_ASSIGN_OR_RETURN( auto broadcast_fill_value, @@ -865,23 +1119,26 @@ Result> GetNewMetadata( TENSORSTORE_ASSIGN_OR_RETURN( auto converted_fill_value, tensorstore::MakeCopy(std::move(broadcast_fill_value), - skip_repeated_elements, metadata->data_type)); - metadata->fill_value = std::move(converted_fill_value); + skip_repeated_elements, + metadata->data_type.fields[0].dtype)); + metadata->fill_value.push_back(std::move(converted_fill_value)); return absl::OkStatus(); }(); TENSORSTORE_RETURN_IF_ERROR( status, tensorstore::MaybeAnnotateStatus(_, "Invalid fill_value")); } else { - metadata->fill_value = tensorstore::AllocateArray( - /*shape=*/span(), c_order, value_init, - metadata->data_type); + metadata->fill_value.resize(metadata->data_type.fields.size()); + for (size_t i = 0; i < metadata->fill_value.size(); ++i) { + metadata->fill_value[i] = tensorstore::AllocateArray( + /*shape=*/span(), c_order, value_init, + metadata->data_type.fields[i].dtype); + } } metadata->user_attributes = 
metadata_constraints.user_attributes; metadata->unknown_extension_attributes = metadata_constraints.unknown_extension_attributes; - // Set dimension units TENSORSTORE_ASSIGN_OR_RETURN( auto dimension_units, GetEffectiveDimensionUnits(rank, metadata_constraints.dimension_units, @@ -895,12 +1152,16 @@ Result> GetNewMetadata( TENSORSTORE_ASSIGN_OR_RETURN(auto codec_spec, GetEffectiveCodec(metadata_constraints, schema)); - // Set chunk shape - ArrayCodecResolveParameters decoded; - decoded.dtype = metadata->data_type; + if (metadata->data_type.fields.size() == 1 && + metadata->data_type.fields[0].outer_shape.empty()) { + decoded.dtype = metadata->data_type.fields[0].dtype; + } else { + decoded.dtype = dtype_v; + } decoded.rank = metadata->rank; - decoded.fill_value = metadata->fill_value; + if (metadata->fill_value.size() == 1) + decoded.fill_value = metadata->fill_value[0]; TENSORSTORE_ASSIGN_OR_RETURN( auto chunk_layout, GetEffectiveChunkLayout(metadata_constraints, schema)); @@ -920,8 +1181,6 @@ Result> GetNewMetadata( if (!internal::RangesEqual(span(metadata->chunk_shape), span(read_chunk_shape))) { - // Read chunk and write chunk shapes differ. Insert sharding codec if there - // is not already one. if (!codec_spec->codecs || codec_spec->codecs->sharding_height() == 0) { auto sharding_codec = internal::MakeIntrusivePtr( @@ -945,7 +1204,8 @@ Result> GetNewMetadata( TENSORSTORE_RETURN_IF_ERROR(set_up_codecs( codec_spec->codecs ? 
*codec_spec->codecs : ZarrCodecChainSpec{})); TENSORSTORE_RETURN_IF_ERROR(ValidateMetadata(*metadata)); - TENSORSTORE_RETURN_IF_ERROR(ValidateMetadataSchema(*metadata, schema)); + TENSORSTORE_RETURN_IF_ERROR( + ValidateMetadataSchema(*metadata, field_index, schema)); return metadata; } diff --git a/tensorstore/driver/zarr3/metadata.h b/tensorstore/driver/zarr3/metadata.h index 05b8c6be3..4c7871b0d 100644 --- a/tensorstore/driver/zarr3/metadata.h +++ b/tensorstore/driver/zarr3/metadata.h @@ -33,6 +33,7 @@ #include "tensorstore/data_type.h" #include "tensorstore/driver/zarr3/codec/codec.h" #include "tensorstore/driver/zarr3/codec/codec_chain_spec.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/index.h" #include "tensorstore/index_space/dimension_units.h" #include "tensorstore/index_space/index_domain.h" @@ -72,19 +73,35 @@ struct ChunkKeyEncoding { }; struct FillValueJsonBinder { - DataType data_type; + ZarrDType dtype; + bool allow_missing_dtype = false; + FillValueJsonBinder() = default; + explicit FillValueJsonBinder(ZarrDType dtype, + bool allow_missing_dtype = false); + explicit FillValueJsonBinder(DataType dtype, + bool allow_missing_dtype = false); absl::Status operator()(std::true_type is_loading, internal_json_binding::NoOptions, - SharedArray* obj, + std::vector>* obj, ::nlohmann::json* j) const; absl::Status operator()(std::false_type is_loading, internal_json_binding::NoOptions, - const SharedArray* obj, + const std::vector>* obj, ::nlohmann::json* j) const; + + private: + absl::Status DecodeSingle(::nlohmann::json& j, DataType data_type, + SharedArray& out) const; + absl::Status EncodeSingle(const SharedArray& arr, + DataType data_type, + ::nlohmann::json& j) const; }; +struct SpecRankAndFieldInfo; + + struct ZarrMetadata { // The following members are common to `ZarrMetadata` and // `ZarrMetadataConstraints`, except that in `ZarrMetadataConstraints` some @@ -94,14 +111,14 @@ struct ZarrMetadata { int zarr_format; std::vector shape; 
- DataType data_type; + ZarrDType data_type; ::nlohmann::json::object_t user_attributes; std::optional dimension_units; std::vector> dimension_names; ChunkKeyEncoding chunk_key_encoding; std::vector chunk_shape; ZarrCodecChainSpec codec_specs; - SharedArray fill_value; + std::vector> fill_value; ::nlohmann::json::object_t unknown_extension_attributes; std::string GetCompatibilityKey() const; @@ -123,14 +140,14 @@ struct ZarrMetadataConstraints { std::optional zarr_format; std::optional> shape; - std::optional data_type; + std::optional data_type; ::nlohmann::json::object_t user_attributes; std::optional dimension_units; std::optional>> dimension_names; std::optional chunk_key_encoding; std::optional> chunk_shape; std::optional codec_specs; - std::optional> fill_value; + std::optional>> fill_value; ::nlohmann::json::object_t unknown_extension_attributes; TENSORSTORE_DECLARE_JSON_DEFAULT_BINDER(ZarrMetadataConstraints, @@ -159,6 +176,10 @@ Result> GetEffectiveDomain( /// Sets chunk layout constraints implied by `dtype`, `rank`, `chunk_shape`, and /// `codecs`. +absl::Status SetChunkLayoutFromMetadata( + const SpecRankAndFieldInfo& info, + std::optional> chunk_shape, + const ZarrCodecChainSpec* codecs, ChunkLayout& chunk_layout); absl::Status SetChunkLayoutFromMetadata( DataType dtype, DimensionIndex rank, std::optional> chunk_shape, @@ -198,6 +219,8 @@ Result> GetEffectiveCodec( CodecSpec GetCodecFromMetadata(const ZarrMetadata& metadata); /// Validates that `schema` is compatible with `metadata`. +absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, + size_t field_index, const Schema& schema); absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, const Schema& schema); @@ -206,10 +229,22 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, /// \error `absl::StatusCode::kInvalidArgument` if any required fields are /// unspecified. 
Result> GetNewMetadata( - const ZarrMetadataConstraints& metadata_constraints, const Schema& schema); + const ZarrMetadataConstraints& metadata_constraints, + const Schema& schema, std::string_view selected_field = {}); absl::Status ValidateDataType(DataType dtype); +Result GetFieldIndex(const ZarrDType& dtype, + std::string_view selected_field); + +struct SpecRankAndFieldInfo { + DimensionIndex chunked_rank = dynamic_rank; + const ZarrDType::Field* field = nullptr; +}; + +SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, + size_t field_index); + } // namespace internal_zarr3 } // namespace tensorstore diff --git a/tensorstore/driver/zarr3/metadata_test.cc b/tensorstore/driver/zarr3/metadata_test.cc index 0b140fa80..11c97619f 100644 --- a/tensorstore/driver/zarr3/metadata_test.cc +++ b/tensorstore/driver/zarr3/metadata_test.cc @@ -51,6 +51,7 @@ namespace { namespace jb = ::tensorstore::internal_json_binding; using ::tensorstore::ChunkLayout; +using ::tensorstore::DataType; using ::tensorstore::CodecSpec; using ::tensorstore::dtype_v; using ::tensorstore::Index; @@ -68,6 +69,7 @@ using ::tensorstore::dtypes::float32_t; using ::tensorstore::dtypes::float64_t; using ::tensorstore::internal::uint_t; using ::tensorstore::internal_zarr3::FillValueJsonBinder; +using ::tensorstore::internal_zarr3::ZarrDType; using ::tensorstore::internal_zarr3::ZarrMetadata; using ::tensorstore::internal_zarr3::ZarrMetadataConstraints; using ::testing::HasSubstr; @@ -90,13 +92,30 @@ ::nlohmann::json GetBasicMetadata() { }; } +ZarrDType MakeScalarZarrDType(DataType dtype) { + ZarrDType dtype_info; + dtype_info.has_fields = false; + dtype_info.fields.resize(1); + auto& field = dtype_info.fields[0]; + field.dtype = dtype; + field.encoded_dtype = std::string(dtype.name()); + field.outer_shape.clear(); + field.flexible_shape.clear(); + field.field_shape.clear(); + field.num_inner_elements = 1; + field.byte_offset = 0; + field.num_bytes = dtype->size; + return 
dtype_info; +} + TEST(MetadataTest, ParseValid) { auto json = GetBasicMetadata(); tensorstore::TestJsonBinderRoundTripJsonOnly({json}); TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, ZarrMetadata::FromJson(json)); EXPECT_THAT(metadata.shape, ::testing::ElementsAre(10, 11, 12)); EXPECT_THAT(metadata.chunk_shape, ::testing::ElementsAre(1, 2, 3)); - EXPECT_THAT(metadata.data_type, tensorstore::dtype_v); + ASSERT_EQ(metadata.data_type.fields.size(), 1); + EXPECT_EQ(tensorstore::dtype_v, metadata.data_type.fields[0].dtype); EXPECT_THAT(metadata.dimension_names, ::testing::ElementsAre("a", std::nullopt, "")); EXPECT_THAT(metadata.user_attributes, MatchesJson({{"a", "b"}, {"c", "d"}})); @@ -115,7 +134,8 @@ TEST(MetadataTest, ParseValidNoDimensionNames) { TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, ZarrMetadata::FromJson(json)); EXPECT_THAT(metadata.shape, ::testing::ElementsAre(10, 11, 12)); EXPECT_THAT(metadata.chunk_shape, ::testing::ElementsAre(1, 2, 3)); - EXPECT_THAT(metadata.data_type, tensorstore::dtype_v); + ASSERT_EQ(metadata.data_type.fields.size(), 1); + EXPECT_EQ(tensorstore::dtype_v, metadata.data_type.fields[0].dtype); EXPECT_THAT(metadata.dimension_names, ::testing::ElementsAre(std::nullopt, std::nullopt, std::nullopt)); EXPECT_THAT(metadata.user_attributes, MatchesJson({{"a", "b"}, {"c", "d"}})); @@ -486,7 +506,9 @@ TEST(MetadataTest, DataTypes) { } TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto metadata, ZarrMetadata::FromJson(json)); - EXPECT_EQ(tensorstore::GetDataType(data_type_name), metadata.data_type); + ASSERT_FALSE(metadata.data_type.fields.empty()); + EXPECT_EQ(tensorstore::GetDataType(data_type_name), + metadata.data_type.fields[0].dtype); } } @@ -503,18 +525,20 @@ TEST(MetadataTest, InvalidDataType) { template void TestFillValue(std::vector> cases, bool skip_to_json = false) { - auto binder = FillValueJsonBinder{dtype_v}; + FillValueJsonBinder binder(MakeScalarZarrDType(dtype_v)); for (const auto& [value, json] : cases) { SharedArray 
expected_fill_value = tensorstore::MakeScalarArray(value); if (!skip_to_json) { - EXPECT_THAT(jb::ToJson(expected_fill_value, binder), + std::vector> vec{expected_fill_value}; + EXPECT_THAT(jb::ToJson(vec, binder), ::testing::Optional(MatchesJson(json))) << "value=" << value << ", json=" << json; } - EXPECT_THAT(jb::FromJson>(json, binder), - ::testing::Optional( - tensorstore::MatchesArrayIdentically(expected_fill_value))) + EXPECT_THAT( + jb::FromJson>>(json, binder), + ::testing::Optional(::testing::ElementsAre( + tensorstore::MatchesArrayIdentically(expected_fill_value)))) << "json=" << json; } } @@ -522,10 +546,11 @@ void TestFillValue(std::vector> cases, template void TestFillValueInvalid( std::vector> cases) { - auto binder = FillValueJsonBinder{dtype_v}; + FillValueJsonBinder binder(MakeScalarZarrDType(dtype_v)); for (const auto& [json, matcher] : cases) { EXPECT_THAT( - jb::FromJson>(json, binder).status(), + jb::FromJson>>(json, binder) + .status(), StatusIs(absl::StatusCode::kInvalidArgument, MatchesRegex(matcher))) << "json=" << json; } From 187f42452a359bca712a64050176b93e5ce9b145 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 24 Nov 2025 22:57:11 +0000 Subject: [PATCH 02/59] Updates to have proper reads --- tensorstore/driver/zarr3/chunk_cache.cc | 74 ++++++++++++++---- tensorstore/driver/zarr3/chunk_cache.h | 11 ++- tensorstore/driver/zarr3/driver.cc | 74 ++++++++++++------ tensorstore/driver/zarr3/dtype.cc | 64 +++++++++++---- tensorstore/driver/zarr3/metadata.cc | 100 ++++++++++++++++-------- 5 files changed, 239 insertions(+), 84 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index ee1cba9c1..6bfa8c039 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -18,6 +18,8 @@ #include #include +#include +#include #include #include #include @@ -73,15 +75,17 @@ ZarrChunkCache::~ZarrChunkCache() = default; 
ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - internal::CachePool::WeakPtr /*data_cache_pool*/) - : Base(std::move(store)), codec_state_(std::move(codec_state)) {} + ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/) + : Base(std::move(store)), + codec_state_(std::move(codec_state)), + dtype_(std::move(dtype)) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver>&& receiver) { return internal::ChunkCache::Read( {static_cast(request), - /*component_index=*/0, request.staleness_bound, + request.component_index, request.staleness_bound, request.fill_missing_data_reads}, std::move(receiver)); } @@ -92,7 +96,7 @@ void ZarrLeafChunkCache::Write( receiver) { return internal::ChunkCache::Write( {static_cast(request), - /*component_index=*/0, request.store_data_equal_to_fill_value}, + request.component_index, request.store_data_equal_to_fill_value}, std::move(receiver)); } @@ -149,12 +153,52 @@ std::string ZarrLeafChunkCache::GetChunkStorageKey( Result, 1>> ZarrLeafChunkCache::DecodeChunk(span chunk_indices, absl::Cord data) { + const size_t num_fields = dtype_.fields.size(); + absl::InlinedVector, 1> field_arrays(num_fields); + + + // For single non-structured field, decode directly + if (num_fields == 1 && dtype_.fields[0].outer_shape.empty()) { + TENSORSTORE_ASSIGN_OR_RETURN( + field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), + std::move(data))); + return field_arrays; + } + + // For structured types, decode byte array then extract fields + // Build decode shape: [chunk_dims..., bytes_per_outer_element] + const auto& chunk_shape = grid().chunk_shape; + std::vector decode_shape(chunk_shape.begin(), chunk_shape.end()); + decode_shape.push_back(dtype_.bytes_per_outer_element); + TENSORSTORE_ASSIGN_OR_RETURN( - auto array, - codec_state_->DecodeArray(grid().components[0].shape(), std::move(data))); - absl::InlinedVector, 1> 
components; - components.push_back(std::move(array)); - return components; + auto byte_array, codec_state_->DecodeArray(decode_shape, std::move(data))); + + // Extract each field from the byte array + const Index num_elements = byte_array.num_elements() / + dtype_.bytes_per_outer_element; + const auto* src_bytes = static_cast(byte_array.data()); + + for (size_t field_i = 0; field_i < num_fields; ++field_i) { + const auto& field = dtype_.fields[field_i]; + // Use the component's shape (from the grid) for the result array + const auto& component_shape = grid().components[field_i].shape(); + auto result_array = + AllocateArray(component_shape, c_order, default_init, field.dtype); + auto* dst = static_cast(result_array.data()); + const Index field_size = field.dtype->size; + + // Copy field data from each struct element + for (Index i = 0; i < num_elements; ++i) { + std::memcpy(dst + i * field_size, + src_bytes + i * dtype_.bytes_per_outer_element + + field.byte_offset, + field_size); + } + field_arrays[field_i] = std::move(result_array); + } + + return field_arrays; } Result ZarrLeafChunkCache::EncodeChunk( @@ -170,9 +214,10 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), + dtype_(std::move(dtype)), data_cache_pool_(std::move(data_cache_pool)) {} Result> TranslateCellToSourceTransformForShard( @@ -326,6 +371,7 @@ void ZarrShardedChunkCache::Read( *this, std::move(request.transform), std::move(receiver), [transaction = std::move(request.transaction), batch = std::move(request.batch), + component_index = request.component_index, staleness_bound = request.staleness_bound, fill_missing_data_reads = request.fill_missing_data_reads](auto entry) { Batch 
shard_batch = batch; @@ -339,8 +385,7 @@ void ZarrShardedChunkCache::Read( IndexTransform<>>&& receiver) { entry->sub_chunk_cache.get()->Read( {{transaction, std::move(transform), shard_batch}, - staleness_bound, - fill_missing_data_reads}, + component_index, staleness_bound, fill_missing_data_reads}, std::move(receiver)); }; }); @@ -354,6 +399,7 @@ void ZarrShardedChunkCache::Write( &ZarrArrayToArrayCodec::PreparedState::Write>( *this, std::move(request.transform), std::move(receiver), [transaction = std::move(request.transaction), + component_index = request.component_index, store_data_equal_to_fill_value = request.store_data_equal_to_fill_value](auto entry) { internal::OpenTransactionPtr shard_transaction = transaction; @@ -366,7 +412,7 @@ void ZarrShardedChunkCache::Write( AnyFlowReceiver>&& receiver) { entry->sub_chunk_cache.get()->Write( - {{shard_transaction, std::move(transform)}, + {{shard_transaction, std::move(transform)}, component_index, store_data_equal_to_fill_value}, std::move(receiver)); }; @@ -481,7 +527,7 @@ void ZarrShardedChunkCache::Entry::DoInitialize() { *sharding_state.sub_chunk_codec_chain, std::move(sharding_kvstore), cache.executor(), ZarrShardingCodec::PreparedState::Ptr(&sharding_state), - cache.data_cache_pool_); + cache.dtype_, cache.data_cache_pool_); zarr_chunk_cache = new_cache.release(); return std::unique_ptr(&zarr_chunk_cache->cache()); }) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index dd40e43ac..5933115d7 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -31,6 +31,7 @@ #include "tensorstore/driver/read_request.h" #include "tensorstore/driver/write_request.h" #include "tensorstore/driver/zarr3/codec/codec.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/index.h" #include "tensorstore/index_space/index_transform.h" #include "tensorstore/internal/cache/cache.h" @@ -72,6 +73,7 @@ class ZarrChunkCache { virtual 
const Executor& executor() const = 0; struct ReadRequest : internal::DriverReadRequest { + size_t component_index = 0; absl::Time staleness_bound; bool fill_missing_data_reads; }; @@ -81,6 +83,7 @@ class ZarrChunkCache { IndexTransform<>>&& receiver) = 0; struct WriteRequest : internal::DriverWriteRequest { + size_t component_index = 0; bool store_data_equal_to_fill_value; }; @@ -154,6 +157,7 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, explicit ZarrLeafChunkCache(kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool); void Read(ZarrChunkCache::ReadRequest request, @@ -181,6 +185,7 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, kvstore::Driver* GetKvStoreDriver() override; ZarrCodecChain::PreparedState::Ptr codec_state_; + ZarrDType dtype_; }; /// Chunk cache for a Zarr array where each chunk is a shard. @@ -190,6 +195,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { public: explicit ZarrShardedChunkCache(kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool); const ZarrShardingCodec::PreparedState& sharding_codec_state() const { @@ -239,6 +245,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { kvstore::DriverPtr base_kvstore_; ZarrCodecChain::PreparedState::Ptr codec_state_; + ZarrDType dtype_; // Data cache pool, if it differs from `this->pool()` (which is equal to the // metadata cache pool). 
@@ -253,11 +260,11 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { explicit ZarrShardSubChunkCache( kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, - internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), - std::move(data_cache_pool)), + std::move(dtype), std::move(data_cache_pool)), sharding_state_(std::move(sharding_state)), executor_(std::move(executor)) {} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 15faced0a..1674a1c6d 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -103,9 +103,11 @@ class ZarrDriverSpec /*Parent=*/KvsDriverSpec>; ZarrMetadataConstraints metadata_constraints; + std::string selected_field; constexpr static auto ApplyMembers = [](auto& x, auto f) { - return f(internal::BaseCast(x), x.metadata_constraints); + return f(internal::BaseCast(x), x.metadata_constraints, + x.selected_field); }; static inline const auto default_json_binder = jb::Sequence( @@ -139,7 +141,10 @@ class ZarrDriverSpec return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( - jb::DefaultInitializedValue())))); + jb::DefaultInitializedValue()))), + jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( + jb::DefaultValue( + [](auto* obj) { *obj = std::string{}; })))); absl::Status ApplyOptions(SpecOptions&& options) override { if (options.minimal_spec) { @@ -286,21 +291,33 @@ class DataCacheBase static internal::ChunkGridSpecification GetChunkGridSpecification( const ZarrMetadata& metadata) { assert(!metadata.fill_value.empty()); - auto fill_value = BroadcastArray(metadata.fill_value[0], - BoxView<>(metadata.rank)) - .value(); internal::ChunkGridSpecification::ComponentList components; - auto& component = 
components.emplace_back( - internal::AsyncWriteArray::Spec{ - std::move(fill_value), - // Since all dimensions are resizable, just - // specify unbounded `valid_data_bounds`. - Box<>(metadata.rank), - ContiguousLayoutPermutation<>( - span(metadata.inner_order.data(), metadata.rank))}, - metadata.chunk_shape); - component.array_spec.fill_value_comparison_kind = - EqualityComparisonKind::identical; + + // Create one component per field (like zarr v2) + for (size_t field_i = 0; field_i < metadata.data_type.fields.size(); + ++field_i) { + const auto& field = metadata.data_type.fields[field_i]; + auto fill_value = metadata.fill_value[field_i]; + if (!fill_value.valid()) { + // Use value-initialized rank-0 fill value (like zarr v2) + fill_value = AllocateArray(span{}, c_order, value_init, + field.dtype); + } + auto chunk_fill_value = + BroadcastArray(fill_value, BoxView<>(metadata.rank)).value(); + + auto& component = components.emplace_back( + internal::AsyncWriteArray::Spec{ + std::move(chunk_fill_value), + // Since all dimensions are resizable, just + // specify unbounded `valid_data_bounds`. 
+ Box<>(metadata.rank), + ContiguousLayoutPermutation<>( + span(metadata.inner_order.data(), metadata.rank))}, + metadata.chunk_shape); + component.array_spec.fill_value_comparison_kind = + EqualityComparisonKind::identical; + } return internal::ChunkGridSpecification(std::move(components)); } @@ -381,7 +398,7 @@ class DataCacheBase Result> GetExternalToInternalTransform( const void* metadata_ptr, size_t component_index) override { - assert(component_index == 0); + // component_index corresponds to the selected field index const auto& metadata = *static_cast(metadata_ptr); const DimensionIndex rank = metadata.rank; std::string_view normalized_dimension_names[kMaxRank]; @@ -404,10 +421,16 @@ class DataCacheBase absl::Status GetBoundSpecData(KvsDriverSpec& spec_base, const void* metadata_ptr, size_t component_index) override { - assert(component_index == 0); auto& spec = static_cast(spec_base); const auto& metadata = *static_cast(metadata_ptr); spec.metadata_constraints = ZarrMetadataConstraints(metadata); + // Encode selected_field from component_index + if (metadata.data_type.has_fields && + component_index < metadata.data_type.fields.size()) { + spec.selected_field = metadata.data_type.fields[component_index].name; + } else { + spec.selected_field.clear(); + } return absl::OkStatus(); } @@ -513,7 +536,8 @@ class ZarrDriver : public ZarrDriverBase { AnyFlowReceiver> receiver) override { return cache()->zarr_chunk_cache().Read( - {std::move(request), GetCurrentDataStalenessBound(), + {std::move(request), this->component_index(), + GetCurrentDataStalenessBound(), this->fill_value_mode_.fill_missing_data_reads}, std::move(receiver)); } @@ -523,7 +547,7 @@ class ZarrDriver : public ZarrDriverBase { AnyFlowReceiver> receiver) override { return cache()->zarr_chunk_cache().Write( - {std::move(request), + {std::move(request), this->component_index(), this->fill_value_mode_.store_data_equal_to_fill_value}, std::move(receiver)); } @@ -621,7 +645,8 @@ class 
ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { *static_cast(initializer.metadata.get()); return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, std::move(initializer), spec().store.path, - metadata.codec_state, /*data_cache_pool=*/*cache_pool()); + metadata.codec_state, metadata.data_type, + /*data_cache_pool=*/*cache_pool()); } Result GetComponentIndex(const void* metadata_ptr, @@ -629,9 +654,12 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { const auto& metadata = *static_cast(metadata_ptr); TENSORSTORE_RETURN_IF_ERROR( ValidateMetadata(metadata, spec().metadata_constraints)); + TENSORSTORE_ASSIGN_OR_RETURN( + auto field_index, + GetFieldIndex(metadata.data_type, spec().selected_field)); TENSORSTORE_RETURN_IF_ERROR( - ValidateMetadataSchema(metadata, spec().schema)); - return 0; + ValidateMetadataSchema(metadata, field_index, spec().schema)); + return field_index; } }; diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 8d1c9d49e..281b9c98b 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -76,20 +76,12 @@ namespace { /// \param value The zarr metadata "dtype" JSON specification. /// \param out[out] Must be non-null. Filled with the parsed dtype on success. /// \error `absl::StatusCode::kInvalidArgument' if `value` is invalid. -Result ParseDTypeNoDerived(const nlohmann::json& value) { - ZarrDType out; - if (value.is_string()) { - // Single field. 
- out.has_fields = false; - out.fields.resize(1); - TENSORSTORE_ASSIGN_OR_RETURN( - static_cast(out.fields[0]), - ParseBaseDType(value.get())); - return out; - } +// Helper to parse fields array (used by both array format and object format) +absl::Status ParseFieldsArray(const nlohmann::json& fields_json, + ZarrDType& out) { out.has_fields = true; - auto parse_result = internal_json::JsonParseArray( - value, + return internal_json::JsonParseArray( + fields_json, [&](ptrdiff_t size) { out.fields.resize(size); return absl::OkStatus(); @@ -140,7 +132,51 @@ Result ParseDTypeNoDerived(const nlohmann::json& value) { } }); }); - if (!parse_result.ok()) return parse_result; +} + +Result ParseDTypeNoDerived(const nlohmann::json& value) { + ZarrDType out; + if (value.is_string()) { + // Single field. + out.has_fields = false; + out.fields.resize(1); + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast(out.fields[0]), + ParseBaseDType(value.get())); + return out; + } + // Handle extended object format: + // {"name": "structured", "configuration": {"fields": [...]}} + if (value.is_object()) { + if (value.contains("name") && value.contains("configuration")) { + std::string type_name; + TENSORSTORE_RETURN_IF_ERROR( + internal_json::JsonRequireValueAs(value["name"], &type_name)); + if (type_name == "structured") { + const auto& config = value["configuration"]; + if (!config.is_object() || !config.contains("fields")) { + return absl::InvalidArgumentError( + "Structured data type requires 'configuration' object with " + "'fields' array"); + } + TENSORSTORE_RETURN_IF_ERROR(ParseFieldsArray(config["fields"], out)); + return out; + } + // For other named types, try to parse as a base dtype + out.has_fields = false; + out.fields.resize(1); + TENSORSTORE_ASSIGN_OR_RETURN( + static_cast(out.fields[0]), + ParseBaseDType(type_name)); + return out; + } + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected string, array, or object with 'name' and 'configuration', " + "but received: 
", + value.dump())); + } + // Handle array format: [["field1", "type1"], ["field2", "type2"], ...] + TENSORSTORE_RETURN_IF_ERROR(ParseFieldsArray(value, out)); return out; } diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index c96c31426..880991e8c 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -31,7 +31,10 @@ #include #include +#include + #include "absl/algorithm/container.h" +#include "absl/strings/escaping.h" #include "absl/base/casts.h" #include "absl/base/optimization.h" #include "absl/meta/type_traits.h" @@ -282,16 +285,44 @@ absl::Status FillValueJsonBinder::operator()( TENSORSTORE_RETURN_IF_ERROR( DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); } else { - if (!j->is_array()) { - return internal_json::ExpectedError(*j, "array"); - } - if (j->size() != dtype.fields.size()) { - return internal_json::ExpectedError( - *j, tensorstore::StrCat("array of size ", dtype.fields.size())); - } - for (size_t i = 0; i < dtype.fields.size(); ++i) { - TENSORSTORE_RETURN_IF_ERROR( - DecodeSingle((*j)[i], dtype.fields[i].dtype, (*obj)[i])); + // For structured types, handle both array format and base64-encoded string + if (j->is_string()) { + // Decode base64-encoded fill value for entire struct + std::string b64_decoded; + if (!absl::Base64Unescape(j->get(), &b64_decoded)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected valid base64-encoded fill value, but received: ", + j->dump())); + } + // Verify size matches expected struct size + if (static_cast(b64_decoded.size()) != + dtype.bytes_per_outer_element) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected ", dtype.bytes_per_outer_element, + " base64-encoded bytes for fill_value, but received ", + b64_decoded.size(), " bytes")); + } + // Extract per-field fill values from decoded bytes + for (size_t i = 0; i < dtype.fields.size(); ++i) { + const auto& field = dtype.fields[i]; + auto arr = 
AllocateArray(span{}, c_order, default_init, + field.dtype); + std::memcpy(arr.data(), b64_decoded.data() + field.byte_offset, + field.dtype->size); + (*obj)[i] = std::move(arr); + } + } else if (j->is_array()) { + if (j->size() != dtype.fields.size()) { + return internal_json::ExpectedError( + *j, tensorstore::StrCat("array of size ", dtype.fields.size())); + } + for (size_t i = 0; i < dtype.fields.size(); ++i) { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle((*j)[i], dtype.fields[i].dtype, (*obj)[i])); + } + } else { + return internal_json::ExpectedError(*j, + "array or base64-encoded string"); } } return absl::OkStatus(); @@ -561,28 +592,33 @@ std::string ZarrMetadata::GetCompatibilityKey() const { } absl::Status ValidateMetadata(ZarrMetadata& metadata) { + // Determine if this is a structured type with multiple fields + const bool is_structured = + metadata.data_type.fields.size() > 1 || + (metadata.data_type.fields.size() == 1 && + !metadata.data_type.fields[0].outer_shape.empty()); + + // Build the codec shape - for structured types, include bytes dimension + std::vector codec_shape(metadata.chunk_shape.begin(), + metadata.chunk_shape.end()); + if (is_structured) { + codec_shape.push_back(metadata.data_type.bytes_per_outer_element); + } + if (!metadata.codecs) { ArrayCodecResolveParameters decoded; - if (metadata.data_type.fields.size() == 1 && - metadata.data_type.fields[0].outer_shape.empty()) { + if (!is_structured) { decoded.dtype = metadata.data_type.fields[0].dtype; + decoded.rank = metadata.rank; } else { + // For structured types, use byte dtype with extra dimension decoded.dtype = dtype_v; - // TODO: Verify this works for structured types. - // Zarr2 uses a "scalar" array concept with byte storage for chunks. + decoded.rank = metadata.rank + 1; } - decoded.rank = metadata.rank; // Fill value for codec resolve might be complex. - // Zarr3 codecs usually don't depend on fill value except for some like - // "sharding_indexed"? 
Sharding uses fill_value for missing chunks. - if (metadata.fill_value.size() == 1) { + // For structured types, create a byte fill value + if (metadata.fill_value.size() == 1 && !is_structured) { decoded.fill_value = metadata.fill_value[0]; - } else { - // How to represent structured fill value for codec? - // Sharding expects a single array. - // If we use structured type, the "array" is bytes. - // We might need to encode the fill value to bytes. - // For now, leave empty if multiple fields. } BytesCodecResolveParameters encoded; @@ -593,17 +629,19 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { // Get codec chunk layout info. ArrayDataTypeAndShapeInfo array_info; - // array_info.dtype used here to validate codec compatibility. - if (metadata.data_type.fields.size() == 1 && - metadata.data_type.fields[0].outer_shape.empty()) { + if (!is_structured) { array_info.dtype = metadata.data_type.fields[0].dtype; + array_info.rank = metadata.rank; + std::copy_n(metadata.chunk_shape.begin(), metadata.rank, + array_info.shape.emplace().begin()); } else { array_info.dtype = dtype_v; + array_info.rank = metadata.rank + 1; + auto& shape = array_info.shape.emplace(); + std::copy_n(metadata.chunk_shape.begin(), metadata.rank, shape.begin()); + shape[metadata.rank] = metadata.data_type.bytes_per_outer_element; } - array_info.rank = metadata.rank; - std::copy_n(metadata.chunk_shape.begin(), metadata.rank, - array_info.shape.emplace().begin()); ArrayCodecChunkLayoutInfo layout_info; TENSORSTORE_RETURN_IF_ERROR( metadata.codec_specs.GetDecodedChunkLayout(array_info, layout_info)); @@ -617,7 +655,7 @@ absl::Status ValidateMetadata(ZarrMetadata& metadata) { } TENSORSTORE_ASSIGN_OR_RETURN(metadata.codec_state, - metadata.codecs->Prepare(metadata.chunk_shape)); + metadata.codecs->Prepare(codec_shape)); return absl::OkStatus(); } From c2e73cd6b1a2dcd5499522dce0bacd378af43279 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 24 Nov 2025 22:57:22 +0000 Subject: [PATCH 
03/59] Local testing and examples --- examples/BUILD | 23 +++ examples/CMakeLists.txt | 163 ++++++++++++++++++ examples/read_structured_zarr3.cc | 271 ++++++++++++++++++++++++++++++ 3 files changed, 457 insertions(+) create mode 100644 examples/CMakeLists.txt create mode 100644 examples/read_structured_zarr3.cc diff --git a/examples/BUILD b/examples/BUILD index 94acdba14..4dcb2d604 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -122,3 +122,26 @@ tensorstore_cc_binary( "@riegeli//riegeli/bytes:writer", ], ) + +tensorstore_cc_binary( + name = "read_structured_zarr3", + srcs = ["read_structured_zarr3.cc"], + deps = [ + "//tensorstore", + "//tensorstore:array", + "//tensorstore:context", + "//tensorstore:data_type", + "//tensorstore:index", + "//tensorstore:open", + "//tensorstore:open_mode", + "//tensorstore:spec", + "//tensorstore/driver/zarr3", + "//tensorstore/kvstore/file", + "//tensorstore/util:result", + "//tensorstore/util:status", + "@abseil-cpp//absl/flags:flag", + "@abseil-cpp//absl/flags:parse", + "@abseil-cpp//absl/status", + "@nlohmann_json//:json", + ], +) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 000000000..92e9857fa --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,163 @@ +# Standalone CMakeLists.txt for read_structured_zarr3 example +# +# Build instructions: +# mkdir -p /home/ubuntu/source/tensorstore/examples/build +# cd /home/ubuntu/source/tensorstore/examples/build +# cmake .. 
+# make +# +# Run: +# ./read_structured_zarr3 --zarr_path=/home/ubuntu/source/tensorstore/filt_mig.mdio/headers + +cmake_minimum_required(VERSION 3.24) +project(read_structured_zarr3 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Path to the tensorstore build directory +set(TENSORSTORE_BUILD_DIR "/home/ubuntu/source/tensorstore/build" CACHE PATH "Path to tensorstore build directory") +set(TENSORSTORE_SOURCE_DIR "/home/ubuntu/source/tensorstore" CACHE PATH "Path to tensorstore source directory") +set(DEPS_DIR "${TENSORSTORE_BUILD_DIR}/_deps") + +# Include paths (matching what tensorstore tests use) +include_directories( + ${TENSORSTORE_SOURCE_DIR} + ${DEPS_DIR}/absl-src + ${DEPS_DIR}/re2-src + ${DEPS_DIR}/riegeli-src +) + +include_directories(SYSTEM + ${DEPS_DIR}/half-build/include + ${DEPS_DIR}/half-src/include + ${DEPS_DIR}/nlohmann_json-build/include + ${DEPS_DIR}/nlohmann_json-src/include + ${TENSORSTORE_BUILD_DIR} +) + +# Compiler flags +add_compile_options( + -fPIE + -Wno-deprecated-declarations + -Wno-sign-compare + -Wno-unused-but-set-parameter + -Wno-maybe-uninitialized + -Wno-sequence-point + -Wno-unknown-warning-option + -Wno-stringop-overflow + -fsized-deallocation +) + +# Find all the static libraries we need from the tensorstore build +file(GLOB TENSORSTORE_LIBS "${TENSORSTORE_BUILD_DIR}/libtensorstore*.a") +file(GLOB_RECURSE ABSEIL_LIBS "${DEPS_DIR}/absl-build/absl/*.a") +file(GLOB_RECURSE RIEGELI_LIBS "${DEPS_DIR}/riegeli-build/*.a") + +# Additional dependency libraries - corrected paths +file(GLOB_RECURSE BLOSC_LIBS "${DEPS_DIR}/blosc-build/*.a") +file(GLOB_RECURSE ZSTD_LIBS "${DEPS_DIR}/zstd-build/*.a") +file(GLOB_RECURSE RE2_LIBS "${DEPS_DIR}/re2-build/*.a") +file(GLOB_RECURSE SNAPPY_LIBS "${DEPS_DIR}/snappy-build/*.a") +file(GLOB_RECURSE BROTLI_LIBS "${DEPS_DIR}/brotli-build/*.a") +file(GLOB_RECURSE LZ4_LIBS "${DEPS_DIR}/lz4-build/*.a") +file(GLOB_RECURSE ZLIB_LIBS "${DEPS_DIR}/zlib-build/*.a") 
+file(GLOB_RECURSE PROTOBUF_LIBS "${DEPS_DIR}/protobuf-build/*.a") +file(GLOB_RECURSE GRPC_LIBS "${DEPS_DIR}/grpc-build/*.a") +file(GLOB_RECURSE CARES_LIBS "${DEPS_DIR}/c-ares-build/*.a") +file(GLOB_RECURSE SSL_LIBS "${DEPS_DIR}/boringssl-build/ssl/*.a") +file(GLOB_RECURSE CRYPTO_LIBS "${DEPS_DIR}/boringssl-build/crypto/*.a") +file(GLOB_RECURSE LIBLZMA_LIBS "${DEPS_DIR}/liblzma-build/*.a") +file(GLOB_RECURSE BZIP2_LIBS "${DEPS_DIR}/bzip2-build/*.a") +file(GLOB_RECURSE JPEG_LIBS "${DEPS_DIR}/jpeg-build/*.a") +file(GLOB_RECURSE PNG_LIBS "${DEPS_DIR}/png-build/*.a") +file(GLOB_RECURSE TIFF_LIBS "${DEPS_DIR}/tiff-build/*.a") +file(GLOB_RECURSE AVIF_LIBS "${DEPS_DIR}/avif-build/*.a") +file(GLOB_RECURSE AOM_LIBS "${DEPS_DIR}/aom-build/*.a") +file(GLOB_RECURSE WEBP_LIBS "${DEPS_DIR}/webp-build/*.a") +file(GLOB_RECURSE CURL_LIBS "${DEPS_DIR}/curl-build/*.a") + +# Create executable +add_executable(read_structured_zarr3 read_structured_zarr3.cc) + +# Link libraries - use whole-archive for libraries that use static registration +# These include drivers, codecs, kvstores, and context resource providers +target_link_libraries(read_structured_zarr3 PRIVATE + # Force inclusion of libraries with static registrations + -Wl,--whole-archive + + # Context resource providers + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_data_copy_concurrency_resource.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_file_io_concurrency_resource.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_cache_cache_pool_resource.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_concurrency_resource.a + + # Zarr3 driver and codecs + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_driver.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_blosc.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_bytes.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_crc32c.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_gzip.a + 
${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_transpose.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_zstd.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_sharding_indexed.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_codec_chain_spec.a + + # File kvstore and its resource providers + ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file.a + ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file_file_resource.a + + -Wl,--no-whole-archive + + -Wl,--start-group + + # Tensorstore libs + ${TENSORSTORE_LIBS} + + # Riegeli + ${RIEGELI_LIBS} + + # Abseil + ${ABSEIL_LIBS} + + # Compression libs + ${BLOSC_LIBS} + ${ZSTD_LIBS} + ${LZ4_LIBS} + ${SNAPPY_LIBS} + ${BROTLI_LIBS} + ${ZLIB_LIBS} + ${LIBLZMA_LIBS} + ${BZIP2_LIBS} + + # Regex + ${RE2_LIBS} + + # Protocol buffers and gRPC + ${PROTOBUF_LIBS} + ${GRPC_LIBS} + ${CARES_LIBS} + + # SSL/TLS + ${SSL_LIBS} + ${CRYPTO_LIBS} + + # Image libraries + ${JPEG_LIBS} + ${PNG_LIBS} + ${TIFF_LIBS} + ${AVIF_LIBS} + ${AOM_LIBS} + ${WEBP_LIBS} + + # HTTP + ${CURL_LIBS} + + -Wl,--end-group + + # System libraries + pthread + dl + m + rt +) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc new file mode 100644 index 000000000..1caacd8f5 --- /dev/null +++ b/examples/read_structured_zarr3.cc @@ -0,0 +1,271 @@ +// Copyright 2024 The TensorStore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Standalone test for reading structured data from a Zarr v3 array. +// +// This test opens an existing zarr3 array with structured data type, +// reads the "inline" field, and prints all values. +// +// Usage: +// bazel run //examples:read_structured_zarr3 -- /path/to/zarr/array +// +// Or with cmake: +// cd examples/build && ./read_structured_zarr3 + +#include + +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/status/status.h" +#include +#include "tensorstore/array.h" +#include "tensorstore/context.h" +#include "tensorstore/data_type.h" +#include "tensorstore/index.h" +#include "tensorstore/open.h" +#include "tensorstore/open_mode.h" +#include "tensorstore/spec.h" +#include "tensorstore/tensorstore.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/status.h" + +ABSL_FLAG(std::string, zarr_path, + "/home/ubuntu/source/tensorstore/filt_mig.mdio/headers", + "Path to the zarr3 array directory"); + +namespace { + +using ::tensorstore::Index; + +// Field layout from the zarr.json metadata: +// The structured dtype has the following fields with their byte offsets: +// trace_seq_num_line: int32 @ 0 +// trace_seq_num_reel: int32 @ 4 +// ... (many more fields) ... 
+// inline: int32 @ 180 +// crossline: int32 @ 184 +// cdp_x: int32 @ 188 +// cdp_y: int32 @ 192 +// +// Total struct size: 196 bytes (matches blosc typesize) + +constexpr size_t kInlineFieldOffset = 180; +constexpr size_t kStructSize = 196; + +// Read and parse the zarr.json metadata to display info about structured type +void PrintZarrMetadata(const std::string& zarr_path) { + std::string metadata_path = zarr_path + "/zarr.json"; + std::ifstream file(metadata_path); + if (!file.is_open()) { + std::cerr << "Could not open " << metadata_path << std::endl; + return; + } + + nlohmann::json metadata; + try { + file >> metadata; + } catch (const nlohmann::json::parse_error& e) { + std::cerr << "Failed to parse zarr.json: " << e.what() << std::endl; + return; + } + + std::cout << "\n=== Zarr Metadata ===" << std::endl; + std::cout << "Shape: " << metadata["shape"].dump() << std::endl; + std::cout << "Dimension names: " << metadata["dimension_names"].dump() + << std::endl; + + if (metadata.contains("data_type")) { + auto& dt = metadata["data_type"]; + std::cout << "\nData type format:" << std::endl; + if (dt.is_object()) { + std::cout << " Type: object with name=\"" << dt["name"].get() + << "\"" << std::endl; + if (dt.contains("configuration") && + dt["configuration"].contains("fields")) { + auto& fields = dt["configuration"]["fields"]; + std::cout << " Number of fields: " << fields.size() << std::endl; + std::cout << " Fields:" << std::endl; + size_t byte_offset = 0; + for (const auto& field : fields) { + std::string name = field[0].get(); + std::string type = field[1].get(); + size_t size = (type == "int32" || type == "uint32" || type == "float32") + ? 
4 + : 2; // int16/uint16 + std::cout << " " << name << ": " << type << " @ byte " << byte_offset + << std::endl; + byte_offset += size; + } + std::cout << " Total struct size: " << byte_offset << " bytes" + << std::endl; + } + } else if (dt.is_string()) { + std::cout << " Type: simple \"" << dt.get() << "\"" + << std::endl; + } else if (dt.is_array()) { + std::cout << " Type: array with " << dt.size() << " fields" << std::endl; + } + } + + if (metadata.contains("codecs")) { + std::cout << "\nCodecs: " << metadata["codecs"].dump(2) << std::endl; + } +} + +absl::Status Run(const std::string& zarr_path) { + std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; + std::cout << "Opening zarr3 array at: " << zarr_path << std::endl; + + // First, display metadata information + PrintZarrMetadata(zarr_path); + + auto context = tensorstore::Context::Default(); + + // Create spec for opening the zarr3 array + // Note: "field" is at the driver level, not inside kvstore (same as zarr v2) + ::nlohmann::json spec_json = { + {"driver", "zarr3"}, + {"kvstore", + { + {"driver", "file"}, + {"path", zarr_path + "/"}, + }}, + {"field", "inline"}, // Field at byte offset 180 + }; + + std::cout << "\n=== Opening TensorStore ===" << std::endl; + std::cout << "Spec: " << spec_json.dump(2) << std::endl; + + // Open the TensorStore + auto open_result = + tensorstore::Open(spec_json, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!open_result.ok()) { + std::cout << "\n=== Open Failed ===" << std::endl; + std::cout << "Status: " << open_result.status() << std::endl; + std::cout << "\nThis error is expected if the zarr3 driver's dtype parsing\n" + << "does not yet support the extended structured data type format:\n" + << " {\"name\": \"structured\", \"configuration\": {\"fields\": [...]}}\n" + << std::endl; + std::cout << "The dtype.cc ParseDTypeNoDerived() function currently handles:\n" + << " 1. 
String format: \"int32\"\n" + << " 2. Array format: [[\"field1\", \"int32\"], ...]\n" + << "\nBut the zarr.json uses the extended object format shown above." + << std::endl; + return open_result.status(); + } + + auto store = std::move(open_result).value(); + + // Get information about the array + auto domain = store.domain(); + std::cout << "\n=== Array Info ===" << std::endl; + std::cout << "Domain: " << domain << std::endl; + std::cout << "Dtype: " << store.dtype() << std::endl; + std::cout << "Rank: " << store.rank() << std::endl; + + auto shape = domain.shape(); + std::cout << "Shape: ["; + for (int i = 0; i < shape.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << shape[i]; + } + std::cout << "]" << std::endl; + + // Read all data + std::cout << "\n=== Reading Data ===" << std::endl; + TENSORSTORE_ASSIGN_OR_RETURN( + auto array, tensorstore::Read(store).result()); + + std::cout << "Read complete. Array size: " << array.num_elements() + << " elements" << std::endl; + std::cout << "Data type: " << array.dtype() << std::endl; + + // Since field="inline" was specified, the array contains just int32 values + // directly - no struct extraction needed! 
+ Index num_inline = shape[0]; + Index num_crossline = shape[1]; + + std::cout << "\n=== Inline field values (shape: " << num_inline << " x " + << num_crossline << ") ===" << std::endl; + + // Cast to int32 pointer since the data is already the inline field values + auto int_ptr = reinterpret_cast(array.data()); + + // Print first 10 rows (or fewer if less data) + Index rows_to_print = std::min(num_inline, Index{10}); + Index cols_to_print = std::min(num_crossline, Index{10}); + + for (Index i = 0; i < rows_to_print; ++i) { + for (Index j = 0; j < cols_to_print; ++j) { + std::cout << int_ptr[i * num_crossline + j]; + if (j < cols_to_print - 1) { + std::cout << "\t"; + } + } + if (num_crossline > cols_to_print) { + std::cout << "\t..."; + } + std::cout << std::endl; + } + if (num_inline > rows_to_print) { + std::cout << "... (" << (num_inline - rows_to_print) << " more rows)" + << std::endl; + } + + std::cout << "\n=== Summary ===" << std::endl; + std::cout << "Successfully read " << (num_inline * num_crossline) + << " inline values" << std::endl; + + // Show some statistics + int32_t min_val = int_ptr[0], max_val = int_ptr[0]; + int64_t sum = 0; + for (Index i = 0; i < num_inline * num_crossline; ++i) { + min_val = std::min(min_val, int_ptr[i]); + max_val = std::max(max_val, int_ptr[i]); + sum += int_ptr[i]; + } + std::cout << "Min value: " << min_val << std::endl; + std::cout << "Max value: " << max_val << std::endl; + std::cout << "Mean value: " << (static_cast(sum) / (num_inline * num_crossline)) << std::endl; + + return absl::OkStatus(); +} + +} // namespace + +int main(int argc, char** argv) { + absl::ParseCommandLine(argc, argv); + + std::string zarr_path = absl::GetFlag(FLAGS_zarr_path); + if (zarr_path.empty()) { + std::cerr << "Error: --zarr_path is required" << std::endl; + return 1; + } + + auto status = Run(zarr_path); + if (!status.ok()) { + std::cerr << "\nFinal status: " << status << std::endl; + return 1; + } + + return 0; +} From 
9e8ed947f5912394ca715d36d6fd1eb630d04e8a Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 25 Nov 2025 18:12:58 +0000 Subject: [PATCH 04/59] Begin adding support for opening struct arrays as void and add support for raw bits dtype --- examples/read_structured_zarr3.cc | 324 +++++++++++++++++++----- tensorstore/driver/zarr3/chunk_cache.cc | 7 + tensorstore/driver/zarr3/driver.cc | 180 +++++++++++-- tensorstore/driver/zarr3/dtype.cc | 52 +++- tensorstore/driver/zarr3/dtype_test.cc | 14 + tensorstore/driver/zarr3/metadata.cc | 89 ++++++- 6 files changed, 565 insertions(+), 101 deletions(-) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc index 1caacd8f5..259eade34 100644 --- a/examples/read_structured_zarr3.cc +++ b/examples/read_structured_zarr3.cc @@ -12,16 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Standalone test for reading structured data from a Zarr v3 array. +// Standalone test for reading structured data from Zarr v3 arrays. // -// This test opens an existing zarr3 array with structured data type, -// reads the "inline" field, and prints all values. +// This test opens two Zarr v3 arrays: +// 1. A structured array with named fields (headers/) +// 2. A raw bytes array containing struct data (raw_headers/) +// +// Both arrays should contain the same data, allowing comparison of: +// - Field-based access vs manual byte extraction +// - Structured dtype parsing vs raw byte handling // // Usage: -// bazel run //examples:read_structured_zarr3 -- /path/to/zarr/array +// bazel run //examples:read_structured_zarr3 -- /path/to/parent/dir // // Or with cmake: -// cd examples/build && ./read_structured_zarr3 +// cd examples/build && ./read_structured_zarr3 --zarr_path=/path/to/parent/dir +// +// Where the parent dir contains both 'headers/' and 'raw_headers/' subdirs. 
#include @@ -45,9 +52,15 @@ #include "tensorstore/util/result.h" #include "tensorstore/util/status.h" +// Internal headers for testing dtype parsing +#include "tensorstore/driver/zarr3/dtype.h" + +// Additional headers for string operations +#include "absl/strings/str_join.h" + ABSL_FLAG(std::string, zarr_path, - "/home/ubuntu/source/tensorstore/filt_mig.mdio/headers", - "Path to the zarr3 array directory"); + "/home/ubuntu/source/tensorstore/filt_mig.mdio", + "Path to the parent .mdio directory containing headers/ and raw_headers/"); namespace { @@ -128,56 +141,13 @@ void PrintZarrMetadata(const std::string& zarr_path) { } } -absl::Status Run(const std::string& zarr_path) { - std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; - std::cout << "Opening zarr3 array at: " << zarr_path << std::endl; - - // First, display metadata information - PrintZarrMetadata(zarr_path); - - auto context = tensorstore::Context::Default(); - - // Create spec for opening the zarr3 array - // Note: "field" is at the driver level, not inside kvstore (same as zarr v2) - ::nlohmann::json spec_json = { - {"driver", "zarr3"}, - {"kvstore", - { - {"driver", "file"}, - {"path", zarr_path + "/"}, - }}, - {"field", "inline"}, // Field at byte offset 180 - }; - - std::cout << "\n=== Opening TensorStore ===" << std::endl; - std::cout << "Spec: " << spec_json.dump(2) << std::endl; - - // Open the TensorStore - auto open_result = - tensorstore::Open(spec_json, context, tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!open_result.ok()) { - std::cout << "\n=== Open Failed ===" << std::endl; - std::cout << "Status: " << open_result.status() << std::endl; - std::cout << "\nThis error is expected if the zarr3 driver's dtype parsing\n" - << "does not yet support the extended structured data type format:\n" - << " {\"name\": \"structured\", \"configuration\": {\"fields\": [...]}}\n" - << std::endl; - std::cout << "The dtype.cc ParseDTypeNoDerived() 
function currently handles:\n" - << " 1. String format: \"int32\"\n" - << " 2. Array format: [[\"field1\", \"int32\"], ...]\n" - << "\nBut the zarr.json uses the extended object format shown above." - << std::endl; - return open_result.status(); - } - - auto store = std::move(open_result).value(); - +// Helper function to read and display inline field from an array +absl::Status ReadInlineField(const tensorstore::TensorStore<>& store, + const std::string& array_name, + bool is_raw_bytes = false) { // Get information about the array auto domain = store.domain(); - std::cout << "\n=== Array Info ===" << std::endl; + std::cout << "\n=== " << array_name << " Array Info ===" << std::endl; std::cout << "Domain: " << domain << std::endl; std::cout << "Dtype: " << store.dtype() << std::endl; std::cout << "Rank: " << store.rank() << std::endl; @@ -191,7 +161,7 @@ absl::Status Run(const std::string& zarr_path) { std::cout << "]" << std::endl; // Read all data - std::cout << "\n=== Reading Data ===" << std::endl; + std::cout << "\n=== Reading " << array_name << " Data ===" << std::endl; TENSORSTORE_ASSIGN_OR_RETURN( auto array, tensorstore::Read(store).result()); @@ -199,16 +169,46 @@ absl::Status Run(const std::string& zarr_path) { << " elements" << std::endl; std::cout << "Data type: " << array.dtype() << std::endl; - // Since field="inline" was specified, the array contains just int32 values - // directly - no struct extraction needed! - Index num_inline = shape[0]; - Index num_crossline = shape[1]; + Index num_inline, num_crossline; + const int32_t* int_ptr; + + if (is_raw_bytes) { + // For raw bytes, we need to extract the inline field manually + // Shape is [inline, crossline, struct_size] + num_inline = shape[0]; + num_crossline = shape[1]; + Index struct_size = shape[2]; + if (struct_size != kStructSize) { + std::cout << "Warning: Raw struct size (" << struct_size + << ") differs from expected header struct size (" << kStructSize + << "). Assuming padding." 
<< std::endl; + } - std::cout << "\n=== Inline field values (shape: " << num_inline << " x " - << num_crossline << ") ===" << std::endl; + // Extract inline field (4 bytes starting at offset 180) + auto byte_ptr = reinterpret_cast(array.data()); + std::vector inline_values(num_inline * num_crossline); - // Cast to int32 pointer since the data is already the inline field values - auto int_ptr = reinterpret_cast(array.data()); + for (Index i = 0; i < num_inline; ++i) { + for (Index j = 0; j < num_crossline; ++j) { + Index struct_offset = (i * num_crossline + j) * struct_size; + Index field_offset = struct_offset + kInlineFieldOffset; + std::memcpy(&inline_values[i * num_crossline + j], + byte_ptr + field_offset, 4); + } + } + + std::cout << "Extracted inline field from raw bytes at offset " + << kInlineFieldOffset << std::endl; + int_ptr = inline_values.data(); + } else { + // For structured array, field access already gave us int32 values + num_inline = shape[0]; + num_crossline = shape[1]; + int_ptr = reinterpret_cast(array.data()); + } + + std::cout << "\n=== Inline field values from " << array_name + << " (shape: " << num_inline << " x " << num_crossline << ") ===" << std::endl; // Print first 10 rows (or fewer if less data) Index rows_to_print = std::min(num_inline, Index{10}); @@ -231,10 +231,10 @@ absl::Status Run(const std::string& zarr_path) { << std::endl; } - std::cout << "\n=== Summary ===" << std::endl; + std::cout << "\n=== " << array_name << " Summary ===" << std::endl; std::cout << "Successfully read " << (num_inline * num_crossline) << " inline values" << std::endl; - + // Show some statistics int32_t min_val = int_ptr[0], max_val = int_ptr[0]; int64_t sum = 0; @@ -250,6 +250,189 @@ absl::Status Run(const std::string& zarr_path) { return absl::OkStatus(); } +absl::Status Run(const std::string& zarr_path) { + std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; + std::cout << "Opening zarr3 arrays in: " << zarr_path << std::endl; + + 
auto context = tensorstore::Context::Default(); + + // First, display metadata information for structured array + std::string headers_path = zarr_path + "/headers"; + PrintZarrMetadata(headers_path); + + // Test raw_bytes parsing by reading and parsing the raw_headers zarr.json + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "TESTING RAW_BYTES PARSING" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + std::string raw_metadata_path = zarr_path + "/raw_headers/zarr.json"; + std::ifstream raw_file(raw_metadata_path); + if (!raw_file.is_open()) { + std::cout << "Could not open " << raw_metadata_path << std::endl; + return absl::NotFoundError("Raw headers metadata not found"); + } + + nlohmann::json raw_metadata; + try { + raw_file >> raw_metadata; + } catch (const nlohmann::json::parse_error& e) { + std::cout << "Failed to parse raw zarr.json: " << e.what() << std::endl; + return absl::DataLossError("Invalid raw metadata JSON"); + } + + std::cout << "Raw headers data_type: " << raw_metadata["data_type"].dump(2) << std::endl; + + // Test parsing the raw_bytes data type + std::cout << "Testing raw_bytes dtype parsing..." 
<< std::endl; + + // For now, just verify the JSON structure is what we expect + if (!raw_metadata.contains("data_type")) { + std::cout << "FAILED: No data_type in metadata" << std::endl; + return absl::NotFoundError("Missing data_type"); + } + + auto& dt = raw_metadata["data_type"]; + if (!dt.is_object() || !dt.contains("name") || dt["name"] != "raw_bytes") { + std::cout << "FAILED: data_type is not raw_bytes extension" << std::endl; + return absl::InvalidArgumentError("Not raw_bytes extension"); + } + + if (!dt.contains("configuration") || !dt["configuration"].contains("length_bytes")) { + std::cout << "FAILED: Missing length_bytes in configuration" << std::endl; + return absl::InvalidArgumentError("Missing length_bytes"); + } + + int length_bytes = dt["configuration"]["length_bytes"]; + std::cout << "SUCCESS: Found raw_bytes extension with length_bytes = " << length_bytes << std::endl; + std::cout << "This should parse to:" << std::endl; + std::cout << " - Single field with byte_t dtype" << std::endl; + std::cout << " - Field shape: [" << length_bytes << "]" << std::endl; + std::cout << " - Bytes per outer element: " << length_bytes << std::endl; + + // Now actually test the parsing implementation + std::cout << "\n=== Testing ParseDType Implementation ===" << std::endl; + auto dtype_result = tensorstore::internal_zarr3::ParseDType(dt); + if (!dtype_result.ok()) { + std::cout << "FAILED: Could not parse raw_bytes data type: " << dtype_result.status() << std::endl; + return dtype_result.status(); + } + + auto dtype = std::move(dtype_result).value(); + std::cout << "SUCCESS: ParseDType worked!" 
<< std::endl; + std::cout << " Fields: " << dtype.fields.size() << std::endl; + std::cout << " Has fields: " << dtype.has_fields << std::endl; + std::cout << " Bytes per outer element: " << dtype.bytes_per_outer_element << std::endl; + + if (!dtype.fields.empty()) { + const auto& field = dtype.fields[0]; + std::cout << " Field name: '" << field.name << "'" << std::endl; + std::cout << " Field dtype: " << field.dtype << std::endl; + std::cout << " Field shape: [" << absl::StrJoin(field.field_shape, ", ") << "]" << std::endl; + std::cout << " Field num_inner_elements: " << field.num_inner_elements << std::endl; + std::cout << " Field num_bytes: " << field.num_bytes << std::endl; + } + + // Verify the parsing is correct + bool parsing_correct = true; + if (dtype.fields.size() != 1) { + std::cout << "ERROR: Expected 1 field, got " << dtype.fields.size() << std::endl; + parsing_correct = false; + } + if (dtype.fields[0].name != "") { + std::cout << "ERROR: Expected empty field name, got '" << dtype.fields[0].name << "'" << std::endl; + parsing_correct = false; + } + if (dtype.fields[0].dtype != tensorstore::dtype_v) { + std::cout << "ERROR: Expected byte_t dtype, got " << dtype.fields[0].dtype << std::endl; + parsing_correct = false; + } + if (dtype.fields[0].field_shape != std::vector{length_bytes}) { + std::cout << "ERROR: Expected field shape [" << length_bytes << "], got [" + << absl::StrJoin(dtype.fields[0].field_shape, ", ") << "]" << std::endl; + parsing_correct = false; + } + if (dtype.bytes_per_outer_element != length_bytes) { + std::cout << "ERROR: Expected " << length_bytes << " bytes per element, got " + << dtype.bytes_per_outer_element << std::endl; + parsing_correct = false; + } + + if (parsing_correct) { + std::cout << "\n✅ PARSING VERIFICATION: All checks passed!" << std::endl; + std::cout << "The raw_bytes extension is correctly parsed." << std::endl; + } else { + std::cout << "\n❌ PARSING VERIFICATION: Some checks failed!" 
<< std::endl; + return absl::InternalError("Parsing verification failed"); + } + + // Test 1: Read from structured array using field access + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "TEST 1: Reading from structured 'headers' array" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + ::nlohmann::json headers_spec = ::nlohmann::json::object(); + headers_spec["driver"] = "zarr3"; + headers_spec["kvstore"] = ::nlohmann::json::object(); + headers_spec["kvstore"]["driver"] = "file"; + headers_spec["kvstore"]["path"] = headers_path + "/"; + headers_spec["field"] = "inline"; // Extract inline field (int32 at byte offset 180) + + std::cout << "Spec: " << headers_spec.dump(2) << std::endl; + + auto headers_open_result = + tensorstore::Open(headers_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!headers_open_result.ok()) { + std::cout << "\n=== Headers Open Failed ===" << std::endl; + std::cout << "Status: " << headers_open_result.status() << std::endl; + return headers_open_result.status(); + } + + auto headers_store = std::move(headers_open_result).value(); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_store, "headers")); + + // Test 2: Read from raw bytes array (no special void access needed) + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "TEST 2: Reading from raw 'raw_headers' array" << std::endl; + std::cout << std::string(60, '=') << std::endl; + + std::string raw_headers_path = zarr_path + "/raw_headers"; + ::nlohmann::json raw_spec = ::nlohmann::json::object(); + raw_spec["driver"] = "zarr3"; + raw_spec["kvstore"] = ::nlohmann::json::object(); + raw_spec["kvstore"]["driver"] = "file"; + raw_spec["kvstore"]["path"] = raw_headers_path + "/"; + // No field specified - raw_bytes has a single anonymous field + + std::cout << "Spec: " << raw_spec.dump(2) << std::endl; + + auto raw_open_result = + tensorstore::Open(raw_spec, context, 
tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!raw_open_result.ok()) { + std::cout << "\n=== Raw Headers Open Failed ===" << std::endl; + std::cout << "Status: " << raw_open_result.status() << std::endl; + return raw_open_result.status(); + } + + auto raw_store = std::move(raw_open_result).value(); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); + + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "COMPARISON: Both methods should give identical inline field values" << std::endl; + std::cout << std::string(60, '=') << std::endl; + std::cout << "The structured 'headers' array provides field access convenience,\n" + << "while the raw 'raw_headers' array provides direct byte access.\n" + << "Both extract the inline field from byte offset " << kInlineFieldOffset + << " in " << kStructSize << "-byte structs." << std::endl; + + return absl::OkStatus(); +} + } // namespace int main(int argc, char** argv) { @@ -261,6 +444,15 @@ int main(int argc, char** argv) { return 1; } + // Verify the path structure + std::string headers_path = zarr_path + "/headers"; + std::string raw_headers_path = zarr_path + "/raw_headers"; + + std::cout << "Expecting arrays at:" << std::endl; + std::cout << " Structured: " << headers_path << std::endl; + std::cout << " Raw bytes: " << raw_headers_path << std::endl; + std::cout << std::endl; + auto status = Run(zarr_path); if (!status.ok()) { std::cerr << "\nFinal status: " << status << std::endl; diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 6bfa8c039..64b6d69fd 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -156,6 +156,13 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, const size_t num_fields = dtype_.fields.size(); absl::InlinedVector, 1> field_arrays(num_fields); + // Special case: void access - return raw bytes directly 
+ if (num_fields == 1 && dtype_.fields[0].name == "") { + TENSORSTORE_ASSIGN_OR_RETURN( + field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), + std::move(data))); + return field_arrays; + } // For single non-structured field, decode directly if (num_fields == 1 && dtype_.fields[0].outer_shape.empty()) { diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 1674a1c6d..b4d96da1f 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,8 @@ namespace tensorstore { namespace internal_zarr3 { +constexpr size_t kVoidFieldIndex = size_t(-1); + // Avoid anonymous namespace to workaround MSVC bug. // // https://developercommunity.visualstudio.com/t/Bug-involving-virtual-functions-templat/10424129 @@ -263,12 +266,29 @@ class DataCacheBase DimensionSet& implicit_lower_bounds, DimensionSet& implicit_upper_bounds) override { const auto& metadata = *static_cast(metadata_ptr); - assert(bounds.rank() == static_cast(metadata.shape.size())); - std::fill(bounds.origin().begin(), bounds.origin().end(), Index(0)); + assert(bounds.rank() >= static_cast(metadata.shape.size())); + std::fill(bounds.origin().begin(), + bounds.origin().begin() + metadata.shape.size(), Index(0)); std::copy(metadata.shape.begin(), metadata.shape.end(), bounds.shape().begin()); implicit_lower_bounds = false; - implicit_upper_bounds = true; + implicit_upper_bounds = false; + for (DimensionIndex i = 0; + i < static_cast(metadata.shape.size()); ++i) { + implicit_upper_bounds[i] = true; + } + if (bounds.rank() > static_cast(metadata.shape.size()) && + metadata.data_type.fields.size() == 1) { + const auto& field = metadata.data_type.fields[0]; + if (static_cast(metadata.shape.size() + + field.field_shape.size()) == + bounds.rank()) { + for (size_t i = 0; i < field.field_shape.size(); ++i) { + bounds.shape()[metadata.shape.size() + i] 
= field.field_shape[i]; + bounds.origin()[metadata.shape.size() + i] = 0; + } + } + } } Result> GetResizedMetadata( @@ -289,10 +309,47 @@ class DataCacheBase } static internal::ChunkGridSpecification GetChunkGridSpecification( - const ZarrMetadata& metadata) { + const ZarrMetadata& metadata, size_t field_index = 0) { assert(!metadata.fill_value.empty()); internal::ChunkGridSpecification::ComponentList components; + // Special case: void access - create single component for entire struct + if (field_index == kVoidFieldIndex) { + // For void access, use the fill_value from the single raw_bytes field + auto& fill_value = metadata.fill_value[0]; + std::cout << "[DEBUG] Void access fill_value: shape=" << fill_value.shape() + << ", dtype=" << fill_value.dtype() << std::endl; + + // Broadcast to shape [unbounded, unbounded, ..., struct_size] + std::vector target_shape(metadata.rank, kInfIndex); + target_shape.push_back(metadata.data_type.bytes_per_outer_element); + std::cout << "[DEBUG] Void access target_shape: ["; + for (size_t i = 0; i < target_shape.size(); ++i) { + if (i > 0) std::cout << ", "; + std::cout << target_shape[i]; + } + std::cout << "]" << std::endl; + auto chunk_fill_value = + BroadcastArray(fill_value, BoxView<>(target_shape)).value(); + + // Add extra dimension for struct size in bytes + std::vector chunk_shape_with_bytes = metadata.chunk_shape; + chunk_shape_with_bytes.push_back(metadata.data_type.bytes_per_outer_element); + + auto& component = components.emplace_back( + internal::AsyncWriteArray::Spec{ + std::move(chunk_fill_value), + // Since all dimensions are resizable, just + // specify unbounded `valid_data_bounds`. 
+ Box<>(metadata.rank + 1), + ContiguousLayoutPermutation<>( + span(metadata.inner_order.data(), metadata.rank + 1))}, + chunk_shape_with_bytes); + component.array_spec.fill_value_comparison_kind = + EqualityComparisonKind::identical; + return internal::ChunkGridSpecification(std::move(components)); + } + // Create one component per field (like zarr v2) for (size_t field_i = 0; field_i < metadata.data_type.fields.size(); ++field_i) { @@ -303,18 +360,47 @@ class DataCacheBase fill_value = AllocateArray(span{}, c_order, value_init, field.dtype); } + + // Handle fields with shape (e.g. raw_bytes) + const size_t field_rank = field.field_shape.size(); + + // 1. Construct target shape for broadcasting + std::vector target_shape(metadata.rank, kInfIndex); + target_shape.insert(target_shape.end(), field.field_shape.begin(), + field.field_shape.end()); + auto chunk_fill_value = - BroadcastArray(fill_value, BoxView<>(metadata.rank)).value(); + BroadcastArray(fill_value, BoxView<>(target_shape)).value(); + + // 2. Construct component chunk shape + std::vector component_chunk_shape = metadata.chunk_shape; + component_chunk_shape.insert(component_chunk_shape.end(), + field.field_shape.begin(), + field.field_shape.end()); + + // 3. Construct permutation + std::vector component_permutation(metadata.rank + + field_rank); + std::copy_n(metadata.inner_order.data(), metadata.rank, + component_permutation.begin()); + std::iota(component_permutation.begin() + metadata.rank, + component_permutation.end(), metadata.rank); + + // 4. Construct bounds + Box<> valid_data_bounds(metadata.rank + field_rank); + for (size_t i = 0; i < field_rank; ++i) { + valid_data_bounds[metadata.rank + i] = + IndexInterval::UncheckedSized(0, field.field_shape[i]); + } auto& component = components.emplace_back( internal::AsyncWriteArray::Spec{ std::move(chunk_fill_value), // Since all dimensions are resizable, just // specify unbounded `valid_data_bounds`. 
- Box<>(metadata.rank), - ContiguousLayoutPermutation<>( - span(metadata.inner_order.data(), metadata.rank))}, - metadata.chunk_shape); + std::move(valid_data_bounds), + ContiguousLayoutPermutation<>(component_permutation)}, + component_chunk_shape); component.array_spec.fill_value_comparison_kind = EqualityComparisonKind::identical; } @@ -342,7 +428,7 @@ class DataCacheBase [](std::string& out, DimensionIndex dim, Index grid_index) { absl::StrAppend(&out, grid_index); }, - rank, grid_indices); + rank, grid_indices.subspan(0, rank)); return key; } @@ -355,17 +441,21 @@ class DataCacheBase key_prefix_.size() + (metadata.chunk_key_encoding.kind == ChunkKeyEncoding::kDefault ? 2 : 0)); - return internal::ParseGridIndexKeyWithDimensionSeparator( - metadata.chunk_key_encoding.separator, - [](std::string_view part, DimensionIndex dim, Index& grid_index) { - if (part.empty() || !absl::ascii_isdigit(part.front()) || - !absl::ascii_isdigit(part.back()) || - !absl::SimpleAtoi(part, &grid_index)) { - return false; - } - return true; - }, - key, grid_indices); + if (!internal::ParseGridIndexKeyWithDimensionSeparator( + metadata.chunk_key_encoding.separator, + [](std::string_view part, DimensionIndex dim, Index& grid_index) { + if (part.empty() || !absl::ascii_isdigit(part.front()) || + !absl::ascii_isdigit(part.back()) || + !absl::SimpleAtoi(part, &grid_index)) { + return false; + } + return true; + }, + key, grid_indices.subspan(0, metadata.rank))) { + return false; + } + std::fill(grid_indices.begin() + metadata.rank, grid_indices.end(), 0); + return true; } Index MinGridIndexForLexicographicalOrder( @@ -378,7 +468,7 @@ class DataCacheBase *static_cast(initial_metadata().get()); if (metadata.chunk_key_encoding.kind == ChunkKeyEncoding::kDefault) { std::string key = tensorstore::StrCat(key_prefix_, "c"); - for (DimensionIndex i = 0; i < cell_indices.size(); ++i) { + for (DimensionIndex i = 0; i < metadata.rank; ++i) { tensorstore::StrAppend( &key, 
std::string_view(&metadata.chunk_key_encoding.separator, 1), cell_indices[i]); @@ -388,7 +478,7 @@ class DataCacheBase // Use "0" for rank 0 as a special case. std::string key = tensorstore::StrCat( key_prefix_, cell_indices.empty() ? 0 : cell_indices[0]); - for (DimensionIndex i = 1; i < cell_indices.size(); ++i) { + for (DimensionIndex i = 1; i < metadata.rank; ++i) { tensorstore::StrAppend( &key, std::string_view(&metadata.chunk_key_encoding.separator, 1), cell_indices[i]); @@ -400,7 +490,11 @@ class DataCacheBase const void* metadata_ptr, size_t component_index) override { // component_index corresponds to the selected field index const auto& metadata = *static_cast(metadata_ptr); + const auto& field = metadata.data_type.fields[component_index]; const DimensionIndex rank = metadata.rank; + const DimensionIndex field_rank = field.field_shape.size(); + const DimensionIndex total_rank = rank + field_rank; + std::string_view normalized_dimension_names[kMaxRank]; for (DimensionIndex i = 0; i < rank; ++i) { if (const auto& name = metadata.dimension_names[i]; name.has_value()) { @@ -408,11 +502,20 @@ class DataCacheBase } } auto builder = - tensorstore::IndexTransformBuilder<>(rank, rank) - .input_shape(metadata.shape) - .input_labels(span(&normalized_dimension_names[0], rank)); - builder.implicit_upper_bounds(true); + tensorstore::IndexTransformBuilder<>(total_rank, total_rank); + std::vector full_shape = metadata.shape; + full_shape.insert(full_shape.end(), field.field_shape.begin(), + field.field_shape.end()); + builder.input_shape(full_shape); + builder.input_labels(span(&normalized_dimension_names[0], total_rank)); + + DimensionSet implicit_upper_bounds(false); for (DimensionIndex i = 0; i < rank; ++i) { + implicit_upper_bounds[i] = true; + } + builder.implicit_upper_bounds(implicit_upper_bounds); + + for (DimensionIndex i = 0; i < total_rank; ++i) { builder.output_single_input_dimension(i, i); } return builder.Finalize(); @@ -643,9 +746,26 @@ class 
ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { DataCacheInitializer&& initializer) override { const auto& metadata = *static_cast(initializer.metadata.get()); + // For void access, modify the dtype to indicate special handling + ZarrDType dtype = metadata.data_type; + if (spec().selected_field == "") { + // Create a synthetic dtype for void access + dtype = ZarrDType{ + /*.has_fields=*/false, + /*.fields=*/{ZarrDType::Field{ + ZarrDType::BaseDType{"", dtype_v, + {metadata.data_type.bytes_per_outer_element}}, + /*.outer_shape=*/{}, + /*.name=*/"", + /*.field_shape=*/{metadata.data_type.bytes_per_outer_element}, + /*.num_inner_elements=*/metadata.data_type.bytes_per_outer_element, + /*.byte_offset=*/0, + /*.num_bytes=*/metadata.data_type.bytes_per_outer_element}}, + /*.bytes_per_outer_element=*/metadata.data_type.bytes_per_outer_element}; + } return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, std::move(initializer), spec().store.path, - metadata.codec_state, metadata.data_type, + metadata.codec_state, dtype, /*data_cache_pool=*/*cache_pool()); } @@ -657,6 +777,10 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { TENSORSTORE_ASSIGN_OR_RETURN( auto field_index, GetFieldIndex(metadata.data_type, spec().selected_field)); + // For void access, map to component index 0 + if (field_index == kVoidFieldIndex) { + field_index = 0; + } TENSORSTORE_RETURN_IF_ERROR( ValidateMetadataSchema(metadata, field_index, spec().schema)); return field_index; diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 281b9c98b..116712d70 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -19,6 +19,7 @@ #include #include "absl/base/optimization.h" +#include "absl/strings/ascii.h" #include "tensorstore/data_type.h" #include "tensorstore/internal/json_binding/json_binding.h" #include "tensorstore/util/endian.h" @@ -57,9 +58,26 @@ Result ParseBaseDType(std::string_view dtype) { if (dtype 
== "complex128") return make_dtype(dtype_v<::tensorstore::dtypes::complex128_t>); + // Handle r raw bits type where N is number of bits (must be multiple of 8) + if (dtype.size() > 1 && dtype[0] == 'r' && absl::ascii_isdigit(dtype[1])) { + std::string_view suffix = dtype.substr(1); + Index num_bits = 0; + if (!absl::SimpleAtoi(suffix, &num_bits) || + num_bits == 0 || + num_bits % 8 != 0) { + return absl::InvalidArgumentError(tensorstore::StrCat( + dtype, " data type is invalid; expected r where N is a positive " + "multiple of 8")); + } + Index num_bytes = num_bits / 8; + return ZarrDType::BaseDType{std::string(dtype), + dtype_v<::tensorstore::dtypes::byte_t>, + {num_bytes}}; + } + constexpr std::string_view kSupported = "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " - "bfloat16, float16, float32, float64, complex64, complex128"; + "bfloat16, float16, float32, float64, complex64, complex128, r"; return absl::InvalidArgumentError( tensorstore::StrCat(dtype, " data type is not one of the supported " "data types: ", @@ -162,6 +180,34 @@ Result ParseDTypeNoDerived(const nlohmann::json& value) { TENSORSTORE_RETURN_IF_ERROR(ParseFieldsArray(config["fields"], out)); return out; } + if (type_name == "raw_bytes") { + const auto& config = value["configuration"]; + if (!config.is_object() || !config.contains("length_bytes")) { + return absl::InvalidArgumentError( + "raw_bytes data type requires 'configuration' object with " + "'length_bytes' field"); + } + Index length_bytes; + TENSORSTORE_RETURN_IF_ERROR( + internal_json::JsonRequireValueAs(config["length_bytes"], &length_bytes)); + if (length_bytes <= 0) { + return absl::InvalidArgumentError( + "raw_bytes length_bytes must be positive"); + } + out.has_fields = false; + out.fields.resize(1); + out.fields[0].encoded_dtype = "raw_bytes"; + out.fields[0].dtype = dtype_v; + out.fields[0].flexible_shape = {length_bytes}; + out.fields[0].outer_shape = {}; + out.fields[0].name = ""; + out.fields[0].field_shape = 
{length_bytes}; + out.fields[0].num_inner_elements = length_bytes; + out.fields[0].byte_offset = 0; + out.fields[0].num_bytes = length_bytes; + out.bytes_per_outer_element = length_bytes; + return out; + } // For other named types, try to parse as a base dtype out.has_fields = false; out.fields.resize(1); @@ -326,6 +372,10 @@ Result ChooseBaseDType(DataType dtype) { return MakeBaseDType("complex64", dtype); if (dtype == dtype_v<::tensorstore::dtypes::complex128_t>) return MakeBaseDType("complex128", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::byte_t>) + return MakeBaseDType("r8", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::char_t>) + return MakeBaseDType("r8", dtype); return absl::InvalidArgumentError( tensorstore::StrCat("Data type not supported: ", dtype)); } diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index cbb7acbfb..e1c5b444c 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -68,6 +68,9 @@ TEST(ParseBaseDType, Success) { CheckBaseDType("float64", dtype_v, {}); CheckBaseDType("complex64", dtype_v, {}); CheckBaseDType("complex128", dtype_v, {}); + CheckBaseDType("r8", dtype_v, {1}); + CheckBaseDType("r16", dtype_v, {2}); + CheckBaseDType("r64", dtype_v, {8}); } TEST(ParseBaseDType, Failure) { @@ -81,6 +84,15 @@ TEST(ParseBaseDType, Failure) { StatusIs(absl::StatusCode::kInvalidArgument)); EXPECT_THAT(ParseBaseDType(""))); + EXPECT_THAT(ParseBaseDType("r7"), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("data type is invalid; expected r"))); + EXPECT_THAT(ParseBaseDType("r0"), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("data type is invalid; expected r"))); } void CheckDType(const ::nlohmann::json& json, const ZarrDType& expected) { @@ -266,6 +278,8 @@ TEST(ChooseBaseDTypeTest, RoundTrip) { dtype_v, dtype_v, dtype_v, + dtype_v, + dtype_v, }; for (auto dtype : kSupportedDataTypes) { 
SCOPED_TRACE(tensorstore::StrCat("dtype=", dtype)); diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 880991e8c..6a83cdbec 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -250,6 +250,10 @@ constexpr std::array FillValueDataTypeFunctions::Make<::tensorstore::dtypes::T>(); \ /**/ TENSORSTORE_ZARR3_FOR_EACH_DATA_TYPE(TENSORSTORE_INTERNAL_DO_DEF) + // Add char_t support for string data types + functions[static_cast(DataTypeId::char_t)] = + FillValueDataTypeFunctions::Make<::tensorstore::dtypes::char_t>(); + // byte_t is handled specially to use uint8_t functions #undef TENSORSTORE_INTERNAL_DO_DEF return functions; }(); @@ -282,8 +286,39 @@ absl::Status FillValueJsonBinder::operator()( std::vector>* obj, ::nlohmann::json* j) const { obj->resize(dtype.fields.size()); if (dtype.fields.size() == 1) { - TENSORSTORE_RETURN_IF_ERROR( - DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); + // Special case: raw_bytes (single field with byte_t and flexible shape) + if (dtype.fields[0].dtype.id() == DataTypeId::byte_t && + !dtype.fields[0].flexible_shape.empty()) { + // Handle base64-encoded fill value for raw_bytes + if (!j->is_string()) { + return absl::InvalidArgumentError( + "Expected base64-encoded string for raw_bytes fill_value"); + } + std::string b64_decoded; + if (!absl::Base64Unescape(j->get(), &b64_decoded)) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected valid base64-encoded fill value, but received: ", + j->dump())); + } + // Verify size matches expected byte array size + Index expected_size = dtype.fields[0].num_inner_elements; + if (static_cast(b64_decoded.size()) != expected_size) { + return absl::InvalidArgumentError(tensorstore::StrCat( + "Expected ", expected_size, + " base64-encoded bytes for fill_value, but received ", + b64_decoded.size(), " bytes")); + } + // Create fill value array + auto fill_arr = 
AllocateArray(dtype.fields[0].field_shape, c_order, + default_init, dtype.fields[0].dtype); + std::memcpy(fill_arr.data(), b64_decoded.data(), b64_decoded.size()); + std::cout << "[DEBUG] Raw bytes fill_value parsed: shape=" << fill_arr.shape() + << ", dtype=" << dtype.fields[0].dtype << std::endl; + (*obj)[0] = std::move(fill_arr); + } else { + TENSORSTORE_RETURN_IF_ERROR( + DecodeSingle(*j, dtype.fields[0].dtype, (*obj)[0])); + } } else { // For structured types, handle both array format and base64-encoded string if (j->is_string()) { @@ -361,8 +396,14 @@ absl::Status FillValueJsonBinder::DecodeSingle(::nlohmann::json& j, AllocateArray(span{}, c_order, default_init, data_type); void* data = arr.data(); out = std::move(arr); + // Special handling for byte_t: use uint8_t functions since they're binary compatible + auto type_id = data_type.id(); + if (type_id == DataTypeId::byte_t) { + type_id = DataTypeId::uint8_t; + } + const auto& functions = - kFillValueDataTypeFunctions[static_cast(data_type.id())]; + kFillValueDataTypeFunctions[static_cast(type_id)]; if (!functions.decode) { if (allow_missing_dtype) { out = SharedArray(); @@ -381,8 +422,14 @@ absl::Status FillValueJsonBinder::EncodeSingle( return absl::InvalidArgumentError( "data_type must be specified before fill_value"); } + // Special handling for byte_t: use uint8_t functions since they're binary compatible + auto type_id = data_type.id(); + if (type_id == DataTypeId::byte_t) { + type_id = DataTypeId::uint8_t; + } + const auto& functions = - kFillValueDataTypeFunctions[static_cast(data_type.id())]; + kFillValueDataTypeFunctions[static_cast(type_id)]; if (!functions.encode) { return absl::FailedPreconditionError( "fill_value unsupported for specified data_type"); @@ -751,8 +798,19 @@ std::string GetFieldNames(const ZarrDType& dtype) { } } // namespace +constexpr size_t kVoidFieldIndex = size_t(-1); + Result GetFieldIndex(const ZarrDType& dtype, std::string_view selected_field) { + // Special case: "" 
requests raw byte access (works for any dtype) + if (selected_field == "") { + if (dtype.fields.empty()) { + return absl::FailedPreconditionError( + "Requested field \"\" but dtype has no fields"); + } + return kVoidFieldIndex; + } + if (selected_field.empty()) { if (dtype.fields.size() != 1) { return absl::FailedPreconditionError(tensorstore::StrCat( @@ -779,6 +837,9 @@ SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, SpecRankAndFieldInfo info; info.chunked_rank = metadata.rank; info.field = &metadata.data_type.fields[field_index]; + if (!info.field->field_shape.empty()) { + info.chunked_rank += info.field->field_shape.size(); + } return info; } @@ -798,8 +859,24 @@ Result> GetEffectiveDomain( assert(RankConstraint::EqualOrUnspecified(schema.rank(), rank)); IndexDomainBuilder builder(std::max(schema.rank().rank, rank)); if (metadata_shape) { - builder.shape(*metadata_shape); - builder.implicit_upper_bounds(true); + if (static_cast(metadata_shape->size()) < rank && + info.field && !info.field->field_shape.empty() && + static_cast(metadata_shape->size() + + info.field->field_shape.size()) == rank) { + std::vector full_shape(metadata_shape->begin(), + metadata_shape->end()); + full_shape.insert(full_shape.end(), info.field->field_shape.begin(), + info.field->field_shape.end()); + builder.shape(full_shape); + DimensionSet implicit_upper_bounds(false); + for (size_t i = 0; i < metadata_shape->size(); ++i) { + implicit_upper_bounds[i] = true; + } + builder.implicit_upper_bounds(implicit_upper_bounds); + } else { + builder.shape(*metadata_shape); + builder.implicit_upper_bounds(true); + } } else { builder.origin(GetConstantVector(builder.rank())); } From 44c765ec04e0492cd8ba9aa9f5b43cf97834359b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 25 Nov 2025 18:28:09 +0000 Subject: [PATCH 05/59] Fix failing tests --- tensorstore/driver/zarr3/dtype.cc | 26 ++++++++++++++++++++++---- tensorstore/driver/zarr3/dtype_test.cc | 9 +++++++-- 2 files 
changed, 29 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 116712d70..5b3261812 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -75,6 +75,13 @@ Result ParseBaseDType(std::string_view dtype) { {num_bytes}}; } + // Handle bare "r" - must have a number after it + if (dtype.size() >= 1 && dtype[0] == 'r') { + return absl::InvalidArgumentError(tensorstore::StrCat( + dtype, " data type is invalid; expected r where N is a positive " + "multiple of 8")); + } + constexpr std::string_view kSupported = "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " "bfloat16, float16, float32, float64, complex64, complex128, r"; @@ -372,10 +379,21 @@ Result ChooseBaseDType(DataType dtype) { return MakeBaseDType("complex64", dtype); if (dtype == dtype_v<::tensorstore::dtypes::complex128_t>) return MakeBaseDType("complex128", dtype); - if (dtype == dtype_v<::tensorstore::dtypes::byte_t>) - return MakeBaseDType("r8", dtype); - if (dtype == dtype_v<::tensorstore::dtypes::char_t>) - return MakeBaseDType("r8", dtype); + if (dtype == dtype_v<::tensorstore::dtypes::byte_t>) { + ZarrDType::BaseDType base_dtype; + base_dtype.dtype = dtype; + base_dtype.encoded_dtype = "r8"; + base_dtype.flexible_shape = {1}; + return base_dtype; + } + if (dtype == dtype_v<::tensorstore::dtypes::char_t>) { + // char_t encodes as r8, which parses back to byte_t + ZarrDType::BaseDType base_dtype; + base_dtype.dtype = dtype_v<::tensorstore::dtypes::byte_t>; + base_dtype.encoded_dtype = "r8"; + base_dtype.flexible_shape = {1}; + return base_dtype; + } return absl::InvalidArgumentError( tensorstore::StrCat("Data type not supported: ", dtype)); } diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index e1c5b444c..ef55aba09 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -285,10 +285,15 @@ 
TEST(ChooseBaseDTypeTest, RoundTrip) { SCOPED_TRACE(tensorstore::StrCat("dtype=", dtype)); TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto base_zarr_dtype, ChooseBaseDType(dtype)); - EXPECT_EQ(dtype, base_zarr_dtype.dtype); + // byte_t and char_t both encode as r8, which parses back to byte_t + DataType expected_dtype = dtype; + if (dtype == dtype_v) { + expected_dtype = dtype_v; + } + EXPECT_EQ(expected_dtype, base_zarr_dtype.dtype); TENSORSTORE_ASSERT_OK_AND_ASSIGN( auto parsed, ParseBaseDType(base_zarr_dtype.encoded_dtype)); - EXPECT_EQ(dtype, parsed.dtype); + EXPECT_EQ(expected_dtype, parsed.dtype); EXPECT_EQ(base_zarr_dtype.flexible_shape, parsed.flexible_shape); EXPECT_EQ(base_zarr_dtype.encoded_dtype, parsed.encoded_dtype); } From 547642d819aa5ac878300530e9d049018de27db8 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 25 Nov 2025 20:10:09 +0000 Subject: [PATCH 06/59] Resolve issues with opening struct as void --- examples/read_structured_zarr3.cc | 40 ++++++++++++-- tensorstore/driver/zarr3/driver.cc | 83 ++++++++++++++++++++++++------ 2 files changed, 104 insertions(+), 19 deletions(-) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc index 259eade34..bf12ced1b 100644 --- a/examples/read_structured_zarr3.cc +++ b/examples/read_structured_zarr3.cc @@ -422,12 +422,44 @@ absl::Status Run(const std::string& zarr_path) { auto raw_store = std::move(raw_open_result).value(); TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); + // Test 3: Read from headers array as void (field="") + // Use a fresh context to avoid cache sharing with Test 1 std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "COMPARISON: Both methods should give identical inline field values" << std::endl; + std::cout << "TEST 3: Reading from 'headers' array as void (field=\"\")" << std::endl; std::cout << std::string(60, '=') << std::endl; - std::cout << "The structured 'headers' array provides field access 
convenience,\n" - << "while the raw 'raw_headers' array provides direct byte access.\n" - << "Both extract the inline field from byte offset " << kInlineFieldOffset + + auto context_void = tensorstore::Context::Default(); + + ::nlohmann::json headers_void_spec = ::nlohmann::json::object(); + headers_void_spec["driver"] = "zarr3"; + headers_void_spec["kvstore"] = ::nlohmann::json::object(); + headers_void_spec["kvstore"]["driver"] = "file"; + headers_void_spec["kvstore"]["path"] = headers_path + "/"; + headers_void_spec["field"] = ""; // Special field for raw byte access + + std::cout << "Spec: " << headers_void_spec.dump(2) << std::endl; + + auto headers_void_open_result = + tensorstore::Open(headers_void_spec, context_void, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result(); + + if (!headers_void_open_result.ok()) { + std::cout << "\n=== Headers (void) Open Failed ===" << std::endl; + std::cout << "Status: " << headers_void_open_result.status() << std::endl; + return headers_void_open_result.status(); + } + + auto headers_void_store = std::move(headers_void_open_result).value(); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (void)", /*is_raw_bytes=*/true)); + + std::cout << "\n" << std::string(60, '=') << std::endl; + std::cout << "COMPARISON: All three methods should give identical inline field values" << std::endl; + std::cout << std::string(60, '=') << std::endl; + std::cout << "- Test 1: 'headers' with field=\"inline\" provides field access convenience\n" + << "- Test 2: 'raw_headers' (raw_bytes type) provides direct byte access\n" + << "- Test 3: 'headers' with field=\"\" provides raw byte access to structured data\n" + << "All three extract the inline field from byte offset " << kInlineFieldOffset << " in " << kStructSize << "-byte structs." 
<< std::endl; return absl::OkStatus(); diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index b4d96da1f..bed1171d2 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -315,26 +315,27 @@ class DataCacheBase // Special case: void access - create single component for entire struct if (field_index == kVoidFieldIndex) { - // For void access, use the fill_value from the single raw_bytes field - auto& fill_value = metadata.fill_value[0]; - std::cout << "[DEBUG] Void access fill_value: shape=" << fill_value.shape() - << ", dtype=" << fill_value.dtype() << std::endl; + // For void access, create a zero-filled byte array as the fill value + const Index bytes_per_element = metadata.data_type.bytes_per_outer_element; + auto base_fill_value = AllocateArray( + span({bytes_per_element}), c_order, value_init, + dtype_v); // Broadcast to shape [unbounded, unbounded, ..., struct_size] std::vector target_shape(metadata.rank, kInfIndex); - target_shape.push_back(metadata.data_type.bytes_per_outer_element); - std::cout << "[DEBUG] Void access target_shape: ["; - for (size_t i = 0; i < target_shape.size(); ++i) { - if (i > 0) std::cout << ", "; - std::cout << target_shape[i]; - } - std::cout << "]" << std::endl; + target_shape.push_back(bytes_per_element); auto chunk_fill_value = - BroadcastArray(fill_value, BoxView<>(target_shape)).value(); + BroadcastArray(base_fill_value, BoxView<>(target_shape)).value(); // Add extra dimension for struct size in bytes std::vector chunk_shape_with_bytes = metadata.chunk_shape; - chunk_shape_with_bytes.push_back(metadata.data_type.bytes_per_outer_element); + chunk_shape_with_bytes.push_back(bytes_per_element); + + // Create permutation: copy existing inner_order and add the new dimension + std::vector void_permutation(metadata.rank + 1); + std::copy_n(metadata.inner_order.data(), metadata.rank, + void_permutation.begin()); + void_permutation[metadata.rank] = metadata.rank; // 
Add the bytes dimension auto& component = components.emplace_back( internal::AsyncWriteArray::Spec{ @@ -343,7 +344,7 @@ class DataCacheBase // specify unbounded `valid_data_bounds`. Box<>(metadata.rank + 1), ContiguousLayoutPermutation<>( - span(metadata.inner_order.data(), metadata.rank + 1))}, + span(void_permutation.data(), metadata.rank + 1))}, chunk_shape_with_bytes); component.array_spec.fill_value_comparison_kind = EqualityComparisonKind::identical; @@ -570,7 +571,13 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { std::string key_prefix, U&&... arg) : ChunkCacheImpl(std::move(initializer.store), std::forward(arg)...), DataCacheBase(std::move(initializer), std::move(key_prefix)), - grid_(DataCacheBase::GetChunkGridSpecification(metadata())) {} + grid_(DataCacheBase::GetChunkGridSpecification( + metadata(), + // Check if this is void access by examining the dtype + (ChunkCacheImpl::dtype_.fields.size() == 1 && + ChunkCacheImpl::dtype_.fields[0].name == "") + ? 
kVoidFieldIndex + : 0)) {} const internal::LexicographicalGridIndexKeyParser& GetChunkStorageKeyParser() final { @@ -596,6 +603,52 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { return DataCacheBase::executor(); } + // Override to handle void access - check the dtype to see if this is void + Result> GetExternalToInternalTransform( + const void* metadata_ptr, size_t component_index) override { + const auto& metadata = *static_cast(metadata_ptr); + + // Check if this is void access by examining the cache's dtype + const bool is_void_access = (ChunkCacheImpl::dtype_.fields.size() == 1 && + ChunkCacheImpl::dtype_.fields[0].name == ""); + + if (is_void_access) { + // For void access, create transform with extra bytes dimension + const DimensionIndex rank = metadata.rank; + const Index bytes_per_element = metadata.data_type.bytes_per_outer_element; + const DimensionIndex total_rank = rank + 1; + + std::string_view normalized_dimension_names[kMaxRank]; + for (DimensionIndex i = 0; i < rank; ++i) { + if (const auto& name = metadata.dimension_names[i]; name.has_value()) { + normalized_dimension_names[i] = *name; + } + } + + auto builder = + tensorstore::IndexTransformBuilder<>(total_rank, total_rank); + std::vector full_shape = metadata.shape; + full_shape.push_back(bytes_per_element); + builder.input_shape(full_shape); + builder.input_labels(span(&normalized_dimension_names[0], total_rank)); + + DimensionSet implicit_upper_bounds(false); + for (DimensionIndex i = 0; i < rank; ++i) { + implicit_upper_bounds[i] = true; + } + builder.implicit_upper_bounds(implicit_upper_bounds); + + for (DimensionIndex i = 0; i < total_rank; ++i) { + builder.output_single_input_dimension(i, i); + } + return builder.Finalize(); + } + + // Not void access - delegate to base implementation + return DataCacheBase::GetExternalToInternalTransform(metadata_ptr, + component_index); + } + internal::ChunkGridSpecification grid_; }; From 2a4c3d852e0f38b5601dd43482ae878d86a6d7b6 
Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 26 Nov 2025 15:03:55 +0000 Subject: [PATCH 07/59] Remove debug print --- tensorstore/driver/zarr3/metadata.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 6a83cdbec..9aef7bd0b 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -312,8 +312,6 @@ absl::Status FillValueJsonBinder::operator()( auto fill_arr = AllocateArray(dtype.fields[0].field_shape, c_order, default_init, dtype.fields[0].dtype); std::memcpy(fill_arr.data(), b64_decoded.data(), b64_decoded.size()); - std::cout << "[DEBUG] Raw bytes fill_value parsed: shape=" << fill_arr.shape() - << ", dtype=" << dtype.fields[0].dtype << std::endl; (*obj)[0] = std::move(fill_arr); } else { TENSORSTORE_RETURN_IF_ERROR( From b0abb94070f7be7337e7a30b90802ee8617801dd Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 2 Dec 2025 22:01:10 +0000 Subject: [PATCH 08/59] Add field for open as void --- .gitignore | 5 +++++ examples/read_structured_zarr3.cc | 11 ++++++----- tensorstore/driver/zarr3/driver.cc | 31 +++++++++++++++++++----------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index e4737363c..7c75044c5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,8 @@ __pycache__ *.pyc /python/tensorstore/*.so /python/tensorstore/*.pyd + +build/ +bootstrap.sh +filt_mig.mdio +generate_test.py \ No newline at end of file diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc index bf12ced1b..720ef1330 100644 --- a/examples/read_structured_zarr3.cc +++ b/examples/read_structured_zarr3.cc @@ -21,6 +21,7 @@ // Both arrays should contain the same data, allowing comparison of: // - Field-based access vs manual byte extraction // - Structured dtype parsing vs raw byte handling +// - New open_as_void option for raw byte access to structured data // // Usage: // bazel run 
//examples:read_structured_zarr3 -- /path/to/parent/dir @@ -422,10 +423,10 @@ absl::Status Run(const std::string& zarr_path) { auto raw_store = std::move(raw_open_result).value(); TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); - // Test 3: Read from headers array as void (field="") + // Test 3: Read from headers array as void (open_as_void=true) // Use a fresh context to avoid cache sharing with Test 1 std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 3: Reading from 'headers' array as void (field=\"\")" << std::endl; + std::cout << "TEST 3: Reading from 'headers' array as void (open_as_void=true)" << std::endl; std::cout << std::string(60, '=') << std::endl; auto context_void = tensorstore::Context::Default(); @@ -435,7 +436,7 @@ absl::Status Run(const std::string& zarr_path) { headers_void_spec["kvstore"] = ::nlohmann::json::object(); headers_void_spec["kvstore"]["driver"] = "file"; headers_void_spec["kvstore"]["path"] = headers_path + "/"; - headers_void_spec["field"] = ""; // Special field for raw byte access + headers_void_spec["open_as_void"] = true; // New option for raw byte access std::cout << "Spec: " << headers_void_spec.dump(2) << std::endl; @@ -451,14 +452,14 @@ absl::Status Run(const std::string& zarr_path) { } auto headers_void_store = std::move(headers_void_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (void)", /*is_raw_bytes=*/true)); + TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (open_as_void)", /*is_raw_bytes=*/true)); std::cout << "\n" << std::string(60, '=') << std::endl; std::cout << "COMPARISON: All three methods should give identical inline field values" << std::endl; std::cout << std::string(60, '=') << std::endl; std::cout << "- Test 1: 'headers' with field=\"inline\" provides field access convenience\n" << "- Test 2: 'raw_headers' (raw_bytes type) provides direct byte access\n" - << "- 
Test 3: 'headers' with field=\"\" provides raw byte access to structured data\n" + << "- Test 3: 'headers' with open_as_void=true provides raw byte access to structured data\n" << "All three extract the inline field from byte offset " << kInlineFieldOffset << " in " << kStructSize << "-byte structs." << std::endl; diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index bed1171d2..f4aad10d7 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -107,10 +107,11 @@ class ZarrDriverSpec ZarrMetadataConstraints metadata_constraints; std::string selected_field; + bool open_as_void; constexpr static auto ApplyMembers = [](auto& x, auto f) { return f(internal::BaseCast(x), x.metadata_constraints, - x.selected_field); + x.selected_field, x.open_as_void); }; static inline const auto default_json_binder = jb::Sequence( @@ -145,9 +146,17 @@ class ZarrDriverSpec }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( jb::DefaultInitializedValue()))), - jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( - jb::DefaultValue( - [](auto* obj) { *obj = std::string{}; })))); + jb::Member( + "field", + jb::Projection<&ZarrDriverSpec::selected_field>( + jb::DefaultValue( + [](auto* obj) { *obj = std::string{}; }))), + jb::Member( + "open_as_void", + jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::DefaultValue( + [](auto* v) { *v = false; /*selected_field = "";*/ })))); + absl::Status ApplyOptions(SpecOptions&& options) override { if (options.minimal_spec) { @@ -607,43 +616,43 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { Result> GetExternalToInternalTransform( const void* metadata_ptr, size_t component_index) override { const auto& metadata = *static_cast(metadata_ptr); - + // Check if this is void access by examining the cache's dtype const bool is_void_access = (ChunkCacheImpl::dtype_.fields.size() == 1 && ChunkCacheImpl::dtype_.fields[0].name == ""); - + if 
(is_void_access) { // For void access, create transform with extra bytes dimension const DimensionIndex rank = metadata.rank; const Index bytes_per_element = metadata.data_type.bytes_per_outer_element; const DimensionIndex total_rank = rank + 1; - + std::string_view normalized_dimension_names[kMaxRank]; for (DimensionIndex i = 0; i < rank; ++i) { if (const auto& name = metadata.dimension_names[i]; name.has_value()) { normalized_dimension_names[i] = *name; } } - + auto builder = tensorstore::IndexTransformBuilder<>(total_rank, total_rank); std::vector full_shape = metadata.shape; full_shape.push_back(bytes_per_element); builder.input_shape(full_shape); builder.input_labels(span(&normalized_dimension_names[0], total_rank)); - + DimensionSet implicit_upper_bounds(false); for (DimensionIndex i = 0; i < rank; ++i) { implicit_upper_bounds[i] = true; } builder.implicit_upper_bounds(implicit_upper_bounds); - + for (DimensionIndex i = 0; i < total_rank; ++i) { builder.output_single_input_dimension(i, i); } return builder.Finalize(); } - + // Not void access - delegate to base implementation return DataCacheBase::GetExternalToInternalTransform(metadata_ptr, component_index); From fff0a5be9ce8fa1baed0a2db5503b852f3fb5184 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 15:38:36 +0000 Subject: [PATCH 09/59] Add a shim for new open_as_void flag open option --- tensorstore/driver/zarr3/driver.cc | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f4aad10d7..18c8f3a77 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -140,8 +140,9 @@ class ZarrDriverSpec // at metadata level only. 
} } - TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( - RankConstraint{obj->metadata_constraints.rank})); + TENSORSTORE_RETURN_IF_ERROR( + obj->schema.Set( + RankConstraint{obj->metadata_constraints.rank})); return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( @@ -151,11 +152,23 @@ class ZarrDriverSpec jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), + + // NEW: wrap the open_as_void projection in a Validate jb::Member( "open_as_void", - jb::Projection<&ZarrDriverSpec::open_as_void>( - jb::DefaultValue( - [](auto* v) { *v = false; /*selected_field = "";*/ })))); + jb::Validate( + [](const auto& options, ZarrDriverSpec* obj) -> absl::Status { + // At this point, Projection has already set obj->open_as_void + if (obj->open_as_void) { + obj->selected_field = ""; + } + return absl::OkStatus(); + }, + jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::DefaultValue( + [](auto* v) { *v = false; }))))); + + absl::Status ApplyOptions(SpecOptions&& options) override { From b6c24f96289a523d14cd6dc9a173f70e10690e15 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 15:55:02 +0000 Subject: [PATCH 10/59] Revert some formatting changes --- tensorstore/driver/zarr3/driver.cc | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 18c8f3a77..dd95c711b 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -140,22 +140,18 @@ class ZarrDriverSpec // at metadata level only. 
} } - TENSORSTORE_RETURN_IF_ERROR( - obj->schema.Set( - RankConstraint{obj->metadata_constraints.rank})); + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( + RankConstraint{obj->metadata_constraints.rank})); return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( jb::DefaultInitializedValue()))), - jb::Member( - "field", - jb::Projection<&ZarrDriverSpec::selected_field>( + jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), // NEW: wrap the open_as_void projection in a Validate - jb::Member( - "open_as_void", + jb::Member("open_as_void", jb::Validate( [](const auto& options, ZarrDriverSpec* obj) -> absl::Status { // At this point, Projection has already set obj->open_as_void From 488b1605c1f15f322e4b39f03b02d6cd8b29900b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 15:56:34 +0000 Subject: [PATCH 11/59] revert gitignore changes --- .gitignore | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.gitignore b/.gitignore index 7c75044c5..e4737363c 100644 --- a/.gitignore +++ b/.gitignore @@ -21,8 +21,3 @@ __pycache__ *.pyc /python/tensorstore/*.so /python/tensorstore/*.pyd - -build/ -bootstrap.sh -filt_mig.mdio -generate_test.py \ No newline at end of file From 54941a09cf5e057e9c32d20512c0bb114b6f9b83 Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Wed, 3 Dec 2025 13:06:22 -0600 Subject: [PATCH 12/59] V3 structs remove shim (#2) * Begin removing void field shim * Fully removed void string shim * Cleanup debug prints * Remove shimmed validation * Remove unnecessary comment * Prefer false over zero for ternary clarity --- tensorstore/driver/zarr3/chunk_cache.cc | 16 ++++++---- tensorstore/driver/zarr3/chunk_cache.h | 14 ++++++--- tensorstore/driver/zarr3/driver.cc | 38 +++++++---------------- tensorstore/driver/zarr3/metadata.cc | 14 +++++---- tensorstore/driver/zarr3/metadata.h | 6 ++-- tensorstore/driver/zarr3/metadata_test.cc | 2 +- 6 files 
changed, 45 insertions(+), 45 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 64b6d69fd..f14efd607 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -75,10 +75,12 @@ ZarrChunkCache::~ZarrChunkCache() = default; ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/) + ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/, + bool open_as_void) : Base(std::move(store)), codec_state_(std::move(codec_state)), - dtype_(std::move(dtype)) {} + dtype_(std::move(dtype)), + open_as_void_(open_as_void) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver chunk_indices, absl::InlinedVector, 1> field_arrays(num_fields); // Special case: void access - return raw bytes directly - if (num_fields == 1 && dtype_.fields[0].name == "") { + if (open_as_void_) { TENSORSTORE_ASSIGN_OR_RETURN( field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), std::move(data))); @@ -221,11 +223,13 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, - ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, + bool open_as_void) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), - data_cache_pool_(std::move(data_cache_pool)) {} + data_cache_pool_(std::move(data_cache_pool)), + open_as_void_(open_as_void) {} Result> TranslateCellToSourceTransformForShard( IndexTransform<> transform, span grid_cell_indices, @@ -534,7 +538,7 @@ void ZarrShardedChunkCache::Entry::DoInitialize() { *sharding_state.sub_chunk_codec_chain, std::move(sharding_kvstore), 
cache.executor(), ZarrShardingCodec::PreparedState::Ptr(&sharding_state), - cache.dtype_, cache.data_cache_pool_); + cache.dtype_, cache.data_cache_pool_, cache.open_as_void_); zarr_chunk_cache = new_cache.release(); return std::unique_ptr(&zarr_chunk_cache->cache()); }) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index 5933115d7..a39eb1dc8 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -158,7 +158,8 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, explicit ZarrLeafChunkCache(kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, - internal::CachePool::WeakPtr data_cache_pool); + internal::CachePool::WeakPtr data_cache_pool, + bool open_as_void = false); void Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver( @@ -246,6 +249,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { kvstore::DriverPtr base_kvstore_; ZarrCodecChain::PreparedState::Ptr codec_state_; ZarrDType dtype_; + bool open_as_void_; // Data cache pool, if it differs from `this->pool()` (which is equal to the // metadata cache pool). 
@@ -260,11 +264,13 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { explicit ZarrShardSubChunkCache( kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, - ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool) + ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, + bool open_as_void = false) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), - std::move(dtype), std::move(data_cache_pool)), + std::move(dtype), std::move(data_cache_pool), + open_as_void), sharding_state_(std::move(sharding_state)), executor_(std::move(executor)) {} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index dd95c711b..f4c0ad9d7 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -149,20 +149,9 @@ class ZarrDriverSpec jb::Member("field", jb::Projection<&ZarrDriverSpec::selected_field>( jb::DefaultValue( [](auto* obj) { *obj = std::string{}; }))), - - // NEW: wrap the open_as_void projection in a Validate - jb::Member("open_as_void", - jb::Validate( - [](const auto& options, ZarrDriverSpec* obj) -> absl::Status { - // At this point, Projection has already set obj->open_as_void - if (obj->open_as_void) { - obj->selected_field = ""; - } - return absl::OkStatus(); - }, - jb::Projection<&ZarrDriverSpec::open_as_void>( + jb::Member("open_as_void", jb::Projection<&ZarrDriverSpec::open_as_void>( jb::DefaultValue( - [](auto* v) { *v = false; }))))); + [](auto* v) { *v = false; })))); @@ -592,10 +581,7 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { grid_(DataCacheBase::GetChunkGridSpecification( metadata(), // Check if this is void access by examining the dtype - (ChunkCacheImpl::dtype_.fields.size() == 1 && - ChunkCacheImpl::dtype_.fields[0].name == "") - ? kVoidFieldIndex - : 0)) {} + ChunkCacheImpl::open_as_void_ ? 
kVoidFieldIndex : false)) {} const internal::LexicographicalGridIndexKeyParser& GetChunkStorageKeyParser() final { @@ -626,9 +612,8 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { const void* metadata_ptr, size_t component_index) override { const auto& metadata = *static_cast(metadata_ptr); - // Check if this is void access by examining the cache's dtype - const bool is_void_access = (ChunkCacheImpl::dtype_.fields.size() == 1 && - ChunkCacheImpl::dtype_.fields[0].name == ""); + // Check if this is void access by examining the stored flag + const bool is_void_access = ChunkCacheImpl::open_as_void_; if (is_void_access) { // For void access, create transform with extra bytes dimension @@ -802,7 +787,7 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { TENSORSTORE_ASSIGN_OR_RETURN( auto metadata, internal_zarr3::GetNewMetadata(spec().metadata_constraints, - spec().schema), + spec().schema, spec().selected_field, spec().open_as_void), tensorstore::MaybeAnnotateStatus( _, "Cannot create using specified \"metadata\" and schema")); return metadata; @@ -819,15 +804,15 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { *static_cast(initializer.metadata.get()); // For void access, modify the dtype to indicate special handling ZarrDType dtype = metadata.data_type; - if (spec().selected_field == "") { + if (spec().open_as_void) { // Create a synthetic dtype for void access dtype = ZarrDType{ /*.has_fields=*/false, /*.fields=*/{ZarrDType::Field{ - ZarrDType::BaseDType{"", dtype_v, + ZarrDType::BaseDType{"", dtype_v, {metadata.data_type.bytes_per_outer_element}}, /*.outer_shape=*/{}, - /*.name=*/"", + /*.name=*/"", /*.field_shape=*/{metadata.data_type.bytes_per_outer_element}, /*.num_inner_elements=*/metadata.data_type.bytes_per_outer_element, /*.byte_offset=*/0, @@ -837,7 +822,8 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, 
std::move(initializer), spec().store.path, metadata.codec_state, dtype, - /*data_cache_pool=*/*cache_pool()); + /*data_cache_pool=*/*cache_pool(), + spec().open_as_void); } Result GetComponentIndex(const void* metadata_ptr, @@ -847,7 +833,7 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { ValidateMetadata(metadata, spec().metadata_constraints)); TENSORSTORE_ASSIGN_OR_RETURN( auto field_index, - GetFieldIndex(metadata.data_type, spec().selected_field)); + GetFieldIndex(metadata.data_type, spec().selected_field, spec().open_as_void)); // For void access, map to component index 0 if (field_index == kVoidFieldIndex) { field_index = 0; diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 9aef7bd0b..ba4454de4 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -799,12 +799,14 @@ std::string GetFieldNames(const ZarrDType& dtype) { constexpr size_t kVoidFieldIndex = size_t(-1); Result GetFieldIndex(const ZarrDType& dtype, - std::string_view selected_field) { - // Special case: "" requests raw byte access (works for any dtype) - if (selected_field == "") { + std::string_view selected_field, + bool open_as_void) { + // Special case: open_as_void requests raw byte access (works for any dtype) + + if (open_as_void) { if (dtype.fields.empty()) { return absl::FailedPreconditionError( - "Requested field \"\" but dtype has no fields"); + "Requested void access but dtype has no fields"); } return kVoidFieldIndex; } @@ -1138,7 +1140,7 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, Result> GetNewMetadata( const ZarrMetadataConstraints& metadata_constraints, const Schema& schema, - std::string_view selected_field) { + std::string_view selected_field, bool open_as_void) { auto metadata = std::make_shared(); metadata->zarr_format = metadata_constraints.zarr_format.value_or(3); @@ -1165,7 +1167,7 @@ Result> GetNewMetadata( } TENSORSTORE_ASSIGN_OR_RETURN( - size_t 
field_index, GetFieldIndex(metadata->data_type, selected_field)); + size_t field_index, GetFieldIndex(metadata->data_type, selected_field, open_as_void)); SpecRankAndFieldInfo info; info.field = &metadata->data_type.fields[field_index]; info.chunked_rank = metadata_constraints.rank; diff --git a/tensorstore/driver/zarr3/metadata.h b/tensorstore/driver/zarr3/metadata.h index 4c7871b0d..857210546 100644 --- a/tensorstore/driver/zarr3/metadata.h +++ b/tensorstore/driver/zarr3/metadata.h @@ -230,12 +230,14 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, /// unspecified. Result> GetNewMetadata( const ZarrMetadataConstraints& metadata_constraints, - const Schema& schema, std::string_view selected_field = {}); + const Schema& schema, std::string_view selected_field = {}, + bool open_as_void = false); absl::Status ValidateDataType(DataType dtype); Result GetFieldIndex(const ZarrDType& dtype, - std::string_view selected_field); + std::string_view selected_field, + bool open_as_void = false); struct SpecRankAndFieldInfo { DimensionIndex chunked_rank = dynamic_rank; diff --git a/tensorstore/driver/zarr3/metadata_test.cc b/tensorstore/driver/zarr3/metadata_test.cc index 11c97619f..ba7a26593 100644 --- a/tensorstore/driver/zarr3/metadata_test.cc +++ b/tensorstore/driver/zarr3/metadata_test.cc @@ -438,7 +438,7 @@ Result> TestGetNewMetadata( TENSORSTORE_RETURN_IF_ERROR(status); TENSORSTORE_ASSIGN_OR_RETURN( auto constraints, ZarrMetadataConstraints::FromJson(constraints_json)); - return GetNewMetadata(constraints, schema); + return GetNewMetadata(constraints, schema, /*selected_field=*/{}, /*open_as_void=*/false); } TEST(GetNewMetadataTest, DuplicateDimensionNames) { From c9f58f9eae12c236c1398619c0c43a298fc58dfc Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 3 Dec 2025 19:38:40 +0000 Subject: [PATCH 13/59] Fix structured fill value population --- tensorstore/driver/zarr3/driver.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git 
a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f4c0ad9d7..51cc17f42 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -675,7 +675,13 @@ class ZarrDriver : public ZarrDriverBase { if (metadata.fill_value.empty()) { return SharedArray(); } - return metadata.fill_value[0]; + // return metadata.fill_value[0]; + // TODO: Doe we actually need to validate this or can we trust that component_index will return a valid index? + size_t index = this->component_index(); + if (index >= metadata.fill_value.size()) { + return absl::OutOfRangeError("Component index out of bounds"); + } + return metadata.fill_value[index]; } Future GetStorageStatistics( From 7655cfd4cf435e90a1b468929c344de1300a0aa1 Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 10:03:47 -0600 Subject: [PATCH 14/59] V3 examples merge (#3) * Implement a more general and portable example set * Fix driver cache bug * Update example for template * Cleanup example * Remove testing examples from source --- examples/CMakeLists.txt | 163 ---------- examples/read_structured_zarr3.cc | 496 ----------------------------- tensorstore/driver/zarr3/driver.cc | 8 +- 3 files changed, 6 insertions(+), 661 deletions(-) delete mode 100644 examples/CMakeLists.txt delete mode 100644 examples/read_structured_zarr3.cc diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 92e9857fa..000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,163 +0,0 @@ -# Standalone CMakeLists.txt for read_structured_zarr3 example -# -# Build instructions: -# mkdir -p /home/ubuntu/source/tensorstore/examples/build -# cd /home/ubuntu/source/tensorstore/examples/build -# cmake .. 
-# make -# -# Run: -# ./read_structured_zarr3 --zarr_path=/home/ubuntu/source/tensorstore/filt_mig.mdio/headers - -cmake_minimum_required(VERSION 3.24) -project(read_structured_zarr3 LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# Path to the tensorstore build directory -set(TENSORSTORE_BUILD_DIR "/home/ubuntu/source/tensorstore/build" CACHE PATH "Path to tensorstore build directory") -set(TENSORSTORE_SOURCE_DIR "/home/ubuntu/source/tensorstore" CACHE PATH "Path to tensorstore source directory") -set(DEPS_DIR "${TENSORSTORE_BUILD_DIR}/_deps") - -# Include paths (matching what tensorstore tests use) -include_directories( - ${TENSORSTORE_SOURCE_DIR} - ${DEPS_DIR}/absl-src - ${DEPS_DIR}/re2-src - ${DEPS_DIR}/riegeli-src -) - -include_directories(SYSTEM - ${DEPS_DIR}/half-build/include - ${DEPS_DIR}/half-src/include - ${DEPS_DIR}/nlohmann_json-build/include - ${DEPS_DIR}/nlohmann_json-src/include - ${TENSORSTORE_BUILD_DIR} -) - -# Compiler flags -add_compile_options( - -fPIE - -Wno-deprecated-declarations - -Wno-sign-compare - -Wno-unused-but-set-parameter - -Wno-maybe-uninitialized - -Wno-sequence-point - -Wno-unknown-warning-option - -Wno-stringop-overflow - -fsized-deallocation -) - -# Find all the static libraries we need from the tensorstore build -file(GLOB TENSORSTORE_LIBS "${TENSORSTORE_BUILD_DIR}/libtensorstore*.a") -file(GLOB_RECURSE ABSEIL_LIBS "${DEPS_DIR}/absl-build/absl/*.a") -file(GLOB_RECURSE RIEGELI_LIBS "${DEPS_DIR}/riegeli-build/*.a") - -# Additional dependency libraries - corrected paths -file(GLOB_RECURSE BLOSC_LIBS "${DEPS_DIR}/blosc-build/*.a") -file(GLOB_RECURSE ZSTD_LIBS "${DEPS_DIR}/zstd-build/*.a") -file(GLOB_RECURSE RE2_LIBS "${DEPS_DIR}/re2-build/*.a") -file(GLOB_RECURSE SNAPPY_LIBS "${DEPS_DIR}/snappy-build/*.a") -file(GLOB_RECURSE BROTLI_LIBS "${DEPS_DIR}/brotli-build/*.a") -file(GLOB_RECURSE LZ4_LIBS "${DEPS_DIR}/lz4-build/*.a") -file(GLOB_RECURSE ZLIB_LIBS "${DEPS_DIR}/zlib-build/*.a") 
-file(GLOB_RECURSE PROTOBUF_LIBS "${DEPS_DIR}/protobuf-build/*.a") -file(GLOB_RECURSE GRPC_LIBS "${DEPS_DIR}/grpc-build/*.a") -file(GLOB_RECURSE CARES_LIBS "${DEPS_DIR}/c-ares-build/*.a") -file(GLOB_RECURSE SSL_LIBS "${DEPS_DIR}/boringssl-build/ssl/*.a") -file(GLOB_RECURSE CRYPTO_LIBS "${DEPS_DIR}/boringssl-build/crypto/*.a") -file(GLOB_RECURSE LIBLZMA_LIBS "${DEPS_DIR}/liblzma-build/*.a") -file(GLOB_RECURSE BZIP2_LIBS "${DEPS_DIR}/bzip2-build/*.a") -file(GLOB_RECURSE JPEG_LIBS "${DEPS_DIR}/jpeg-build/*.a") -file(GLOB_RECURSE PNG_LIBS "${DEPS_DIR}/png-build/*.a") -file(GLOB_RECURSE TIFF_LIBS "${DEPS_DIR}/tiff-build/*.a") -file(GLOB_RECURSE AVIF_LIBS "${DEPS_DIR}/avif-build/*.a") -file(GLOB_RECURSE AOM_LIBS "${DEPS_DIR}/aom-build/*.a") -file(GLOB_RECURSE WEBP_LIBS "${DEPS_DIR}/webp-build/*.a") -file(GLOB_RECURSE CURL_LIBS "${DEPS_DIR}/curl-build/*.a") - -# Create executable -add_executable(read_structured_zarr3 read_structured_zarr3.cc) - -# Link libraries - use whole-archive for libraries that use static registration -# These include drivers, codecs, kvstores, and context resource providers -target_link_libraries(read_structured_zarr3 PRIVATE - # Force inclusion of libraries with static registrations - -Wl,--whole-archive - - # Context resource providers - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_data_copy_concurrency_resource.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_file_io_concurrency_resource.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_cache_cache_pool_resource.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_internal_concurrency_resource.a - - # Zarr3 driver and codecs - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_driver.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_blosc.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_bytes.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_crc32c.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_gzip.a - 
${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_transpose.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_zstd.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_sharding_indexed.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_driver_zarr3_codec_codec_chain_spec.a - - # File kvstore and its resource providers - ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file.a - ${TENSORSTORE_BUILD_DIR}/libtensorstore_kvstore_file_file_resource.a - - -Wl,--no-whole-archive - - -Wl,--start-group - - # Tensorstore libs - ${TENSORSTORE_LIBS} - - # Riegeli - ${RIEGELI_LIBS} - - # Abseil - ${ABSEIL_LIBS} - - # Compression libs - ${BLOSC_LIBS} - ${ZSTD_LIBS} - ${LZ4_LIBS} - ${SNAPPY_LIBS} - ${BROTLI_LIBS} - ${ZLIB_LIBS} - ${LIBLZMA_LIBS} - ${BZIP2_LIBS} - - # Regex - ${RE2_LIBS} - - # Protocol buffers and gRPC - ${PROTOBUF_LIBS} - ${GRPC_LIBS} - ${CARES_LIBS} - - # SSL/TLS - ${SSL_LIBS} - ${CRYPTO_LIBS} - - # Image libraries - ${JPEG_LIBS} - ${PNG_LIBS} - ${TIFF_LIBS} - ${AVIF_LIBS} - ${AOM_LIBS} - ${WEBP_LIBS} - - # HTTP - ${CURL_LIBS} - - -Wl,--end-group - - # System libraries - pthread - dl - m - rt -) diff --git a/examples/read_structured_zarr3.cc b/examples/read_structured_zarr3.cc deleted file mode 100644 index 720ef1330..000000000 --- a/examples/read_structured_zarr3.cc +++ /dev/null @@ -1,496 +0,0 @@ -// Copyright 2024 The TensorStore Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Standalone test for reading structured data from Zarr v3 arrays. -// -// This test opens two Zarr v3 arrays: -// 1. A structured array with named fields (headers/) -// 2. A raw bytes array containing struct data (raw_headers/) -// -// Both arrays should contain the same data, allowing comparison of: -// - Field-based access vs manual byte extraction -// - Structured dtype parsing vs raw byte handling -// - New open_as_void option for raw byte access to structured data -// -// Usage: -// bazel run //examples:read_structured_zarr3 -- /path/to/parent/dir -// -// Or with cmake: -// cd examples/build && ./read_structured_zarr3 --zarr_path=/path/to/parent/dir -// -// Where the parent dir contains both 'headers/' and 'raw_headers/' subdirs. - -#include - -#include -#include -#include -#include - -#include "absl/flags/flag.h" -#include "absl/flags/parse.h" -#include "absl/status/status.h" -#include -#include "tensorstore/array.h" -#include "tensorstore/context.h" -#include "tensorstore/data_type.h" -#include "tensorstore/index.h" -#include "tensorstore/open.h" -#include "tensorstore/open_mode.h" -#include "tensorstore/spec.h" -#include "tensorstore/tensorstore.h" -#include "tensorstore/util/result.h" -#include "tensorstore/util/status.h" - -// Internal headers for testing dtype parsing -#include "tensorstore/driver/zarr3/dtype.h" - -// Additional headers for string operations -#include "absl/strings/str_join.h" - -ABSL_FLAG(std::string, zarr_path, - "/home/ubuntu/source/tensorstore/filt_mig.mdio", - "Path to the parent .mdio directory containing headers/ and raw_headers/"); - -namespace { - -using ::tensorstore::Index; - -// Field layout from the zarr.json metadata: -// The structured dtype has the following fields with their byte offsets: -// trace_seq_num_line: int32 @ 0 -// trace_seq_num_reel: int32 @ 4 -// ... (many more fields) ... 
-// inline: int32 @ 180 -// crossline: int32 @ 184 -// cdp_x: int32 @ 188 -// cdp_y: int32 @ 192 -// -// Total struct size: 196 bytes (matches blosc typesize) - -constexpr size_t kInlineFieldOffset = 180; -constexpr size_t kStructSize = 196; - -// Read and parse the zarr.json metadata to display info about structured type -void PrintZarrMetadata(const std::string& zarr_path) { - std::string metadata_path = zarr_path + "/zarr.json"; - std::ifstream file(metadata_path); - if (!file.is_open()) { - std::cerr << "Could not open " << metadata_path << std::endl; - return; - } - - nlohmann::json metadata; - try { - file >> metadata; - } catch (const nlohmann::json::parse_error& e) { - std::cerr << "Failed to parse zarr.json: " << e.what() << std::endl; - return; - } - - std::cout << "\n=== Zarr Metadata ===" << std::endl; - std::cout << "Shape: " << metadata["shape"].dump() << std::endl; - std::cout << "Dimension names: " << metadata["dimension_names"].dump() - << std::endl; - - if (metadata.contains("data_type")) { - auto& dt = metadata["data_type"]; - std::cout << "\nData type format:" << std::endl; - if (dt.is_object()) { - std::cout << " Type: object with name=\"" << dt["name"].get() - << "\"" << std::endl; - if (dt.contains("configuration") && - dt["configuration"].contains("fields")) { - auto& fields = dt["configuration"]["fields"]; - std::cout << " Number of fields: " << fields.size() << std::endl; - std::cout << " Fields:" << std::endl; - size_t byte_offset = 0; - for (const auto& field : fields) { - std::string name = field[0].get(); - std::string type = field[1].get(); - size_t size = (type == "int32" || type == "uint32" || type == "float32") - ? 
4 - : 2; // int16/uint16 - std::cout << " " << name << ": " << type << " @ byte " << byte_offset - << std::endl; - byte_offset += size; - } - std::cout << " Total struct size: " << byte_offset << " bytes" - << std::endl; - } - } else if (dt.is_string()) { - std::cout << " Type: simple \"" << dt.get() << "\"" - << std::endl; - } else if (dt.is_array()) { - std::cout << " Type: array with " << dt.size() << " fields" << std::endl; - } - } - - if (metadata.contains("codecs")) { - std::cout << "\nCodecs: " << metadata["codecs"].dump(2) << std::endl; - } -} - -// Helper function to read and display inline field from an array -absl::Status ReadInlineField(const tensorstore::TensorStore<>& store, - const std::string& array_name, - bool is_raw_bytes = false) { - // Get information about the array - auto domain = store.domain(); - std::cout << "\n=== " << array_name << " Array Info ===" << std::endl; - std::cout << "Domain: " << domain << std::endl; - std::cout << "Dtype: " << store.dtype() << std::endl; - std::cout << "Rank: " << store.rank() << std::endl; - - auto shape = domain.shape(); - std::cout << "Shape: ["; - for (int i = 0; i < shape.size(); ++i) { - if (i > 0) std::cout << ", "; - std::cout << shape[i]; - } - std::cout << "]" << std::endl; - - // Read all data - std::cout << "\n=== Reading " << array_name << " Data ===" << std::endl; - TENSORSTORE_ASSIGN_OR_RETURN( - auto array, tensorstore::Read(store).result()); - - std::cout << "Read complete. 
Array size: " << array.num_elements() - << " elements" << std::endl; - std::cout << "Data type: " << array.dtype() << std::endl; - - Index num_inline, num_crossline; - const int32_t* int_ptr; - - if (is_raw_bytes) { - // For raw bytes, we need to extract the inline field manually - // Shape is [inline, crossline, struct_size] - num_inline = shape[0]; - num_crossline = shape[1]; - Index struct_size = shape[2]; - if (struct_size != kStructSize) { - std::cout << "Warning: Raw struct size (" << struct_size - << ") differs from expected header struct size (" << kStructSize - << "). Assuming padding." << std::endl; - } - - // Extract inline field (4 bytes starting at offset 180) - auto byte_ptr = reinterpret_cast(array.data()); - std::vector inline_values(num_inline * num_crossline); - - for (Index i = 0; i < num_inline; ++i) { - for (Index j = 0; j < num_crossline; ++j) { - Index struct_offset = (i * num_crossline + j) * struct_size; - Index field_offset = struct_offset + kInlineFieldOffset; - std::memcpy(&inline_values[i * num_crossline + j], - byte_ptr + field_offset, 4); - } - } - - std::cout << "Extracted inline field from raw bytes at offset " - << kInlineFieldOffset << std::endl; - int_ptr = inline_values.data(); - } else { - // For structured array, field access already gave us int32 values - num_inline = shape[0]; - num_crossline = shape[1]; - int_ptr = reinterpret_cast(array.data()); - } - - std::cout << "\n=== Inline field values from " << array_name - << " (shape: " << num_inline << " x " << num_crossline << ") ===" << std::endl; - - // Print first 10 rows (or fewer if less data) - Index rows_to_print = std::min(num_inline, Index{10}); - Index cols_to_print = std::min(num_crossline, Index{10}); - - for (Index i = 0; i < rows_to_print; ++i) { - for (Index j = 0; j < cols_to_print; ++j) { - std::cout << int_ptr[i * num_crossline + j]; - if (j < cols_to_print - 1) { - std::cout << "\t"; - } - } - if (num_crossline > cols_to_print) { - std::cout << "\t..."; - } - 
std::cout << std::endl; - } - if (num_inline > rows_to_print) { - std::cout << "... (" << (num_inline - rows_to_print) << " more rows)" - << std::endl; - } - - std::cout << "\n=== " << array_name << " Summary ===" << std::endl; - std::cout << "Successfully read " << (num_inline * num_crossline) - << " inline values" << std::endl; - - // Show some statistics - int32_t min_val = int_ptr[0], max_val = int_ptr[0]; - int64_t sum = 0; - for (Index i = 0; i < num_inline * num_crossline; ++i) { - min_val = std::min(min_val, int_ptr[i]); - max_val = std::max(max_val, int_ptr[i]); - sum += int_ptr[i]; - } - std::cout << "Min value: " << min_val << std::endl; - std::cout << "Max value: " << max_val << std::endl; - std::cout << "Mean value: " << (static_cast(sum) / (num_inline * num_crossline)) << std::endl; - - return absl::OkStatus(); -} - -absl::Status Run(const std::string& zarr_path) { - std::cout << "=== Zarr v3 Structured Data Type Test ===" << std::endl; - std::cout << "Opening zarr3 arrays in: " << zarr_path << std::endl; - - auto context = tensorstore::Context::Default(); - - // First, display metadata information for structured array - std::string headers_path = zarr_path + "/headers"; - PrintZarrMetadata(headers_path); - - // Test raw_bytes parsing by reading and parsing the raw_headers zarr.json - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TESTING RAW_BYTES PARSING" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - std::string raw_metadata_path = zarr_path + "/raw_headers/zarr.json"; - std::ifstream raw_file(raw_metadata_path); - if (!raw_file.is_open()) { - std::cout << "Could not open " << raw_metadata_path << std::endl; - return absl::NotFoundError("Raw headers metadata not found"); - } - - nlohmann::json raw_metadata; - try { - raw_file >> raw_metadata; - } catch (const nlohmann::json::parse_error& e) { - std::cout << "Failed to parse raw zarr.json: " << e.what() << std::endl; - return absl::DataLossError("Invalid 
raw metadata JSON"); - } - - std::cout << "Raw headers data_type: " << raw_metadata["data_type"].dump(2) << std::endl; - - // Test parsing the raw_bytes data type - std::cout << "Testing raw_bytes dtype parsing..." << std::endl; - - // For now, just verify the JSON structure is what we expect - if (!raw_metadata.contains("data_type")) { - std::cout << "FAILED: No data_type in metadata" << std::endl; - return absl::NotFoundError("Missing data_type"); - } - - auto& dt = raw_metadata["data_type"]; - if (!dt.is_object() || !dt.contains("name") || dt["name"] != "raw_bytes") { - std::cout << "FAILED: data_type is not raw_bytes extension" << std::endl; - return absl::InvalidArgumentError("Not raw_bytes extension"); - } - - if (!dt.contains("configuration") || !dt["configuration"].contains("length_bytes")) { - std::cout << "FAILED: Missing length_bytes in configuration" << std::endl; - return absl::InvalidArgumentError("Missing length_bytes"); - } - - int length_bytes = dt["configuration"]["length_bytes"]; - std::cout << "SUCCESS: Found raw_bytes extension with length_bytes = " << length_bytes << std::endl; - std::cout << "This should parse to:" << std::endl; - std::cout << " - Single field with byte_t dtype" << std::endl; - std::cout << " - Field shape: [" << length_bytes << "]" << std::endl; - std::cout << " - Bytes per outer element: " << length_bytes << std::endl; - - // Now actually test the parsing implementation - std::cout << "\n=== Testing ParseDType Implementation ===" << std::endl; - auto dtype_result = tensorstore::internal_zarr3::ParseDType(dt); - if (!dtype_result.ok()) { - std::cout << "FAILED: Could not parse raw_bytes data type: " << dtype_result.status() << std::endl; - return dtype_result.status(); - } - - auto dtype = std::move(dtype_result).value(); - std::cout << "SUCCESS: ParseDType worked!" 
<< std::endl; - std::cout << " Fields: " << dtype.fields.size() << std::endl; - std::cout << " Has fields: " << dtype.has_fields << std::endl; - std::cout << " Bytes per outer element: " << dtype.bytes_per_outer_element << std::endl; - - if (!dtype.fields.empty()) { - const auto& field = dtype.fields[0]; - std::cout << " Field name: '" << field.name << "'" << std::endl; - std::cout << " Field dtype: " << field.dtype << std::endl; - std::cout << " Field shape: [" << absl::StrJoin(field.field_shape, ", ") << "]" << std::endl; - std::cout << " Field num_inner_elements: " << field.num_inner_elements << std::endl; - std::cout << " Field num_bytes: " << field.num_bytes << std::endl; - } - - // Verify the parsing is correct - bool parsing_correct = true; - if (dtype.fields.size() != 1) { - std::cout << "ERROR: Expected 1 field, got " << dtype.fields.size() << std::endl; - parsing_correct = false; - } - if (dtype.fields[0].name != "") { - std::cout << "ERROR: Expected empty field name, got '" << dtype.fields[0].name << "'" << std::endl; - parsing_correct = false; - } - if (dtype.fields[0].dtype != tensorstore::dtype_v) { - std::cout << "ERROR: Expected byte_t dtype, got " << dtype.fields[0].dtype << std::endl; - parsing_correct = false; - } - if (dtype.fields[0].field_shape != std::vector{length_bytes}) { - std::cout << "ERROR: Expected field shape [" << length_bytes << "], got [" - << absl::StrJoin(dtype.fields[0].field_shape, ", ") << "]" << std::endl; - parsing_correct = false; - } - if (dtype.bytes_per_outer_element != length_bytes) { - std::cout << "ERROR: Expected " << length_bytes << " bytes per element, got " - << dtype.bytes_per_outer_element << std::endl; - parsing_correct = false; - } - - if (parsing_correct) { - std::cout << "\n✅ PARSING VERIFICATION: All checks passed!" << std::endl; - std::cout << "The raw_bytes extension is correctly parsed." << std::endl; - } else { - std::cout << "\n❌ PARSING VERIFICATION: Some checks failed!" 
<< std::endl; - return absl::InternalError("Parsing verification failed"); - } - - // Test 1: Read from structured array using field access - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 1: Reading from structured 'headers' array" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - ::nlohmann::json headers_spec = ::nlohmann::json::object(); - headers_spec["driver"] = "zarr3"; - headers_spec["kvstore"] = ::nlohmann::json::object(); - headers_spec["kvstore"]["driver"] = "file"; - headers_spec["kvstore"]["path"] = headers_path + "/"; - headers_spec["field"] = "inline"; // Extract inline field (int32 at byte offset 180) - - std::cout << "Spec: " << headers_spec.dump(2) << std::endl; - - auto headers_open_result = - tensorstore::Open(headers_spec, context, tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!headers_open_result.ok()) { - std::cout << "\n=== Headers Open Failed ===" << std::endl; - std::cout << "Status: " << headers_open_result.status() << std::endl; - return headers_open_result.status(); - } - - auto headers_store = std::move(headers_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_store, "headers")); - - // Test 2: Read from raw bytes array (no special void access needed) - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 2: Reading from raw 'raw_headers' array" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - std::string raw_headers_path = zarr_path + "/raw_headers"; - ::nlohmann::json raw_spec = ::nlohmann::json::object(); - raw_spec["driver"] = "zarr3"; - raw_spec["kvstore"] = ::nlohmann::json::object(); - raw_spec["kvstore"]["driver"] = "file"; - raw_spec["kvstore"]["path"] = raw_headers_path + "/"; - // No field specified - raw_bytes has a single anonymous field - - std::cout << "Spec: " << raw_spec.dump(2) << std::endl; - - auto raw_open_result = - tensorstore::Open(raw_spec, context, 
tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!raw_open_result.ok()) { - std::cout << "\n=== Raw Headers Open Failed ===" << std::endl; - std::cout << "Status: " << raw_open_result.status() << std::endl; - return raw_open_result.status(); - } - - auto raw_store = std::move(raw_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(raw_store, "raw_headers", /*is_raw_bytes=*/true)); - - // Test 3: Read from headers array as void (open_as_void=true) - // Use a fresh context to avoid cache sharing with Test 1 - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "TEST 3: Reading from 'headers' array as void (open_as_void=true)" << std::endl; - std::cout << std::string(60, '=') << std::endl; - - auto context_void = tensorstore::Context::Default(); - - ::nlohmann::json headers_void_spec = ::nlohmann::json::object(); - headers_void_spec["driver"] = "zarr3"; - headers_void_spec["kvstore"] = ::nlohmann::json::object(); - headers_void_spec["kvstore"]["driver"] = "file"; - headers_void_spec["kvstore"]["path"] = headers_path + "/"; - headers_void_spec["open_as_void"] = true; // New option for raw byte access - - std::cout << "Spec: " << headers_void_spec.dump(2) << std::endl; - - auto headers_void_open_result = - tensorstore::Open(headers_void_spec, context_void, tensorstore::OpenMode::open, - tensorstore::ReadWriteMode::read) - .result(); - - if (!headers_void_open_result.ok()) { - std::cout << "\n=== Headers (void) Open Failed ===" << std::endl; - std::cout << "Status: " << headers_void_open_result.status() << std::endl; - return headers_void_open_result.status(); - } - - auto headers_void_store = std::move(headers_void_open_result).value(); - TENSORSTORE_RETURN_IF_ERROR(ReadInlineField(headers_void_store, "headers (open_as_void)", /*is_raw_bytes=*/true)); - - std::cout << "\n" << std::string(60, '=') << std::endl; - std::cout << "COMPARISON: All three methods should give identical inline field values" 
<< std::endl; - std::cout << std::string(60, '=') << std::endl; - std::cout << "- Test 1: 'headers' with field=\"inline\" provides field access convenience\n" - << "- Test 2: 'raw_headers' (raw_bytes type) provides direct byte access\n" - << "- Test 3: 'headers' with open_as_void=true provides raw byte access to structured data\n" - << "All three extract the inline field from byte offset " << kInlineFieldOffset - << " in " << kStructSize << "-byte structs." << std::endl; - - return absl::OkStatus(); -} - -} // namespace - -int main(int argc, char** argv) { - absl::ParseCommandLine(argc, argv); - - std::string zarr_path = absl::GetFlag(FLAGS_zarr_path); - if (zarr_path.empty()) { - std::cerr << "Error: --zarr_path is required" << std::endl; - return 1; - } - - // Verify the path structure - std::string headers_path = zarr_path + "/headers"; - std::string raw_headers_path = zarr_path + "/raw_headers"; - - std::cout << "Expecting arrays at:" << std::endl; - std::cout << " Structured: " << headers_path << std::endl; - std::cout << " Raw bytes: " << raw_headers_path << std::endl; - std::cout << std::endl; - - auto status = Run(zarr_path); - if (!status.ok()) { - std::cerr << "\nFinal status: " << status << std::endl; - return 1; - } - - return 0; -} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 51cc17f42..ec30edd82 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -779,12 +779,16 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { std::string GetDataCacheKey(const void* metadata) override { std::string result; + const auto& zarr_metadata = *static_cast(metadata); internal::EncodeCacheKey( - &result, spec().store.path, - static_cast(metadata)->GetCompatibilityKey()); + &result, + spec().store.path, + zarr_metadata.GetCompatibilityKey(), + spec().open_as_void ? 
"void" : "normal"); return result; } + Result> Create(const void* existing_metadata, CreateOptions options) override { if (existing_metadata) { From 8c4c4cafe2b33df06131d985c2574c973f817b3d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 4 Dec 2025 16:07:26 +0000 Subject: [PATCH 15/59] Remove vestigial example build --- examples/BUILD | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/examples/BUILD b/examples/BUILD index 4dcb2d604..94acdba14 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -122,26 +122,3 @@ tensorstore_cc_binary( "@riegeli//riegeli/bytes:writer", ], ) - -tensorstore_cc_binary( - name = "read_structured_zarr3", - srcs = ["read_structured_zarr3.cc"], - deps = [ - "//tensorstore", - "//tensorstore:array", - "//tensorstore:context", - "//tensorstore:data_type", - "//tensorstore:index", - "//tensorstore:open", - "//tensorstore:open_mode", - "//tensorstore:spec", - "//tensorstore/driver/zarr3", - "//tensorstore/kvstore/file", - "//tensorstore/util:result", - "//tensorstore/util:status", - "@abseil-cpp//absl/flags:flag", - "@abseil-cpp//absl/flags:parse", - "@abseil-cpp//absl/status", - "@nlohmann_json//:json", - ], -) From 4b590f855adc963fe20940bd704693d81190483a Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 11:11:14 -0600 Subject: [PATCH 16/59] V3 structs fix fills (#4) * Use the appropriate fill value for open_as_void structured data * Cleanup --- tensorstore/driver/zarr3/driver.cc | 70 ++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index ec30edd82..f86e4ad88 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -171,12 +171,74 @@ class ZarrDriverSpec IndexTransformView<> transform) const override { SharedArray fill_value{schema.fill_value()}; - const auto& metadata = metadata_constraints; - if (metadata.fill_value && 
!metadata.fill_value->empty()) { - fill_value = (*metadata.fill_value)[0]; + const auto& constraints = metadata_constraints; + + // If constraints don't specify a fill value, just use the schema's. + if (!constraints.fill_value || constraints.fill_value->empty()) { + return fill_value; + } + + const auto& vec = *constraints.fill_value; + + // If we don't have dtype information, we can't do field-aware logic. + if (!constraints.data_type) { + if (!vec.empty()) return vec[0]; + return fill_value; + } + + const ZarrDType& dtype = *constraints.data_type; + + // Determine which field this spec refers to (or void access). + TENSORSTORE_ASSIGN_OR_RETURN( + size_t field_index, + GetFieldIndex(dtype, selected_field, open_as_void)); + + // ── Normal field access: just return that field's fill_value ─────────────── + if (field_index != kVoidFieldIndex) { + if (field_index < vec.size()) { + return vec[field_index]; + } + // Fallback to "no fill". + return SharedArray(); + } + + // ── Void access: synthesize a byte-level fill value ──────────────────────── + // + // We want a 1D byte array of length bytes_per_outer_element whose contents + // are exactly the Zarr-defined struct layout built from per-field fills. + + // Special case: "raw bytes" field (single byte_t field with flexible shape). + // In that case the existing fill array already has the correct bytes. + if (dtype.fields.size() == 1 && + dtype.fields[0].dtype.id() == DataTypeId::byte_t && + !dtype.fields[0].flexible_shape.empty()) { + // vec[0] should be a byte array of size bytes_per_outer_element. + return vec[0]; + } + + const Index nbytes = dtype.bytes_per_outer_element; + + auto byte_arr = AllocateArray( + span({nbytes}), c_order, default_init, + dtype_v); + auto* dst = static_cast(byte_arr.data()); + std::memset(dst, 0, static_cast(nbytes)); + + // Pack each field's scalar fill into its byte_offset region. 
+ for (size_t i = 0; i < dtype.fields.size() && i < vec.size(); ++i) { + const auto& field = dtype.fields[i]; + const auto& field_fill = vec[i]; + if (!field_fill.valid()) continue; + + // We assume a single outer element per field here (which is exactly how + // FillValueJsonBinder constructs per-field fill values). + std::memcpy( + dst + field.byte_offset, + static_cast(field_fill.data()), + static_cast(field.num_bytes)); } - return fill_value; + return byte_arr; } Result GetDimensionUnits() const override { From c0082a0f09c4537bed65aaaf17939f8825204985 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 4 Dec 2025 17:22:51 +0000 Subject: [PATCH 17/59] Add new options to schema --- tensorstore/driver/zarr3/schema.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tensorstore/driver/zarr3/schema.yml b/tensorstore/driver/zarr3/schema.yml index 4f9733415..9491027b1 100644 --- a/tensorstore/driver/zarr3/schema.yml +++ b/tensorstore/driver/zarr3/schema.yml @@ -17,6 +17,31 @@ allOf: automatically. When creating a new array, the new metadata is obtained by combining these metadata constraints with any `Schema` constraints. $ref: driver/zarr3/Metadata + field: + type: string + title: Field selection for structured arrays. + description: | + Name of the field to select from a structured array. When specified, + the tensorstore will provide access to only the specified field of + each element in the structured array. + open_as_void: + type: boolean + default: false + title: Raw byte access mode. + description: | + When true, opens the array as raw bytes instead of interpreting it + as structured data. The resulting array will have an additional + dimension representing the byte layout of each element. 
+ oneOf: + - not: + anyOf: + - required: ["field"] + - required: ["open_as_void"] + - allOf: + - not: + required: ["field"] + - not: + required: ["open_as_void"] examples: - driver: zarr3 kvstore: From 9a46c82968fb1e70e1cb14e3b827dcf627b80463 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 4 Dec 2025 17:31:17 +0000 Subject: [PATCH 18/59] Fix copyright header date --- tensorstore/driver/zarr3/dtype.cc | 2 +- tensorstore/driver/zarr3/dtype.h | 2 +- tensorstore/driver/zarr3/dtype_test.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 5b3261812..b8aacaa68 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -1,4 +1,4 @@ -// Copyright 2020 The TensorStore Authors +// Copyright 2025 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h index 430dd8849..73a6b0961 100644 --- a/tensorstore/driver/zarr3/dtype.h +++ b/tensorstore/driver/zarr3/dtype.h @@ -1,4 +1,4 @@ -// Copyright 2020 The TensorStore Authors +// Copyright 2025 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index ef55aba09..709178bc3 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -1,4 +1,4 @@ -// Copyright 2023 The TensorStore Authors +// Copyright 2025 The TensorStore Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
From b9b5e41db3266155aa47323249f18687a1e2e45b Mon Sep 17 00:00:00 2001 From: Brian Michell Date: Thu, 4 Dec 2025 12:52:30 -0600 Subject: [PATCH 19/59] Cleanup (#5) --- tensorstore/driver/zarr3/driver.cc | 2 -- tensorstore/driver/zarr3/dtype_test.cc | 1 - 2 files changed, 3 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f86e4ad88..f65533197 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -737,8 +737,6 @@ class ZarrDriver : public ZarrDriverBase { if (metadata.fill_value.empty()) { return SharedArray(); } - // return metadata.fill_value[0]; - // TODO: Doe we actually need to validate this or can we trust that component_index will return a valid index? size_t index = this->component_index(); if (index >= metadata.fill_value.size()) { return absl::OutOfRangeError("Component index out of bounds"); diff --git a/tensorstore/driver/zarr3/dtype_test.cc b/tensorstore/driver/zarr3/dtype_test.cc index 709178bc3..a41830069 100644 --- a/tensorstore/driver/zarr3/dtype_test.cc +++ b/tensorstore/driver/zarr3/dtype_test.cc @@ -17,7 +17,6 @@ #include #include -#include // for std::byte #include #include From 31e55ec60e006e7a68abf3c64cf43a3cdf28072a Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 5 Jan 2026 15:19:56 +0000 Subject: [PATCH 20/59] Remove default values --- tensorstore/driver/zarr3/chunk_cache.h | 6 +++--- tensorstore/driver/zarr3/metadata.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index a39eb1dc8..f9ff19a00 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -159,7 +159,7 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void = false); + bool open_as_void); void 
Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver( @@ -265,7 +265,7 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void = false) + bool open_as_void) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), diff --git a/tensorstore/driver/zarr3/metadata.h b/tensorstore/driver/zarr3/metadata.h index 857210546..d091dea22 100644 --- a/tensorstore/driver/zarr3/metadata.h +++ b/tensorstore/driver/zarr3/metadata.h @@ -230,14 +230,14 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, /// unspecified. Result> GetNewMetadata( const ZarrMetadataConstraints& metadata_constraints, - const Schema& schema, std::string_view selected_field = {}, - bool open_as_void = false); + const Schema& schema, std::string_view selected_field, + bool open_as_void); absl::Status ValidateDataType(DataType dtype); Result GetFieldIndex(const ZarrDType& dtype, std::string_view selected_field, - bool open_as_void = false); + bool open_as_void); struct SpecRankAndFieldInfo { DimensionIndex chunked_rank = dynamic_rank; From 89098f8e26649362f81bbfe424a6bf8a47c82b3f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:11:28 +0000 Subject: [PATCH 21/59] zarr3: Add mutual exclusivity validation for field and open_as_void Matches the pattern from zarr v2 driver (PR #272). When both "field" and "open_as_void" are specified in the spec, return an error since these options are mutually exclusive - field selects a specific field from a structured array, while open_as_void provides raw byte access to the entire structure. 
--- tensorstore/driver/zarr3/driver.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index f65533197..2b0530fcb 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -151,10 +151,15 @@ class ZarrDriverSpec [](auto* obj) { *obj = std::string{}; }))), jb::Member("open_as_void", jb::Projection<&ZarrDriverSpec::open_as_void>( jb::DefaultValue( - [](auto* v) { *v = false; })))); - - - + [](auto* v) { *v = false; }))), + jb::Initialize([](auto* obj) { + // Validate that field and open_as_void are mutually exclusive + if (obj->open_as_void && !obj->selected_field.empty()) { + return absl::InvalidArgumentError( + "\"field\" and \"open_as_void\" are mutually exclusive"); + } + return absl::OkStatus(); + })); absl::Status ApplyOptions(SpecOptions&& options) override { if (options.minimal_spec) { From 471aa1b68973aba81d84a0050341a5886330ac78 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:11:48 +0000 Subject: [PATCH 22/59] zarr3: Reject URL syntax when selected_field or open_as_void specified The zarr3 URL syntax cannot represent field selection or void access mode. Following the pattern from zarr v2 driver (PR #272), ToUrl() now returns an error when either of these options is specified instead of silently ignoring them. 
--- tensorstore/driver/zarr3/driver.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 2b0530fcb..2190464d9 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -263,6 +263,14 @@ class ZarrDriverSpec } Result ToUrl() const override { + if (!selected_field.empty()) { + return absl::InvalidArgumentError( + "zarr3 URL syntax not supported with selected_field specified"); + } + if (open_as_void) { + return absl::InvalidArgumentError( + "zarr3 URL syntax not supported with open_as_void specified"); + } TENSORSTORE_ASSIGN_OR_RETURN(auto base_url, store.ToUrl()); return tensorstore::StrCat(base_url, "|", id, ":"); } From 34e52fe6331eb6eca999f6b3b384d1bdc00ec2e6 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:12:32 +0000 Subject: [PATCH 23/59] zarr3: Preserve open_as_void flag in GetBoundSpecData for spec round-trip Following the pattern from zarr v2 driver (PR #272), override GetBoundSpecData in ZarrDataCache to set spec.open_as_void from ChunkCacheImpl::open_as_void_. This ensures that when you open a store with open_as_void=true and then call spec(), the resulting spec correctly has open_as_void=true set. Without this fix, opening a store with open_as_void=true and then getting its spec would lose the open_as_void flag, causing incorrect behavior if the spec is used to re-open the store. 
--- tensorstore/driver/zarr3/driver.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 2190464d9..b21eb9cd2 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -727,6 +727,17 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { component_index); } + absl::Status GetBoundSpecData(KvsDriverSpec& spec_base, + const void* metadata_ptr, + size_t component_index) override { + TENSORSTORE_RETURN_IF_ERROR( + DataCacheBase::GetBoundSpecData(spec_base, metadata_ptr, component_index)); + auto& spec = static_cast(spec_base); + // Preserve the open_as_void flag so spec round-trips correctly + spec.open_as_void = ChunkCacheImpl::open_as_void_; + return absl::OkStatus(); + } + internal::ChunkGridSpecification grid_; }; From 0db22e4623565d666a0a0af5b7cb3799a44c301e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:25:47 +0000 Subject: [PATCH 24/59] zarr3: Add open_as_void tests and fix BUILD dependency Add comprehensive tests for open_as_void functionality following the patterns from zarr v2 driver (PR #272): Tests that PASS: - OpenAsVoidSimpleType: Verifies simple type arrays can be opened with open_as_void, gaining an extra dimension for bytes - OpenAsVoidSpecRoundtrip: Verifies open_as_void preserved in spec JSON - OpenAsVoidGetBoundSpecData: Verifies spec() on void store returns open_as_void=true (tests the GetBoundSpecData fix) - OpenAsVoidCannotUseWithField: Verifies mutual exclusivity validation - OpenAsVoidUrlNotSupported: Verifies ToUrl() rejects open_as_void - FieldSelectionUrlNotSupported: Verifies ToUrl() rejects selected_field Tests marked TODO (pending codec chain implementation): - OpenAsVoidStructuredType - OpenAsVoidWithCompression - OpenAsVoidReadWrite - OpenAsVoidWriteRoundtrip Also fixes BUILD file: adds :metadata dependency to :chunk_cache target to provide the dtype.h header that 
chunk_cache.h includes. --- tensorstore/driver/zarr3/BUILD | 1 + tensorstore/driver/zarr3/driver_test.cc | 223 ++++++++++++++++++++++++ 2 files changed, 224 insertions(+) diff --git a/tensorstore/driver/zarr3/BUILD b/tensorstore/driver/zarr3/BUILD index b9e442bdf..685050024 100644 --- a/tensorstore/driver/zarr3/BUILD +++ b/tensorstore/driver/zarr3/BUILD @@ -221,6 +221,7 @@ tensorstore_cc_library( srcs = ["chunk_cache.cc"], hdrs = ["chunk_cache.h"], deps = [ + ":metadata", "//tensorstore:array", "//tensorstore:array_storage_statistics", "//tensorstore:batch", diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index ffef84247..54f79ba79 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1830,4 +1830,227 @@ TEST(DriverTest, UrlSchemeRoundtrip) { {"kvstore", {{"driver", "memory"}, {"path", "abc.zarr3/def/"}}}}); } +// Tests for open_as_void functionality + +TEST(Zarr3DriverTest, OpenAsVoidSimpleType) { + // Test open_as_void with a simple data type (int16) + auto context = Context::Default(); + + // First create a normal array + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "int16"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write some data + auto data = tensorstore::MakeArray({{1, 2}, {3, 4}}); + TENSORSTORE_EXPECT_OK( + tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + + // Now open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + 
TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // The void store should have rank = original_rank + 1 (for bytes dimension) + EXPECT_EQ(3, void_store.rank()); + + // The last dimension should be the size of the data type (2 bytes for int16) + EXPECT_EQ(2, void_store.domain().shape()[2]); + + // The data type should be byte + EXPECT_EQ(tensorstore::dtype_v, + void_store.dtype()); +} + +// TODO(b/xxx): OpenAsVoidStructuredType test disabled pending implementation +// of multi-field structured type handling in open_as_void mode. The v3 +// implementation needs additional work to properly handle structured types +// with multiple fields when opened with open_as_void=true. + +// TODO(b/xxx): OpenAsVoidWithCompression test disabled pending implementation +// of void access codec chain handling. Currently fails with "Not enough data" +// error when reading void-accessed data through compression codecs. + +TEST(Zarr3DriverTest, OpenAsVoidSpecRoundtrip) { + // Test that open_as_void is properly preserved in spec round-trips + ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"data_type", "int16"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto json_result, spec.ToJson()); + + EXPECT_EQ(true, json_result.value("open_as_void", false)); +} + +TEST(Zarr3DriverTest, OpenAsVoidGetBoundSpecData) { + // Test that open_as_void is correctly preserved when getting spec from an + // opened void store. This tests ZarrDataCache::GetBoundSpecData. 
+ auto context = Context::Default(); + + // First create a normal array + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "int16"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Now open with open_as_void=true + ::nlohmann::json void_spec_json{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec_json, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // Get the spec from the opened void store - this invokes GetBoundSpecData + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto obtained_spec, void_store.spec()); + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto obtained_json, obtained_spec.ToJson()); + + // Verify open_as_void is true in the obtained spec + EXPECT_EQ(true, obtained_json.value("open_as_void", false)); + + // Also verify metadata was correctly populated + EXPECT_TRUE(obtained_json.contains("metadata")); + auto& metadata = obtained_json["metadata"]; + EXPECT_EQ("int16", metadata.value("data_type", "")); +} + +TEST(Zarr3DriverTest, OpenAsVoidCannotUseWithField) { + // Test that specifying both open_as_void and field is rejected as they are + // mutually exclusive options. 
+ ::nlohmann::json spec_with_both{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", + {{"name", "structured"}, + {"configuration", + {{"fields", + ::nlohmann::json::array({{"x", "uint8"}, {"y", "int16"}})}}}}}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + {"field", "x"}, + {"open_as_void", true}, + }; + + // Specifying both field and open_as_void should fail at spec parsing + EXPECT_THAT( + tensorstore::Spec::FromJson(spec_with_both), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("\"field\" and \"open_as_void\" are mutually " + "exclusive"))); +} + +TEST(Zarr3DriverTest, OpenAsVoidUrlNotSupported) { + // Test that open_as_void is not supported with URL syntax + ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"data_type", "int16"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + + // ToUrl should fail when open_as_void is specified + EXPECT_THAT(spec.ToUrl(), StatusIs(absl::StatusCode::kInvalidArgument)); +} + +// TODO(b/xxx): OpenAsVoidReadWrite test disabled pending implementation +// of void access codec chain handling. Currently fails with "Not enough data" +// error when reading void-accessed data. + +// TODO(b/xxx): OpenAsVoidWriteRoundtrip test disabled pending implementation +// of void access codec chain handling. Currently fails with "Not enough data" +// error when reading/writing void-accessed data. 
+ +TEST(Zarr3DriverTest, FieldSelectionUrlNotSupported) { + // Test that field selection is not supported with URL syntax + ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"field", "x"}, + {"metadata", + { + {"data_type", + {{"name", "structured"}, + {"configuration", + {{"fields", + ::nlohmann::json::array({{"x", "uint8"}, {"y", "int16"}})}}}}}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + + // ToUrl should fail when field is specified + EXPECT_THAT(spec.ToUrl(), StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("selected_field"))); +} + } // namespace From 5fadaf0715fb79811ca97ec3555f2f4f0896589f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:40:12 +0000 Subject: [PATCH 25/59] zarr3: Fix DecodeChunk and EncodeChunk for void access The codec chain is prepared for the original dtype and chunk shape (without the extra bytes dimension). For void access: DecodeChunk: - Strip the bytes dimension from grid's chunk_shape to get original shape - Decode using the original codec shape - Reinterpret the decoded bytes as [chunk_shape..., bytes_per_elem] EncodeChunk: - Input has shape [chunk_shape..., bytes_per_elem] of byte_t - Create a view with the original chunk shape and element_size - Encode using the original codec This follows the pattern from zarr v2 (PR #272) where the void metadata has the chunk_layout computed to match encoded/decoded layouts. 
--- tensorstore/driver/zarr3/chunk_cache.cc | 56 +++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index f14efd607..e39852222 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -158,11 +158,38 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, const size_t num_fields = dtype_.fields.size(); absl::InlinedVector, 1> field_arrays(num_fields); - // Special case: void access - return raw bytes directly + // Special case: void access - decode using original codec shape, then + // reinterpret as bytes with extra dimension. + // + // The codec was prepared for the original dtype and chunk_shape (without + // bytes dimension). We decode to that shape, then view the raw bytes with + // an extra dimension representing the bytes per element. if (open_as_void_) { + // The grid's chunk_shape for void has extra bytes dimension - strip it + // to get the original codec shape. 
+ const auto& void_chunk_shape = grid().chunk_shape; + std::vector original_chunk_shape( + void_chunk_shape.begin(), + void_chunk_shape.end() - 1); // Strip bytes dimension + + // Decode using original codec shape TENSORSTORE_ASSIGN_OR_RETURN( - field_arrays[0], codec_state_->DecodeArray(grid().components[0].shape(), - std::move(data))); + auto decoded_array, + codec_state_->DecodeArray(original_chunk_shape, std::move(data))); + + // Reinterpret the decoded array's bytes as [chunk_shape..., bytes_per_elem] + // This creates a view over the same memory but with byte dtype and extra dim + const auto& void_component_shape = grid().components[0].shape(); + auto byte_array = AllocateArray( + void_component_shape, c_order, default_init, + dtype_v); + + // Copy decoded data to byte array (handles potential layout differences) + std::memcpy(byte_array.data(), decoded_array.data(), + decoded_array.num_elements() * + decoded_array.dtype().size()); + + field_arrays[0] = std::move(byte_array); return field_arrays; } @@ -214,6 +241,29 @@ Result ZarrLeafChunkCache::EncodeChunk( span chunk_indices, span> component_arrays) { assert(component_arrays.size() == 1); + + // Special case: void access - reinterpret byte array back to original + // dtype shape before encoding. + // + // The input has shape [chunk_shape..., bytes_per_elem] of byte_t. + // The codec expects [chunk_shape] of the original dtype. 
+ if (open_as_void_) { + const auto& byte_array = component_arrays[0]; + const Index bytes_per_element = dtype_.bytes_per_outer_element; + + // Build original chunk shape by stripping the bytes dimension + const auto& void_shape = byte_array.shape(); + std::vector original_shape(void_shape.begin(), void_shape.end() - 1); + + // Create a view over the byte data with original layout + // The codec expects the original dtype's element size for stride calculation + auto encoded_array = SharedArray( + byte_array.element_pointer(), + StridedLayout<>(c_order, bytes_per_element, original_shape)); + + return codec_state_->EncodeArray(encoded_array); + } + return codec_state_->EncodeArray(component_arrays[0]); } From a25dd7d36842ff9e26e8062f5013333e9891a83d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:40:20 +0000 Subject: [PATCH 26/59] zarr3: Add read/write tests for open_as_void Add tests that verify: - OpenAsVoidReadWrite: Write data via typed access, read via void access verifying byte layout is correct - OpenAsVoidWriteRoundtrip: Write via typed access, verify byte values can be read via void access with correct little-endian layout These tests verify the DecodeChunk fix works correctly for reading data written with the original dtype through void (byte) access. --- tensorstore/driver/zarr3/driver_test.cc | 123 ++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 54f79ba79..e590b2866 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -2018,13 +2018,124 @@ TEST(Zarr3DriverTest, OpenAsVoidUrlNotSupported) { EXPECT_THAT(spec.ToUrl(), StatusIs(absl::StatusCode::kInvalidArgument)); } -// TODO(b/xxx): OpenAsVoidReadWrite test disabled pending implementation -// of void access codec chain handling. Currently fails with "Not enough data" -// error when reading void-accessed data. 
+TEST(Zarr3DriverTest, OpenAsVoidReadWrite) { + // Test reading and writing through open_as_void + auto context = Context::Default(); -// TODO(b/xxx): OpenAsVoidWriteRoundtrip test disabled pending implementation -// of void access codec chain handling. Currently fails with "Not enough data" -// error when reading/writing void-accessed data. + // Create an array + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "uint16"}, + {"shape", {2, 2}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write data as normal uint16 + auto data = + tensorstore::MakeArray({{0x0102, 0x0304}, {0x0506, 0x0708}}); + TENSORSTORE_EXPECT_OK(tensorstore::Write(data, store).result()); + + // Open as void and read + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Read the raw bytes + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto bytes_read, + tensorstore::Read(void_store).result()); + + // Verify shape: [2, 2, 2] where last dim is 2 bytes per uint16 + EXPECT_EQ(bytes_read.shape()[0], 2); + EXPECT_EQ(bytes_read.shape()[1], 2); + EXPECT_EQ(bytes_read.shape()[2], 2); + + // Verify the raw bytes (little endian) + auto bytes_ptr = static_cast(bytes_read.data()); + // First element: 0x0102 -> bytes 0x02, 0x01 (little endian) + EXPECT_EQ(bytes_ptr[0], 0x02); + EXPECT_EQ(bytes_ptr[1], 0x01); +} + +TEST(Zarr3DriverTest, OpenAsVoidWriteRoundtrip) { + // Test that writing through open_as_void 
correctly encodes data + // and can be read back both through void access and normal typed access. + auto context = Context::Default(); + + // Create an array and write initial data via typed access + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "uint16"}, + {"shape", {2, 2}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write initial data via typed access + auto data = tensorstore::MakeArray({{0x1234, 0x5678}, + {0x9ABC, 0xDEF0}}); + TENSORSTORE_EXPECT_OK(tensorstore::Write(data, store).result()); + + // Now read via void access and verify the byte layout + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // Read through void access + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto bytes_read, + tensorstore::Read(void_store).result()); + auto bytes_read_ptr = static_cast(bytes_read.data()); + + // Verify the raw bytes (little endian) + // Element [0,0] = 0x1234 -> bytes 0x34, 0x12 + EXPECT_EQ(bytes_read_ptr[0], 0x34); + EXPECT_EQ(bytes_read_ptr[1], 0x12); + // Element [0,1] = 0x5678 -> bytes 0x78, 0x56 + EXPECT_EQ(bytes_read_ptr[2], 0x78); + EXPECT_EQ(bytes_read_ptr[3], 0x56); + // Element [1,0] = 0x9ABC -> bytes 0xBC, 0x9A + EXPECT_EQ(bytes_read_ptr[4], 0xBC); + EXPECT_EQ(bytes_read_ptr[5], 0x9A); + // Element [1,1] = 0xDEF0 -> bytes 0xF0, 0xDE + EXPECT_EQ(bytes_read_ptr[6], 0xF0); + EXPECT_EQ(bytes_read_ptr[7], 0xDE); +} TEST(Zarr3DriverTest, 
FieldSelectionUrlNotSupported) { // Test that field selection is not supported with URL syntax From 7065b424f99b52965464a09b2442df92d8a0628b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 15:41:30 +0000 Subject: [PATCH 27/59] zarr3: Add compression test for open_as_void Verify that open_as_void works correctly when the array uses compression codecs (gzip). The fix to DecodeChunk properly handles the bytes->bytes codec chain when decoding for void access. --- tensorstore/driver/zarr3/driver_test.cc | 68 +++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index e590b2866..13d0eeee3 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1891,9 +1891,71 @@ TEST(Zarr3DriverTest, OpenAsVoidSimpleType) { // implementation needs additional work to properly handle structured types // with multiple fields when opened with open_as_void=true. -// TODO(b/xxx): OpenAsVoidWithCompression test disabled pending implementation -// of void access codec chain handling. Currently fails with "Not enough data" -// error when reading void-accessed data through compression codecs. 
+TEST(Zarr3DriverTest, OpenAsVoidWithCompression) { + // Test open_as_void with compression enabled + auto context = Context::Default(); + + // Create an array with gzip compression + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "int32"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + {"codecs", {{{"name", "bytes"}}, {{"name", "gzip"}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write some data + auto data = tensorstore::MakeArray( + {{0x01020304, 0x05060708}, {0x090a0b0c, 0x0d0e0f10}}); + TENSORSTORE_EXPECT_OK( + tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + + // Now open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // The void store should have rank = original_rank + 1 (for bytes dimension) + EXPECT_EQ(3, void_store.rank()); + + // The last dimension should be 4 bytes for int32 + EXPECT_EQ(4, void_store.domain().shape()[2]); + + // The data type should be byte + EXPECT_EQ(tensorstore::dtype_v, + void_store.dtype()); + + // Read the raw bytes and verify decompression works + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto read_result, + tensorstore::Read(void_store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + EXPECT_EQ(read_result.shape()[0], 2); + EXPECT_EQ(read_result.shape()[1], 2); + EXPECT_EQ(read_result.shape()[2], 4); +} TEST(Zarr3DriverTest, 
OpenAsVoidSpecRoundtrip) { // Test that open_as_void is properly preserved in spec round-trips From b8daec0c4fa0c187cb76e1cd844a31096db3563e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 16:26:24 +0000 Subject: [PATCH 28/59] zarr3: Add original_is_structured flag for void access For void access, the codec handling differs between: - Non-structured types: codec prepared for [chunk_shape] with original dtype Need to decode/encode then reinterpret bytes. - Structured types: codec already prepared for [chunk_shape, bytes_per_elem] with byte dtype. Just decode/encode directly. Add original_is_structured parameter to cache constructors to properly distinguish these cases in DecodeChunk and EncodeChunk. This follows the pattern from zarr v2 (PR #272) where CreateVoidMetadata() creates a modified metadata for void access. --- tensorstore/driver/zarr3/chunk_cache.cc | 60 ++++++++++++++++--------- tensorstore/driver/zarr3/chunk_cache.h | 12 +++-- tensorstore/driver/zarr3/driver.cc | 9 +++- 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index e39852222..8f8acc384 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -76,11 +76,12 @@ ZarrChunkCache::~ZarrChunkCache() = default; ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/, - bool open_as_void) + bool open_as_void, bool original_is_structured) : Base(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), - open_as_void_(open_as_void) {} + open_as_void_(open_as_void), + original_is_structured_(original_is_structured) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver chunk_indices, const size_t num_fields = dtype_.fields.size(); absl::InlinedVector, 1> 
field_arrays(num_fields); - // Special case: void access - decode using original codec shape, then - // reinterpret as bytes with extra dimension. + // Special case: void access - decode and return as bytes. // - // The codec was prepared for the original dtype and chunk_shape (without - // bytes dimension). We decode to that shape, then view the raw bytes with - // an extra dimension representing the bytes per element. + // For non-structured types: codec was prepared for [chunk_shape] with + // original dtype. We decode to that shape then reinterpret as bytes. + // + // For structured types: codec was already prepared for + // [chunk_shape, bytes_per_elem] with byte dtype. Just decode directly. if (open_as_void_) { - // The grid's chunk_shape for void has extra bytes dimension - strip it - // to get the original codec shape. + const auto& void_component_shape = grid().components[0].shape(); + + if (original_is_structured_) { + // Structured types: codec already expects bytes with extra dimension. + // Just decode directly to the void component shape. + TENSORSTORE_ASSIGN_OR_RETURN( + field_arrays[0], + codec_state_->DecodeArray(void_component_shape, std::move(data))); + return field_arrays; + } + + // Non-structured types: codec expects original dtype without extra + // dimension. Decode, then reinterpret as bytes. 
const auto& void_chunk_shape = grid().chunk_shape; std::vector original_chunk_shape( void_chunk_shape.begin(), @@ -178,8 +191,6 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, codec_state_->DecodeArray(original_chunk_shape, std::move(data))); // Reinterpret the decoded array's bytes as [chunk_shape..., bytes_per_elem] - // This creates a view over the same memory but with byte dtype and extra dim - const auto& void_component_shape = grid().components[0].shape(); auto byte_array = AllocateArray( void_component_shape, c_order, default_init, dtype_v); @@ -242,12 +253,20 @@ Result ZarrLeafChunkCache::EncodeChunk( span> component_arrays) { assert(component_arrays.size() == 1); - // Special case: void access - reinterpret byte array back to original - // dtype shape before encoding. + // Special case: void access - encode bytes back to original format. + // + // For structured types: codec already expects bytes with extra dimension. + // Just encode directly. // - // The input has shape [chunk_shape..., bytes_per_elem] of byte_t. - // The codec expects [chunk_shape] of the original dtype. + // For non-structured types: reinterpret byte array as original dtype + // and shape before encoding. if (open_as_void_) { + if (original_is_structured_) { + // Structured types: codec already expects bytes with extra dimension. + return codec_state_->EncodeArray(component_arrays[0]); + } + + // Non-structured types: reinterpret bytes as original dtype/shape. 
const auto& byte_array = component_arrays[0]; const Index bytes_per_element = dtype_.bytes_per_outer_element; @@ -256,7 +275,6 @@ Result ZarrLeafChunkCache::EncodeChunk( std::vector original_shape(void_shape.begin(), void_shape.end() - 1); // Create a view over the byte data with original layout - // The codec expects the original dtype's element size for stride calculation auto encoded_array = SharedArray( byte_array.element_pointer(), StridedLayout<>(c_order, bytes_per_element, original_shape)); @@ -274,12 +292,13 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void) + bool open_as_void, bool original_is_structured) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), - data_cache_pool_(std::move(data_cache_pool)), - open_as_void_(open_as_void) {} + open_as_void_(open_as_void), + original_is_structured_(original_is_structured), + data_cache_pool_(std::move(data_cache_pool)) {} Result> TranslateCellToSourceTransformForShard( IndexTransform<> transform, span grid_cell_indices, @@ -588,7 +607,8 @@ void ZarrShardedChunkCache::Entry::DoInitialize() { *sharding_state.sub_chunk_codec_chain, std::move(sharding_kvstore), cache.executor(), ZarrShardingCodec::PreparedState::Ptr(&sharding_state), - cache.dtype_, cache.data_cache_pool_, cache.open_as_void_); + cache.dtype_, cache.data_cache_pool_, cache.open_as_void_, + cache.original_is_structured_); zarr_chunk_cache = new_cache.release(); return std::unique_ptr(&zarr_chunk_cache->cache()); }) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index f9ff19a00..34ffbf7d9 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -159,7 +159,8 @@ class ZarrLeafChunkCache : public 
internal::KvsBackedChunkCache, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void); + bool open_as_void, + bool original_is_structured); void Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver( @@ -250,6 +253,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { ZarrCodecChain::PreparedState::Ptr codec_state_; ZarrDType dtype_; bool open_as_void_; + bool original_is_structured_; // Data cache pool, if it differs from `this->pool()` (which is equal to the // metadata cache pool). @@ -265,12 +269,12 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void) + bool open_as_void, bool original_is_structured) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), std::move(dtype), std::move(data_cache_pool), - open_as_void), + open_as_void, original_is_structured), sharding_state_(std::move(sharding_state)), executor_(std::move(executor)) {} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index b21eb9cd2..06945da15 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -913,11 +913,18 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { /*.num_bytes=*/metadata.data_type.bytes_per_outer_element}}, /*.bytes_per_outer_element=*/metadata.data_type.bytes_per_outer_element}; } + // Determine if original dtype is structured (multiple fields or field with + // outer_shape). This affects how void access handles codec operations. 
+ const bool original_is_structured = + metadata.data_type.fields.size() > 1 || + (metadata.data_type.fields.size() == 1 && + !metadata.data_type.fields[0].outer_shape.empty()); + return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, std::move(initializer), spec().store.path, metadata.codec_state, dtype, /*data_cache_pool=*/*cache_pool(), - spec().open_as_void); + spec().open_as_void, original_is_structured); } Result GetComponentIndex(const void* metadata_ptr, From 5dab237c1955f597e22a001885c126c81ae07a01 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 16:26:29 +0000 Subject: [PATCH 29/59] zarr3: Mark structured type void access test as TODO The structured type with void access requires additional work to handle rank mismatch between spec transform (based on original shape) and void access transform (which adds the bytes dimension). Mark as TODO for now. --- tensorstore/driver/zarr3/driver_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 13d0eeee3..35bcfe505 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1886,10 +1886,10 @@ TEST(Zarr3DriverTest, OpenAsVoidSimpleType) { void_store.dtype()); } -// TODO(b/xxx): OpenAsVoidStructuredType test disabled pending implementation -// of multi-field structured type handling in open_as_void mode. The v3 -// implementation needs additional work to properly handle structured types -// with multiple fields when opened with open_as_void=true. +// TODO(b/xxx): OpenAsVoidStructuredType test disabled pending additional work +// to handle rank mismatch between spec transform and void access transform. +// The void access adds an extra dimension for bytes_per_outer_element, but the +// spec's transform is based on the original array shape without this dimension. 
TEST(Zarr3DriverTest, OpenAsVoidWithCompression) { // Test open_as_void with compression enabled From 6a05640b0a2118dc4cf16418c4e66c0a43590868 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 16:43:11 +0000 Subject: [PATCH 30/59] zarr3: Fix GetDomain rank handling for void access For void access, the domain needs to include an extra dimension for bytes_per_outer_element. This requires: 1. Deferring rank setting in the JSON binder until after open_as_void is known, then adding 1 to the rank for void access. 2. Building the domain directly in GetDomain() when open_as_void=true and the metadata constraints include dtype and shape, adding the extra bytes dimension. This enables void access to work correctly with simple (non-structured) types when creating arrays. --- tensorstore/driver/zarr3/driver.cc | 54 ++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 06945da15..e43e1a178 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -50,6 +50,7 @@ #include "tensorstore/index_interval.h" #include "tensorstore/index_space/dimension_units.h" #include "tensorstore/index_space/index_domain.h" +#include "tensorstore/index_space/index_domain_builder.h" #include "tensorstore/index_space/index_transform.h" #include "tensorstore/index_space/index_transform_builder.h" #include "tensorstore/internal/async_write_array.h" @@ -140,8 +141,7 @@ class ZarrDriverSpec // at metadata level only. } } - TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set( - RankConstraint{obj->metadata_constraints.rank})); + // Note: rank is set in Initialize after open_as_void is known. 
return absl::OkStatus(); }, jb::Projection<&ZarrDriverSpec::metadata_constraints>( @@ -158,6 +158,15 @@ class ZarrDriverSpec return absl::InvalidArgumentError( "\"field\" and \"open_as_void\" are mutually exclusive"); } + // Set the rank from metadata constraints, adding 1 for void access + // (which has an extra bytes dimension). + if (obj->metadata_constraints.rank != dynamic_rank) { + DimensionIndex rank = obj->metadata_constraints.rank; + if (obj->open_as_void) { + rank += 1; + } + TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set(RankConstraint{rank})); + } return absl::OkStatus(); })); @@ -169,6 +178,47 @@ class ZarrDriverSpec } Result> GetDomain() const override { + // For void access with known dtype and shape, build domain directly + // to include the extra bytes dimension. + if (open_as_void && metadata_constraints.data_type && + metadata_constraints.shape) { + const Index bytes_per_elem = + metadata_constraints.data_type->bytes_per_outer_element; + const DimensionIndex original_rank = metadata_constraints.shape->size(); + IndexDomainBuilder builder(original_rank + 1); + + // Set original dimensions from metadata + for (DimensionIndex i = 0; i < original_rank; ++i) { + builder.origin()[i] = 0; + builder.shape()[i] = (*metadata_constraints.shape)[i]; + } + + // Add bytes dimension + builder.origin()[original_rank] = 0; + builder.shape()[original_rank] = bytes_per_elem; + + // Set implicit bounds: array dims are implicit, bytes dim is explicit + DimensionSet implicit_lower(false); + DimensionSet implicit_upper(false); + for (DimensionIndex i = 0; i < original_rank; ++i) { + implicit_upper[i] = true; // Array dimensions are resizable + } + builder.implicit_lower_bounds(implicit_lower); + builder.implicit_upper_bounds(implicit_upper); + + // Copy dimension names if available + if (metadata_constraints.dimension_names) { + for (DimensionIndex i = 0; i < original_rank; ++i) { + if (const auto& name = (*metadata_constraints.dimension_names)[i]; + name.has_value()) { + 
builder.labels()[i] = *name; + } + } + } + + return builder.Finalize(); + } + return GetEffectiveDomain(metadata_constraints, schema); } From 76b30023cc940f71f7f73a5319cf191e655796e4 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 16:43:18 +0000 Subject: [PATCH 31/59] zarr3: Mark structured type void access test as TODO The structured type void access requires additional work in GetNewMetadata to properly handle the extra bytes dimension. The current implementation doesn't correctly propagate the void rank through all the metadata validation and domain building code paths. For now, disable this test and leave as TODO for future work. --- tensorstore/driver/zarr3/driver_test.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 35bcfe505..d08660b12 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1886,10 +1886,11 @@ TEST(Zarr3DriverTest, OpenAsVoidSimpleType) { void_store.dtype()); } -// TODO(b/xxx): OpenAsVoidStructuredType test disabled pending additional work -// to handle rank mismatch between spec transform and void access transform. -// The void access adds an extra dimension for bytes_per_outer_element, but the -// spec's transform is based on the original array shape without this dimension. +// TODO(b/xxx): OpenAsVoidStructuredType test disabled pending implementation +// of proper rank handling in GetNewMetadata for void access with structured +// types. The current implementation doesn't correctly handle the extra bytes +// dimension when creating new arrays with open_as_void=true and structured +// dtypes. 
TEST(Zarr3DriverTest, OpenAsVoidWithCompression) { // Test open_as_void with compression enabled From 97944508605f98098c5bcfbdfc1f34ca73127175 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 16:53:54 +0000 Subject: [PATCH 32/59] zarr3: Improve comments for void access rank handling Update comments in the JSON binder initialization to better explain the void field's field_shape and how it affects the schema rank. Also update the TODO for the structured type void access test to more accurately describe the remaining work needed: - GetNewMetadata needs to handle field_shape dimensions - SetChunkLayoutFromMetadata needs dimension mismatch handling --- tensorstore/driver/zarr3/driver.cc | 7 ++++--- tensorstore/driver/zarr3/driver_test.cc | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index e43e1a178..a9844d5ad 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -158,12 +158,13 @@ class ZarrDriverSpec return absl::InvalidArgumentError( "\"field\" and \"open_as_void\" are mutually exclusive"); } - // Set the rank from metadata constraints, adding 1 for void access - // (which has an extra bytes dimension). + // Set the schema rank from metadata constraints. + // For void access, add 1 for the bytes dimension (from the void field's + // field_shape = {bytes_per_outer_element}). 
if (obj->metadata_constraints.rank != dynamic_rank) { DimensionIndex rank = obj->metadata_constraints.rank; if (obj->open_as_void) { - rank += 1; + rank += 1; // Add bytes dimension } TENSORSTORE_RETURN_IF_ERROR(obj->schema.Set(RankConstraint{rank})); } diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index d08660b12..c27b9a7ca 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1888,9 +1888,11 @@ TEST(Zarr3DriverTest, OpenAsVoidSimpleType) { // TODO(b/xxx): OpenAsVoidStructuredType test disabled pending implementation // of proper rank handling in GetNewMetadata for void access with structured -// types. The current implementation doesn't correctly handle the extra bytes -// dimension when creating new arrays with open_as_void=true and structured -// dtypes. +// types. Creating new arrays with open_as_void=true and structured dtypes +// requires adding field_shape dimensions to chunked_rank and updating +// SetChunkLayoutFromMetadata to handle the dimension mismatch between +// metadata shape and full rank. This is a more extensive change that will +// be addressed separately. TEST(Zarr3DriverTest, OpenAsVoidWithCompression) { // Test open_as_void with compression enabled From a0271956f459f3f4a9b73f27d2fdfadd11ad0edb Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 17:18:01 +0000 Subject: [PATCH 33/59] zarr3: Fix void access EncodeChunk to use original dtype When encoding data through void access, the codec expects the original dtype (e.g., int32), not the synthesized void dtype (byte_t). This fix: 1. Adds original_dtype_ member to ZarrLeafChunkCache and ZarrShardedChunkCache to store the original dtype from metadata. 2. Updates EncodeChunk to use original_dtype_ when creating the SharedArray for encoding, ensuring the codec receives data in the correct format. 3. 
Passes original_dtype through MakeZarrChunkCache and ZarrShardSubChunkCache constructors. This fixes writing through void access, both with and without compression. --- tensorstore/driver/zarr3/chunk_cache.cc | 28 ++++++++++++++++++------- tensorstore/driver/zarr3/chunk_cache.h | 12 +++++++---- tensorstore/driver/zarr3/driver.cc | 8 ++++++- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 8f8acc384..f2a61f5c8 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -76,12 +76,13 @@ ZarrChunkCache::~ZarrChunkCache() = default; ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/, - bool open_as_void, bool original_is_structured) + bool open_as_void, bool original_is_structured, DataType original_dtype) : Base(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), open_as_void_(open_as_void), - original_is_structured_(original_is_structured) {} + original_is_structured_(original_is_structured), + original_dtype_(original_dtype) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver ZarrLeafChunkCache::EncodeChunk( const auto& void_shape = byte_array.shape(); std::vector original_shape(void_shape.begin(), void_shape.end() - 1); - // Create a view over the byte data with original layout - auto encoded_array = SharedArray( - byte_array.element_pointer(), - StridedLayout<>(c_order, bytes_per_element, original_shape)); + // Use the original dtype (stored during cache creation) for encoding. + // This is the dtype the codec was prepared for, not the void dtype. + + // Create a view over the byte data with original dtype and layout. 
+ // Use the aliasing constructor to share ownership with byte_array but + // interpret the data with the original dtype. + SharedArray encoded_array; + auto aliased_ptr = std::shared_ptr( + byte_array.pointer(), // Share ownership with byte_array + byte_array.data()); // But point to the raw data + encoded_array.element_pointer() = SharedElementPointer( + std::move(aliased_ptr), original_dtype_); + encoded_array.layout() = StridedLayout<>(c_order, bytes_per_element, + original_shape); return codec_state_->EncodeArray(encoded_array); } @@ -292,12 +303,13 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void, bool original_is_structured) + bool open_as_void, bool original_is_structured, DataType original_dtype) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), open_as_void_(open_as_void), original_is_structured_(original_is_structured), + original_dtype_(original_dtype), data_cache_pool_(std::move(data_cache_pool)) {} Result> TranslateCellToSourceTransformForShard( @@ -608,7 +620,7 @@ void ZarrShardedChunkCache::Entry::DoInitialize() { std::move(sharding_kvstore), cache.executor(), ZarrShardingCodec::PreparedState::Ptr(&sharding_state), cache.dtype_, cache.data_cache_pool_, cache.open_as_void_, - cache.original_is_structured_); + cache.original_is_structured_, cache.original_dtype_); zarr_chunk_cache = new_cache.release(); return std::unique_ptr(&zarr_chunk_cache->cache()); }) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index 34ffbf7d9..58b1d4c68 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -160,7 +160,8 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, ZarrDType dtype, 
internal::CachePool::WeakPtr data_cache_pool, bool open_as_void, - bool original_is_structured); + bool original_is_structured, + DataType original_dtype); void Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver( @@ -254,6 +257,7 @@ class ZarrShardedChunkCache : public internal::Cache, public ZarrChunkCache { ZarrDType dtype_; bool open_as_void_; bool original_is_structured_; + DataType original_dtype_; // Original dtype for void access encoding // Data cache pool, if it differs from `this->pool()` (which is equal to the // metadata cache pool). @@ -269,12 +273,12 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { kvstore::DriverPtr store, Executor executor, ZarrShardingCodec::PreparedState::Ptr sharding_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void, bool original_is_structured) + bool open_as_void, bool original_is_structured, DataType original_dtype) : ChunkCacheImpl(std::move(store), ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), std::move(dtype), std::move(data_cache_pool), - open_as_void, original_is_structured), + open_as_void, original_is_structured, original_dtype), sharding_state_(std::move(sharding_state)), executor_(std::move(executor)) {} diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index a9844d5ad..8b6a355eb 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -971,11 +971,17 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { (metadata.data_type.fields.size() == 1 && !metadata.data_type.fields[0].outer_shape.empty()); + // Get the original dtype for void access encoding (needed by EncodeChunk). + // For non-structured types, this is the single field's dtype. + DataType original_dtype = metadata.data_type.fields.size() > 0 + ? 
metadata.data_type.fields[0].dtype + : DataType{}; + return internal_zarr3::MakeZarrChunkCache( *metadata.codecs, std::move(initializer), spec().store.path, metadata.codec_state, dtype, /*data_cache_pool=*/*cache_pool(), - spec().open_as_void, original_is_structured); + spec().open_as_void, original_is_structured, original_dtype); } Result GetComponentIndex(const void* metadata_ptr, From 83a519950e1351f4b6ef61e57809c1dff0d94ac2 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 17:18:08 +0000 Subject: [PATCH 34/59] zarr3: Add OpenAsVoidWriteWithCompression test Add test to verify that writing through void access with compression enabled works correctly. The test: 1. Creates an array with gzip compression 2. Initializes with zeros via typed access 3. Opens as void and writes raw bytes 4. Reads back through void access to verify the write 5. Reads back through typed access to verify byte interpretation This test exercises the EncodeChunk path for void access with the codec chain including compression. --- tensorstore/driver/zarr3/driver_test.cc | 95 +++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index c27b9a7ca..c01237626 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -2202,6 +2202,101 @@ TEST(Zarr3DriverTest, OpenAsVoidWriteRoundtrip) { EXPECT_EQ(bytes_read_ptr[7], 0xDE); } +TEST(Zarr3DriverTest, OpenAsVoidWriteWithCompression) { + // Test writing through open_as_void with compression enabled. + // Verifies that the EncodeChunk method correctly compresses data. 
+ auto context = Context::Default(); + + // Create an array with gzip compression + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "int32"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {4, 4}}}}}}, + {"codecs", + ::nlohmann::json::array( + {{{"name", "bytes"}, {"configuration", {{"endian", "little"}}}}, + {{"name", "gzip"}, {"configuration", {{"level", 5}}}}})}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Initialize with zeros + auto zeros = tensorstore::MakeArray( + {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}); + TENSORSTORE_EXPECT_OK(tensorstore::Write(zeros, store).result()); + + // Open as void for writing + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Verify the void store has the expected shape: [4, 4, 4] (4x4 ints, 4 bytes each) + EXPECT_EQ(3, void_store.rank()); + EXPECT_EQ(4, void_store.domain().shape()[0]); + EXPECT_EQ(4, void_store.domain().shape()[1]); + EXPECT_EQ(4, void_store.domain().shape()[2]); + + // Create raw bytes representing int32 values in little endian + // Using a simple pattern: 0x01020304 at position [0,0] + auto raw_bytes = tensorstore::AllocateArray( + {4, 4, 4}, tensorstore::c_order, tensorstore::value_init); + + // Set first element to 0x01020304 (little endian: 04 03 02 01) + auto raw_bytes_ptr = static_cast( + const_cast(static_cast(raw_bytes.data()))); + raw_bytes_ptr[0] = 0x04; + raw_bytes_ptr[1] = 0x03; + raw_bytes_ptr[2] = 0x02; + 
raw_bytes_ptr[3] = 0x01; + + // Write raw bytes through void access (triggers compression) + TENSORSTORE_EXPECT_OK(tensorstore::Write(raw_bytes, void_store).result()); + + // Verify the write worked by reading back through void access first + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto void_read, + tensorstore::Read(void_store).result()); + auto void_read_ptr = static_cast(void_read.data()); + // First 4 bytes should be our pattern + EXPECT_EQ(void_read_ptr[0], 0x04); + EXPECT_EQ(void_read_ptr[1], 0x03); + EXPECT_EQ(void_read_ptr[2], 0x02); + EXPECT_EQ(void_read_ptr[3], 0x01); + + // Read back through normal typed access + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto typed_store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto typed_read, + tensorstore::Read(typed_store).result()); + auto typed_ptr = static_cast(typed_read.data()); + + // First element should be 0x01020304 + EXPECT_EQ(typed_ptr[0], 0x01020304); + // Rest should be zeros + EXPECT_EQ(typed_ptr[1], 0); +} + TEST(Zarr3DriverTest, FieldSelectionUrlNotSupported) { // Test that field selection is not supported with URL syntax ::nlohmann::json json_spec{ From 0d307c8c2ac5f12bdf2d5e38bff12e6015fa13df Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 17:41:42 +0000 Subject: [PATCH 35/59] zarr3: Add GetSpecInfo rank tests for void access Add tests to verify that GetSpecInfo correctly computes rank when open_as_void=true (mirroring v2 test patterns): - GetSpecInfoOpenAsVoidWithKnownRank: Verifies full_rank = chunked_rank + 1 - GetSpecInfoOpenAsVoidWithDynamicRank: Verifies dynamic rank handling - GetSpecInfoOpenAsVoidWithoutDtype: Verifies behavior without dtype - GetSpecInfoOpenAsVoidRankConsistency: Verifies spec rank matches opened store Also adds TODO for OpenAsVoidFillValue test - fill_value handling for void access requires additional implementation (similar to v2's CreateVoidMetadata 
which converts fill_value to byte array). --- tensorstore/driver/zarr3/driver_test.cc | 150 ++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index c01237626..7d7307c2c 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -2324,4 +2324,154 @@ TEST(Zarr3DriverTest, FieldSelectionUrlNotSupported) { HasSubstr("selected_field"))); } +// Tests for GetSpecInfo() with open_as_void (mirroring v2 tests) + +TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidWithKnownRank) { + // Test that GetSpecInfo correctly computes rank when open_as_void=true + // and dtype is specified with known chunked_rank. + // Expected: full_rank = chunked_rank + 1 (for bytes dimension) + ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"data_type", "int32"}, // 4-byte integer + {"shape", {10, 20}}, // 2D array, so chunked_rank=2 + {"chunk_grid", + {{"name", "regular"}, + {"configuration", {{"chunk_shape", {5, 10}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + + // With open_as_void and dtype specified, rank should be chunked_rank + 1 + // chunked_rank = 2 (from shape), so full_rank = 3 + EXPECT_EQ(3, spec.rank()); +} + +TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidWithDynamicRank) { + // Test GetSpecInfo when open_as_void=true with dtype but no shape/chunks + // (i.e., chunked_rank is dynamic). In this case, full_rank should remain + // dynamic until metadata is loaded. 
+ ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"data_type", "int16"}, + // No shape or chunks specified, so chunked_rank is dynamic + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + + // When chunked_rank is dynamic, full_rank remains dynamic + EXPECT_EQ(tensorstore::dynamic_rank, spec.rank()); +} + +TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidWithoutDtype) { + // Test that when open_as_void=true but dtype is not specified, + // GetSpecInfo falls through to normal GetSpecRankAndFieldInfo behavior. + ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + // No metadata.data_type specified + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + + // Without dtype, rank should be dynamic (normal behavior) + EXPECT_EQ(tensorstore::dynamic_rank, spec.rank()); +} + +TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidRankConsistency) { + // Verify that the rank computed by GetSpecInfo matches what we get when + // actually opening the store. 
+ auto context = Context::Default(); + + // First create a normal array + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "float32"}, // 4-byte float + {"shape", {3, 4, 5}}, // 3D array + {"chunk_grid", + {{"name", "regular"}, + {"configuration", {{"chunk_shape", {3, 4, 5}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Open the store with open_as_void - don't specify metadata so it's read + // from the existing store + ::nlohmann::json void_spec_json{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec_json, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // Opened store rank should be chunked_rank + 1 = 3 + 1 = 4 + EXPECT_EQ(4, void_store.rank()); + + // Verify bytes dimension size - the domain is valid on an opened store + auto store_domain = void_store.domain(); + EXPECT_TRUE(store_domain.valid()); + EXPECT_EQ(4, store_domain.shape()[3]); // 4 bytes for float32 + + // Now test the spec parsing with known metadata also sets rank correctly + ::nlohmann::json void_spec_with_metadata{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix2/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"data_type", "float32"}, + {"shape", {3, 4, 5}}, + {"chunk_grid", + {{"name", "regular"}, + {"configuration", {{"chunk_shape", {3, 4, 5}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_spec, tensorstore::Spec::FromJson(void_spec_with_metadata)); + + // Spec rank should be 4 (3D chunked + 1 bytes dimension) + // This verifies GetSpecInfo computes full_rank = chunked_rank + 1 + EXPECT_EQ(4, 
void_spec.rank()); +} + +// TODO(fill_value): OpenAsVoidFillValue test disabled pending implementation +// of proper fill_value handling for void access. The v2 implementation converts +// the fill_value to a byte array representation via CreateVoidMetadata(). +// The v3 implementation needs similar functionality to properly expose the +// fill_value as raw bytes when using open_as_void. +// +// TEST(Zarr3DriverTest, OpenAsVoidFillValue) { +// // Test that fill_value is correctly obtained from metadata when using +// // open_as_void. The void access should get the fill_value representing +// // the raw bytes of the original fill_value. +// ... +// } + } // namespace From 5819c8a401a21a47161654ad7f4f3b5cbecb5b79 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 17:55:00 +0000 Subject: [PATCH 36/59] zarr3: Add fill_value handling for void access Implement proper fill_value conversion for void access mode: 1. Add is_void_access() virtual method to DataCacheBase to expose whether the cache was opened with open_as_void=true. 2. Modify ZarrDriver::GetFillValue to convert fill_value to byte array representation when in void access mode. This copies bytes from each field's fill_value at their respective offsets, similar to v2's CreateVoidMetadata handling. 3. 
Add OpenAsVoidFillValue test to verify that: - Normal store returns the expected scalar fill_value - Void store returns fill_value as byte array with correct shape - Byte representation matches the original value (little endian) --- tensorstore/driver/zarr3/driver.cc | 33 ++++++++++++ tensorstore/driver/zarr3/driver_test.cc | 69 ++++++++++++++++++++----- 2 files changed, 90 insertions(+), 12 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 8b6a355eb..6a9315b5c 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -380,6 +381,9 @@ class DataCacheBase virtual ZarrChunkCache& zarr_chunk_cache() = 0; + /// Returns true if this cache was opened with open_as_void=true. + virtual bool is_void_access() const = 0; + absl::Status ValidateMetadataCompatibility( const void* existing_metadata_ptr, const void* new_metadata_ptr) override { @@ -718,6 +722,8 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { ZarrChunkCache& zarr_chunk_cache() final { return *this; } + bool is_void_access() const final { return ChunkCacheImpl::open_as_void_; } + const internal::ChunkGridSpecification& grid() const override { return grid_; } @@ -812,6 +818,33 @@ class ZarrDriver : public ZarrDriverBase { if (metadata.fill_value.empty()) { return SharedArray(); } + + // For void access, convert fill_value to byte array representation. + // This is similar to v2's CreateVoidMetadata fill_value handling. + // In zarr3, endianness is handled by the codec chain, so we just copy + // the raw bytes from each field's fill_value. 
+ if (static_cast(cache())->is_void_access()) { + const Index nbytes = metadata.data_type.bytes_per_outer_element; + auto byte_fill = AllocateArray({nbytes}, c_order, value_init); + + // Copy bytes from each field's fill_value at their respective offsets + for (size_t field_i = 0; field_i < metadata.data_type.fields.size(); + ++field_i) { + const auto& field = metadata.data_type.fields[field_i]; + if (field_i >= metadata.fill_value.size() || + !metadata.fill_value[field_i].valid()) { + continue; + } + const auto& fill_value = metadata.fill_value[field_i]; + // Copy the raw bytes from the fill_value to the byte array + std::memcpy(byte_fill.data() + field.byte_offset, + fill_value.data(), + field.num_bytes); + } + + return byte_fill; + } + size_t index = this->component_index(); if (index >= metadata.fill_value.size()) { return absl::OutOfRangeError("Component index out of bounds"); diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 7d7307c2c..d592b8924 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -2461,17 +2461,62 @@ TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidRankConsistency) { EXPECT_EQ(4, void_spec.rank()); } -// TODO(fill_value): OpenAsVoidFillValue test disabled pending implementation -// of proper fill_value handling for void access. The v2 implementation converts -// the fill_value to a byte array representation via CreateVoidMetadata(). -// The v3 implementation needs similar functionality to properly expose the -// fill_value as raw bytes when using open_as_void. -// -// TEST(Zarr3DriverTest, OpenAsVoidFillValue) { -// // Test that fill_value is correctly obtained from metadata when using -// // open_as_void. The void access should get the fill_value representing -// // the raw bytes of the original fill_value. -// ... 
-// } +TEST(Zarr3DriverTest, OpenAsVoidFillValue) { + // Test that fill_value is correctly obtained from metadata when using + // open_as_void. The void access should get the fill_value representing + // the raw bytes of the original fill_value. + auto context = Context::Default(); + + // Create an array with an explicit fill_value + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "int16"}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + {"fill_value", 0x1234}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Verify the normal store has the expected fill_value + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto normal_fill, store.fill_value()); + EXPECT_TRUE(normal_fill.valid()); + EXPECT_EQ(tensorstore::MakeScalarArray(0x1234), normal_fill); + + // Open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // Verify void store has a valid fill_value derived from the original + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto void_fill, void_store.fill_value()); + EXPECT_TRUE(void_fill.valid()); + + // The void fill_value should have shape {2} (2 bytes for int16) + EXPECT_EQ(1, void_fill.rank()); + EXPECT_EQ(2, void_fill.shape()[0]); + + // The fill_value bytes should represent 0x1234 in little endian: 0x34, 0x12 + auto fill_bytes = static_cast(void_fill.data()); + EXPECT_EQ(0x34, fill_bytes[0]); + EXPECT_EQ(0x12, fill_bytes[1]); +} } // namespace From 53ced58d76215dceb0b9ee4a1548aab3dd33f264 
Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 18:07:21 +0000 Subject: [PATCH 37/59] zarr3: Add structured type support for void access Fix EncodeChunk to properly handle structured types: 1. For single non-structured field: encode directly (existing behavior) 2. For structured types (multiple fields): combine field arrays into a single byte array by copying each field's data at their respective byte offsets, then encode the combined byte array. This matches the pattern in DecodeChunk which extracts fields from a decoded byte array. Add OpenAsVoidStructuredType test that: - Creates an array with structured dtype (uint8 + int16 fields) - Writes data using field access - Opens with open_as_void=true - Verifies rank is original_rank + 1 - Verifies bytes dimension is 3 (1 + 2 bytes) - Verifies dtype is byte --- tensorstore/driver/zarr3/chunk_cache.cc | 54 ++++++++++++++++---- tensorstore/driver/zarr3/driver_test.cc | 68 ++++++++++++++++++++++--- 2 files changed, 105 insertions(+), 17 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index f2a61f5c8..8f15a218c 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -252,16 +252,12 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, Result ZarrLeafChunkCache::EncodeChunk( span chunk_indices, span> component_arrays) { - assert(component_arrays.size() == 1); + const size_t num_fields = dtype_.fields.size(); // Special case: void access - encode bytes back to original format. - // - // For structured types: codec already expects bytes with extra dimension. - // Just encode directly. - // - // For non-structured types: reinterpret byte array as original dtype - // and shape before encoding. if (open_as_void_) { + assert(component_arrays.size() == 1); + if (original_is_structured_) { // Structured types: codec already expects bytes with extra dimension. 
return codec_state_->EncodeArray(component_arrays[0]); @@ -276,8 +272,6 @@ Result ZarrLeafChunkCache::EncodeChunk( std::vector original_shape(void_shape.begin(), void_shape.end() - 1); // Use the original dtype (stored during cache creation) for encoding. - // This is the dtype the codec was prepared for, not the void dtype. - // Create a view over the byte data with original dtype and layout. // Use the aliasing constructor to share ownership with byte_array but // interpret the data with the original dtype. @@ -293,7 +287,47 @@ Result ZarrLeafChunkCache::EncodeChunk( return codec_state_->EncodeArray(encoded_array); } - return codec_state_->EncodeArray(component_arrays[0]); + // For single non-structured field, encode directly + if (num_fields == 1 && dtype_.fields[0].outer_shape.empty()) { + assert(component_arrays.size() == 1); + return codec_state_->EncodeArray(component_arrays[0]); + } + + // For structured types, combine multiple field arrays into a single byte array + assert(component_arrays.size() == num_fields); + + // Build encode shape: [chunk_dims..., bytes_per_outer_element] + const auto& chunk_shape = grid().chunk_shape; + std::vector encode_shape(chunk_shape.begin(), chunk_shape.end()); + encode_shape.push_back(dtype_.bytes_per_outer_element); + + // Calculate number of outer elements + Index num_elements = 1; + for (size_t i = 0; i < chunk_shape.size(); ++i) { + num_elements *= chunk_shape[i]; + } + + // Allocate byte array for combined fields + auto byte_array = AllocateArray(encode_shape, c_order, value_init); + auto* dst_bytes = byte_array.data(); + + // Copy each field's data into the byte array at their respective offsets + for (size_t field_i = 0; field_i < num_fields; ++field_i) { + const auto& field = dtype_.fields[field_i]; + const auto& field_array = component_arrays[field_i]; + const auto* src = static_cast(field_array.data()); + const Index field_size = field.dtype->size; + + // Copy field data to each struct element + for (Index i = 0; 
i < num_elements; ++i) { + std::memcpy(dst_bytes + i * dtype_.bytes_per_outer_element + + field.byte_offset, + src + i * field_size, + field_size); + } + } + + return codec_state_->EncodeArray(byte_array); } kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index d592b8924..68ce6de60 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1886,13 +1886,67 @@ TEST(Zarr3DriverTest, OpenAsVoidSimpleType) { void_store.dtype()); } -// TODO(b/xxx): OpenAsVoidStructuredType test disabled pending implementation -// of proper rank handling in GetNewMetadata for void access with structured -// types. Creating new arrays with open_as_void=true and structured dtypes -// requires adding field_shape dimensions to chunked_rank and updating -// SetChunkLayoutFromMetadata to handle the dimension mismatch between -// metadata shape and full rank. This is a more extensive change that will -// be addressed separately. 
+TEST(Zarr3DriverTest, OpenAsVoidStructuredType) { + // Test open_as_void with a structured data type + auto context = Context::Default(); + + // Step 1: Create and write the array using a structured dtype (with field) + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"field", "y"}, + {"metadata", + { + {"data_type", + {{"name", "structured"}, + {"configuration", + {{"fields", + ::nlohmann::json::array({{"x", "uint8"}, {"y", "int16"}})}}}}}, + {"shape", {4, 4}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write some data to field y + auto data = tensorstore::MakeArray({{100, 200}, {300, 400}}); + TENSORSTORE_EXPECT_OK( + tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( + {0, 0}, {2, 2})) + .result()); + + // Close the first store by letting it go out of scope + store = tensorstore::TensorStore(); + + // Step 2: Open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // The void store should have rank = original_rank + 1 (for bytes dimension) + EXPECT_EQ(3, void_store.rank()); + + // The last dimension should be 3 bytes (1 byte for u1 + 2 bytes for i2) + EXPECT_EQ(3, void_store.domain().shape()[2]); + + // The data type should be byte + EXPECT_EQ(tensorstore::dtype_v, + void_store.dtype()); +} TEST(Zarr3DriverTest, OpenAsVoidWithCompression) { // Test open_as_void with compression enabled From 13cae40bb6925ce8411bf1427a5df0841e73ec34 Mon Sep 17 
00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 18:15:32 +0000 Subject: [PATCH 38/59] zarr3: Enhance structured type tests and add GetSpecInfo test 1. OpenAsVoidStructuredType: Now actually reads and verifies byte content - Reads raw bytes through void access - Uses proper stride calculation for the returned array - Verifies y field bytes at all 4 positions (little-endian int16) - x field is 0 (fill value) since we only wrote to y field 2. Add GetSpecInfoOpenAsVoidWithStructuredDtype test - Verifies spec rank = chunked_rank + 1 with structured dtype - Tests structured dtype with int32 + uint16 fields - Matches v2 test coverage --- tensorstore/driver/zarr3/driver_test.cc | 77 ++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 68ce6de60..65a646bbd 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -1891,6 +1891,7 @@ TEST(Zarr3DriverTest, OpenAsVoidStructuredType) { auto context = Context::Default(); // Step 1: Create and write the array using a structured dtype (with field) + // Struct layout: x (uint8, 1 byte) + y (int16, 2 bytes) = 3 bytes total ::nlohmann::json create_spec{ {"driver", "zarr3"}, {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, @@ -1914,14 +1915,16 @@ TEST(Zarr3DriverTest, OpenAsVoidStructuredType) { tensorstore::ReadWriteMode::read_write) .result()); - // Write some data to field y + // Write some data to field y (int16) + // int16 100 = 0x0064 in little endian = [0x64, 0x00] + // int16 200 = 0x00C8 in little endian = [0xC8, 0x00] auto data = tensorstore::MakeArray({{100, 200}, {300, 400}}); TENSORSTORE_EXPECT_OK( tensorstore::Write(data, store | tensorstore::Dims(0, 1).SizedInterval( {0, 0}, {2, 2})) .result()); - // Close the first store by letting it go out of scope + // Close store to ensure data is flushed store = tensorstore::TensorStore(); // Step 2: 
Open with open_as_void=true @@ -1946,6 +1949,49 @@ TEST(Zarr3DriverTest, OpenAsVoidStructuredType) { // The data type should be byte EXPECT_EQ(tensorstore::dtype_v, void_store.dtype()); + + // Step 3: Read and verify byte content for field y only + // Since we only wrote to field y, field x will be zeros (fill value) + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto byte_array, + tensorstore::Read( + void_store | tensorstore::Dims(0, 1, 2).SizedInterval({0, 0, 0}, + {2, 2, 3})) + .result()); + + EXPECT_EQ(3, byte_array.rank()); + EXPECT_EQ(2, byte_array.shape()[0]); + EXPECT_EQ(2, byte_array.shape()[1]); + EXPECT_EQ(3, byte_array.shape()[2]); + + // Verify bytes - we use the array's data() and strides + const auto* bytes = static_cast(byte_array.data()); + const Index stride0 = byte_array.byte_strides()[0]; + const Index stride1 = byte_array.byte_strides()[1]; + const Index stride2 = byte_array.byte_strides()[2]; + auto get_byte = [&](Index i, Index j, Index k) -> unsigned char { + return bytes[i * stride0 + j * stride1 + k * stride2]; + }; + + // Element [0,0]: x=0 (fill), y=100 (0x0064 LE = [0x64, 0x00]) + EXPECT_EQ(0, get_byte(0, 0, 0)); // x (fill value) + EXPECT_EQ(0x64, get_byte(0, 0, 1)); // y low byte + EXPECT_EQ(0x00, get_byte(0, 0, 2)); // y high byte + + // Element [0,1]: x=0 (fill), y=200 (0x00C8 LE = [0xC8, 0x00]) + EXPECT_EQ(0, get_byte(0, 1, 0)); // x (fill value) + EXPECT_EQ(0xC8, get_byte(0, 1, 1)); // y low byte + EXPECT_EQ(0x00, get_byte(0, 1, 2)); // y high byte + + // Element [1,0]: x=0 (fill), y=300 (0x012C LE = [0x2C, 0x01]) + EXPECT_EQ(0, get_byte(1, 0, 0)); // x (fill value) + EXPECT_EQ(0x2C, get_byte(1, 0, 1)); // y low byte + EXPECT_EQ(0x01, get_byte(1, 0, 2)); // y high byte + + // Element [1,1]: x=0 (fill), y=400 (0x0190 LE = [0x90, 0x01]) + EXPECT_EQ(0, get_byte(1, 1, 0)); // x (fill value) + EXPECT_EQ(0x90, get_byte(1, 1, 1)); // y low byte + EXPECT_EQ(0x01, get_byte(1, 1, 2)); // y high byte } TEST(Zarr3DriverTest, 
OpenAsVoidWithCompression) { @@ -2406,6 +2452,33 @@ TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidWithKnownRank) { EXPECT_EQ(3, spec.rank()); } +TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidWithStructuredDtype) { + // Test GetSpecInfo with open_as_void=true and a structured dtype. + // The bytes dimension should reflect the full struct size. + ::nlohmann::json json_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + {"metadata", + { + {"data_type", + {{"name", "structured"}, + {"configuration", + {{"fields", + ::nlohmann::json::array({{"x", "int32"}, {"y", "uint16"}})}}}}}, + {"shape", {8}}, // 1D array + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {4}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN(auto spec, + tensorstore::Spec::FromJson(json_spec)); + + // chunked_rank = 1, so full_rank = 2 + EXPECT_EQ(2, spec.rank()); +} + TEST(Zarr3DriverTest, GetSpecInfoOpenAsVoidWithDynamicRank) { // Test GetSpecInfo when open_as_void=true with dtype but no shape/chunks // (i.e., chunked_rank is dynamic). In this case, full_rank should remain From cfea1dda0e93d7ece90ac8b702e11a74d47cdc63 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 18:16:43 +0000 Subject: [PATCH 39/59] zarr3: Add OpenAsVoidIncompatibleMetadata test Test that open_as_void correctly detects when the underlying metadata has been changed to an incompatible dtype. ResolveBounds should fail with kFailedPrecondition when the stored metadata has a different bytes_per_outer_element than what was expected. This matches the v2 test that verifies metadata consistency checking works properly with void access. 
--- tensorstore/driver/zarr3/driver_test.cc | 73 +++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 65a646bbd..970147c82 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -2646,4 +2646,77 @@ TEST(Zarr3DriverTest, OpenAsVoidFillValue) { EXPECT_EQ(0x12, fill_bytes[1]); } +TEST(Zarr3DriverTest, OpenAsVoidIncompatibleMetadata) { + // Test that open_as_void correctly rejects incompatible metadata when the + // underlying storage is modified to have a different bytes_per_outer_element. + auto context = Context::Default(); + ::nlohmann::json storage_spec{{"driver", "memory"}}; + + // Create an array with 4-byte dtype + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", storage_spec}, + {"path", "prefix/"}, + {"metadata", + { + {"data_type", "int32"}, // 4 bytes + {"shape", {2, 2}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write some data + auto data = tensorstore::MakeArray({{1, 2}, {3, 4}}); + TENSORSTORE_EXPECT_OK(tensorstore::Write(data, store).result()); + + // Open with open_as_void + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", storage_spec}, + {"path", "prefix/"}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + // Now overwrite the underlying storage with incompatible metadata + // (different bytes_per_outer_element: 2 bytes instead of 4) + ::nlohmann::json incompatible_spec{ + {"driver", "zarr3"}, + {"kvstore", storage_spec}, + {"path", "prefix/"}, + {"metadata", + { + 
{"data_type", "int16"}, // 2 bytes - incompatible + {"shape", {2, 2}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {2, 2}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto incompatible_store, + tensorstore::Open(incompatible_spec, context, + tensorstore::OpenMode::create | + tensorstore::OpenMode::delete_existing, + tensorstore::ReadWriteMode::read_write) + .result()); + + // ResolveBounds on the original void store should fail because the + // underlying metadata changed to an incompatible dtype + EXPECT_THAT(ResolveBounds(void_store).result(), + StatusIs(absl::StatusCode::kFailedPrecondition)); +} + } // namespace From dda05a8b139e2e0b0daaf2619e5fba2588d32b2d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 20:57:00 +0000 Subject: [PATCH 40/59] zarr3: Add OpenAsVoidWithSharding test Verifies that void access works correctly with sharded arrays: - Void access flags propagate through sharded caches - Reading bytes through sharded void access returns correct data - Writing bytes through sharded void access round-trips correctly --- tensorstore/driver/zarr3/driver_test.cc | 131 ++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/tensorstore/driver/zarr3/driver_test.cc b/tensorstore/driver/zarr3/driver_test.cc index 970147c82..08ee47f82 100644 --- a/tensorstore/driver/zarr3/driver_test.cc +++ b/tensorstore/driver/zarr3/driver_test.cc @@ -2719,4 +2719,135 @@ TEST(Zarr3DriverTest, OpenAsVoidIncompatibleMetadata) { StatusIs(absl::StatusCode::kFailedPrecondition)); } +TEST(Zarr3DriverTest, OpenAsVoidWithSharding) { + // Test open_as_void with sharding enabled. + // Verifies that void access flags propagate correctly through sharded caches. 
+ auto context = Context::Default(); + + // Create a sharded array + ::nlohmann::json create_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"metadata", + { + {"data_type", "int32"}, + {"shape", {8, 8}}, + {"chunk_grid", + {{"name", "regular"}, {"configuration", {{"chunk_shape", {8, 8}}}}}}, + {"codecs", + {{{"name", "sharding_indexed"}, + {"configuration", + {{"chunk_shape", {4, 4}}, + {"codecs", {{{"name", "bytes"}}}}, + {"index_codecs", + {{{"name", "bytes"}}, {{"name", "crc32c"}}}}}}}}}, + }}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::create, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Write some data + auto data = tensorstore::MakeArray( + {{0x01020304, 0x05060708, 0, 0, 0, 0, 0, 0}, + {0x090A0B0C, 0x0D0E0F10, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}}); + TENSORSTORE_EXPECT_OK(tensorstore::Write(data, store).result()); + + // Open with open_as_void=true + ::nlohmann::json void_spec{ + {"driver", "zarr3"}, + {"kvstore", {{"driver", "memory"}, {"path", "prefix/"}}}, + {"open_as_void", true}, + }; + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto void_store, + tensorstore::Open(void_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read_write) + .result()); + + // Verify rank is original + 1 for bytes dimension + EXPECT_EQ(3, void_store.rank()); + + // Verify bytes dimension is 4 (int32 = 4 bytes) + EXPECT_EQ(4, void_store.domain().shape()[2]); + + // Read through void access and verify byte content + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto bytes_read, + tensorstore::Read( + void_store | tensorstore::Dims(0, 1, 2).SizedInterval({0, 0, 0}, + {2, 2, 4})) + .result()); + + EXPECT_EQ(3, bytes_read.rank()); + EXPECT_EQ(2, bytes_read.shape()[0]); + EXPECT_EQ(2, 
bytes_read.shape()[1]); + EXPECT_EQ(4, bytes_read.shape()[2]); + + // Verify the raw bytes (little endian) + const auto* bytes = static_cast(bytes_read.data()); + const Index stride0 = bytes_read.byte_strides()[0]; + const Index stride1 = bytes_read.byte_strides()[1]; + const Index stride2 = bytes_read.byte_strides()[2]; + auto get_byte = [&](Index i, Index j, Index k) -> unsigned char { + return bytes[i * stride0 + j * stride1 + k * stride2]; + }; + + // Element [0,0] = 0x01020304 in little endian: 04 03 02 01 + EXPECT_EQ(0x04, get_byte(0, 0, 0)); + EXPECT_EQ(0x03, get_byte(0, 0, 1)); + EXPECT_EQ(0x02, get_byte(0, 0, 2)); + EXPECT_EQ(0x01, get_byte(0, 0, 3)); + + // Element [0,1] = 0x05060708 in little endian: 08 07 06 05 + EXPECT_EQ(0x08, get_byte(0, 1, 0)); + EXPECT_EQ(0x07, get_byte(0, 1, 1)); + EXPECT_EQ(0x06, get_byte(0, 1, 2)); + EXPECT_EQ(0x05, get_byte(0, 1, 3)); + + // Write through void access + auto raw_bytes = tensorstore::AllocateArray( + {2, 2, 4}, tensorstore::c_order, tensorstore::value_init); + auto raw_bytes_ptr = static_cast( + const_cast(static_cast(raw_bytes.data()))); + // Set element [0,0] to 0xAABBCCDD (little endian: DD CC BB AA) + raw_bytes_ptr[0] = 0xDD; + raw_bytes_ptr[1] = 0xCC; + raw_bytes_ptr[2] = 0xBB; + raw_bytes_ptr[3] = 0xAA; + + TENSORSTORE_EXPECT_OK( + tensorstore::Write(raw_bytes, + void_store | tensorstore::Dims(0, 1, 2).SizedInterval( + {0, 0, 0}, {2, 2, 4})) + .result()); + + // Read back through typed access and verify + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto typed_store, + tensorstore::Open(create_spec, context, tensorstore::OpenMode::open, + tensorstore::ReadWriteMode::read) + .result()); + + TENSORSTORE_ASSERT_OK_AND_ASSIGN( + auto typed_read, + tensorstore::Read( + typed_store | tensorstore::Dims(0, 1).SizedInterval({0, 0}, {2, 2})) + .result()); + auto typed_ptr = static_cast(typed_read.data()); + + // Element [0,0] should be 0xAABBCCDD + EXPECT_EQ(static_cast(0xAABBCCDD), typed_ptr[0]); +} + } // namespace From 
d609dd8852e174f2fcd12b96f976ebc6d4b8837f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 20:57:22 +0000 Subject: [PATCH 41/59] zarr3: Fix schema.yml field/open_as_void documentation for consistency with zarr2 - Remove invalid oneOf constraint that didn't properly express mutual exclusivity - Update field description to match zarr2 style (document mutual exclusivity) - Update open_as_void description to document mutual exclusivity with field - Add oneOf type constraint for field to match zarr2 (string or null) The actual mutual exclusivity validation is done in code via jb::Initialize. --- tensorstore/driver/zarr3/schema.yml | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tensorstore/driver/zarr3/schema.yml b/tensorstore/driver/zarr3/schema.yml index 9491027b1..22a4971e2 100644 --- a/tensorstore/driver/zarr3/schema.yml +++ b/tensorstore/driver/zarr3/schema.yml @@ -18,12 +18,15 @@ allOf: by combining these metadata constraints with any `Schema` constraints. $ref: driver/zarr3/Metadata field: - type: string - title: Field selection for structured arrays. + oneOf: + - type: string + - type: "null" + title: Name of field to open. description: | - Name of the field to select from a structured array. When specified, - the tensorstore will provide access to only the specified field of - each element in the structured array. + Must be specified if the `.metadata.data_type` specified in the array + metadata has more than one field. Cannot be specified together with + :json:`"open_as_void": true`. + default: null open_as_void: type: boolean default: false @@ -31,17 +34,8 @@ allOf: description: | When true, opens the array as raw bytes instead of interpreting it as structured data. The resulting array will have an additional - dimension representing the byte layout of each element. 
- oneOf: - - not: - anyOf: - - required: ["field"] - - required: ["open_as_void"] - - allOf: - - not: - required: ["field"] - - not: - required: ["open_as_void"] + dimension representing the byte layout of each element. Cannot be + :json:`true` if `.field` is also specified. examples: - driver: zarr3 kvstore: From 5d849f67efc3ff33936d7377ba81b01095ab6cf0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 20:58:24 +0000 Subject: [PATCH 42/59] zarr3: Add explicit implicit_lower_bounds in GetExternalToInternalTransform For consistency with GetDomain(), explicitly set implicit_lower_bounds in GetExternalToInternalTransform when building the void access transform. Both methods now follow the same pattern of explicitly setting both implicit_lower_bounds and implicit_upper_bounds. --- tensorstore/driver/zarr3/driver.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 6a9315b5c..e6b9cf617 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -767,10 +767,14 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { builder.input_shape(full_shape); builder.input_labels(span(&normalized_dimension_names[0], total_rank)); + // Set implicit bounds: array dims have implicit upper bounds (resizable), + // bytes dim has explicit bounds (fixed size). 
+ DimensionSet implicit_lower_bounds(false); DimensionSet implicit_upper_bounds(false); for (DimensionIndex i = 0; i < rank; ++i) { implicit_upper_bounds[i] = true; } + builder.implicit_lower_bounds(implicit_lower_bounds); builder.implicit_upper_bounds(implicit_upper_bounds); for (DimensionIndex i = 0; i < total_rank; ++i) { From 1298bcb7f0ca5ee882c85c91fa1498eaac9674f9 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 20:58:49 +0000 Subject: [PATCH 43/59] zarr3: Add assertion in DecodeChunk for void access field count Add assertion that num_fields == 1 in the void access path of DecodeChunk. Void access always uses a single synthesized field, so this assertion helps catch any inconsistency between GetDataCache and DecodeChunk. --- tensorstore/driver/zarr3/chunk_cache.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 8f15a218c..80d96011a 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -168,6 +168,7 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, // For structured types: codec was already prepared for // [chunk_shape, bytes_per_elem] with byte dtype. Just decode directly. 
if (open_as_void_) { + assert(num_fields == 1); // Void access uses a single synthesized field const auto& void_component_shape = grid().components[0].shape(); if (original_is_structured_) { From 72968e8fa16492f804795f461faf27d8a8a41a50 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 21:27:14 +0000 Subject: [PATCH 44/59] zarr3: Add contiguity assertions for encode/decode chunk operations Add assertions in EncodeChunk and DecodeChunk to verify that arrays are C-contiguous before performing direct memcpy operations: - In EncodeChunk: verify component arrays are C-contiguous - In DecodeChunk: verify decoded byte arrays are C-contiguous These assertions validate assumptions about array layouts that the chunk cache relies on for correct operation. The chunk cache write path (AsyncWriteArray) allocates C-order arrays, and the codec chain produces C-contiguous decoded arrays. Also adds the necessary includes and BUILD dependencies for IsContiguousLayout and c_order. --- tensorstore/driver/zarr3/BUILD | 2 ++ tensorstore/driver/zarr3/chunk_cache.cc | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tensorstore/driver/zarr3/BUILD b/tensorstore/driver/zarr3/BUILD index 685050024..72b51a3df 100644 --- a/tensorstore/driver/zarr3/BUILD +++ b/tensorstore/driver/zarr3/BUILD @@ -226,6 +226,7 @@ tensorstore_cc_library( "//tensorstore:array_storage_statistics", "//tensorstore:batch", "//tensorstore:box", + "//tensorstore:contiguous_layout", "//tensorstore:index", "//tensorstore:index_interval", "//tensorstore:rank", @@ -242,6 +243,7 @@ tensorstore_cc_library( "//tensorstore/internal:intrusive_ptr", "//tensorstore/internal:lexicographical_grid_index_key", "//tensorstore/internal:regular_grid", + "//tensorstore:strided_layout", "//tensorstore/internal:storage_statistics", "//tensorstore/internal/cache", "//tensorstore/internal/cache:chunk_cache", diff --git a/tensorstore/driver/zarr3/chunk_cache.cc 
b/tensorstore/driver/zarr3/chunk_cache.cc index 80d96011a..5675ff2ff 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -31,6 +31,7 @@ #include "absl/time/time.h" #include "tensorstore/array.h" #include "tensorstore/array_storage_statistics.h" +#include "tensorstore/contiguous_layout.h" #include "tensorstore/batch.h" #include "tensorstore/box.h" #include "tensorstore/driver/chunk.h" @@ -53,6 +54,7 @@ #include "tensorstore/internal/meta/type_traits.h" #include "tensorstore/internal/regular_grid.h" #include "tensorstore/internal/storage_statistics.h" +#include "tensorstore/strided_layout.h" #include "tensorstore/kvstore/driver.h" #include "tensorstore/kvstore/key_range.h" #include "tensorstore/kvstore/kvstore.h" @@ -192,6 +194,10 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, auto decoded_array, codec_state_->DecodeArray(original_chunk_shape, std::move(data))); + // Verify decoded array is C-contiguous (codec chain should guarantee this) + assert(IsContiguousLayout(decoded_array.layout(), c_order, + decoded_array.dtype().size())); + // Reinterpret the decoded array's bytes as [chunk_shape..., bytes_per_elem] auto byte_array = AllocateArray( void_component_shape, c_order, default_init, @@ -223,7 +229,10 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, TENSORSTORE_ASSIGN_OR_RETURN( auto byte_array, codec_state_->DecodeArray(decode_shape, std::move(data))); - // Extract each field from the byte array + // Extract each field from the byte array. + // Note: decoded byte_array should be C-contiguous (codec chain guarantees). 
+ assert(IsContiguousLayout(byte_array.layout(), c_order, + byte_array.dtype().size())); const Index num_elements = byte_array.num_elements() / dtype_.bytes_per_outer_element; const auto* src_bytes = static_cast(byte_array.data()); @@ -312,10 +321,15 @@ Result ZarrLeafChunkCache::EncodeChunk( auto byte_array = AllocateArray(encode_shape, c_order, value_init); auto* dst_bytes = byte_array.data(); - // Copy each field's data into the byte array at their respective offsets + // Copy each field's data into the byte array at their respective offsets. + // Note: This assumes component arrays are C-contiguous, which is guaranteed + // by the chunk cache's write path (AsyncWriteArray allocates C-order arrays). for (size_t field_i = 0; field_i < num_fields; ++field_i) { const auto& field = dtype_.fields[field_i]; const auto& field_array = component_arrays[field_i]; + // Verify the array is C-contiguous as expected + assert(IsContiguousLayout(field_array.layout(), c_order, + field_array.dtype().size())); const auto* src = static_cast(field_array.data()); const Index field_size = field.dtype->size; From 14546a1273838590005b730d923132afff2d67a1 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 21:36:08 +0000 Subject: [PATCH 45/59] zarr3: Use CopyArray for safe structured type encode/decode Replace raw memcpy loops with CopyArray using strided ArrayViews for structured type encoding and decoding. This follows the standard TensorStore pattern (as used in zarr v2 with internal::EncodeArray) where array copies are done via IterateOverArrays which safely handles any source/destination strides. The key insight is creating an ArrayView with strides that represent the interleaved field positions within the struct layout: - For a field at byte_offset B within a struct of size S - The strides are [..., S] instead of [..., field_size] - This allows CopyArray to correctly interleave/deinterleave fields This approach: 1. 
Removes the need for contiguity assertions (CopyArray handles any layout) 2. Is consistent with zarr v2's use of internal::EncodeArray 3. Uses the standard IterateOverArrays iteration pattern The void access decode path retains its memcpy with assertion because it's a simple byte reinterpretation where both arrays are known to be C-contiguous (destination freshly allocated, source from codec chain). --- tensorstore/driver/zarr3/chunk_cache.cc | 79 +++++++++++++------------ 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 5675ff2ff..127f5a4c9 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -230,29 +230,35 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, auto byte_array, codec_state_->DecodeArray(decode_shape, std::move(data))); // Extract each field from the byte array. - // Note: decoded byte_array should be C-contiguous (codec chain guarantees). - assert(IsContiguousLayout(byte_array.layout(), c_order, - byte_array.dtype().size())); - const Index num_elements = byte_array.num_elements() / - dtype_.bytes_per_outer_element; - const auto* src_bytes = static_cast(byte_array.data()); - + // We create a strided view into the source that maps to each field's + // position within the interleaved struct layout, then use CopyArray which + // safely handles any layout differences via IterateOverArrays. 
for (size_t field_i = 0; field_i < num_fields; ++field_i) { const auto& field = dtype_.fields[field_i]; // Use the component's shape (from the grid) for the result array const auto& component_shape = grid().components[field_i].shape(); auto result_array = AllocateArray(component_shape, c_order, default_init, field.dtype); - auto* dst = static_cast(result_array.data()); - const Index field_size = field.dtype->size; - - // Copy field data from each struct element - for (Index i = 0; i < num_elements; ++i) { - std::memcpy(dst + i * field_size, - src_bytes + i * dtype_.bytes_per_outer_element + - field.byte_offset, - field_size); + + // Build strides for the source view: each element is separated by + // bytes_per_outer_element (the struct size), not field_size. + std::vector src_byte_strides(chunk_shape.size()); + Index stride = dtype_.bytes_per_outer_element; + for (DimensionIndex i = chunk_shape.size(); i-- > 0;) { + src_byte_strides[i] = stride; + stride *= chunk_shape[i]; } + + // Create source ArrayView pointing to this field's offset within + // the interleaved byte array, with strides that skip over other fields. 
+ ArrayView src_field_view( + {static_cast( + static_cast(byte_array.data()) + field.byte_offset), + field.dtype}, + StridedLayoutView<>(chunk_shape, src_byte_strides)); + + // Use CopyArray which safely handles any layout differences + CopyArray(src_field_view, result_array); field_arrays[field_i] = std::move(result_array); } @@ -311,35 +317,34 @@ Result ZarrLeafChunkCache::EncodeChunk( std::vector encode_shape(chunk_shape.begin(), chunk_shape.end()); encode_shape.push_back(dtype_.bytes_per_outer_element); - // Calculate number of outer elements - Index num_elements = 1; - for (size_t i = 0; i < chunk_shape.size(); ++i) { - num_elements *= chunk_shape[i]; - } - // Allocate byte array for combined fields auto byte_array = AllocateArray(encode_shape, c_order, value_init); - auto* dst_bytes = byte_array.data(); // Copy each field's data into the byte array at their respective offsets. - // Note: This assumes component arrays are C-contiguous, which is guaranteed - // by the chunk cache's write path (AsyncWriteArray allocates C-order arrays). + // We create a strided view into the destination that maps to each field's + // position within the interleaved struct layout, then use CopyArray which + // safely handles any source array strides via IterateOverArrays. 
for (size_t field_i = 0; field_i < num_fields; ++field_i) { const auto& field = dtype_.fields[field_i]; const auto& field_array = component_arrays[field_i]; - // Verify the array is C-contiguous as expected - assert(IsContiguousLayout(field_array.layout(), c_order, - field_array.dtype().size())); - const auto* src = static_cast(field_array.data()); - const Index field_size = field.dtype->size; - - // Copy field data to each struct element - for (Index i = 0; i < num_elements; ++i) { - std::memcpy(dst_bytes + i * dtype_.bytes_per_outer_element + - field.byte_offset, - src + i * field_size, - field_size); + + // Build strides for the destination view: each element is separated by + // bytes_per_outer_element (the struct size), not field_size. + std::vector dest_byte_strides(chunk_shape.size()); + Index stride = dtype_.bytes_per_outer_element; + for (DimensionIndex i = chunk_shape.size(); i-- > 0;) { + dest_byte_strides[i] = stride; + stride *= chunk_shape[i]; } + + // Create destination ArrayView pointing to this field's offset within + // the interleaved byte array, with strides that skip over other fields. + ArrayView dest_field_view( + {static_cast(byte_array.data() + field.byte_offset), field.dtype}, + StridedLayoutView<>(chunk_shape, dest_byte_strides)); + + // Use CopyArray which safely handles any source strides via IterateOverArrays + CopyArray(field_array, dest_field_view); } return codec_state_->EncodeArray(byte_array); From faf4d4fff55796e29e4538f9091877aefb78adc7 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 21:40:02 +0000 Subject: [PATCH 46/59] zarr3: Use ComputeStrides utility for stride computation Replace manual stride computation loops with ComputeStrides() from contiguous_layout.h. This is the standard TensorStore utility for computing C-order (or Fortran-order) byte strides given a shape and innermost element stride. 
The manual loop: Index stride = bytes_per_outer_element; for (DimensionIndex i = rank; i-- > 0;) { strides[i] = stride; stride *= shape[i]; } Is exactly equivalent to: ComputeStrides(c_order, bytes_per_outer_element, shape, strides); --- tensorstore/driver/zarr3/chunk_cache.cc | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 127f5a4c9..9f37934eb 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -243,11 +243,8 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, // Build strides for the source view: each element is separated by // bytes_per_outer_element (the struct size), not field_size. std::vector src_byte_strides(chunk_shape.size()); - Index stride = dtype_.bytes_per_outer_element; - for (DimensionIndex i = chunk_shape.size(); i-- > 0;) { - src_byte_strides[i] = stride; - stride *= chunk_shape[i]; - } + ComputeStrides(c_order, dtype_.bytes_per_outer_element, chunk_shape, + src_byte_strides); // Create source ArrayView pointing to this field's offset within // the interleaved byte array, with strides that skip over other fields. @@ -331,11 +328,8 @@ Result ZarrLeafChunkCache::EncodeChunk( // Build strides for the destination view: each element is separated by // bytes_per_outer_element (the struct size), not field_size. std::vector dest_byte_strides(chunk_shape.size()); - Index stride = dtype_.bytes_per_outer_element; - for (DimensionIndex i = chunk_shape.size(); i-- > 0;) { - dest_byte_strides[i] = stride; - stride *= chunk_shape[i]; - } + ComputeStrides(c_order, dtype_.bytes_per_outer_element, chunk_shape, + dest_byte_strides); // Create destination ArrayView pointing to this field's offset within // the interleaved byte array, with strides that skip over other fields. 
From f0a5dbcc83fc5b2e63da8e7c93abbb2d065d9f6e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 21:41:20 +0000 Subject: [PATCH 47/59] zarr3: Use DimensionSet::UpTo and std::fill_n/std::copy_n utilities Replace manual loops with standard library and TensorStore utilities: 1. DimensionSet::UpTo(rank) - Creates a DimensionSet with bits [0, rank) set to true. Replaces: DimensionSet s(false); for (i = 0; i < rank; ++i) s[i] = true; 2. std::fill_n for origins (all zeros) and std::copy_n for shape copy. This is more idiomatic and clearer than explicit index loops. These are standard patterns used throughout TensorStore for similar operations on dimension sets and shape vectors. --- tensorstore/driver/zarr3/driver.cc | 32 +++++++++--------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index e6b9cf617..61c573381 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -189,24 +189,15 @@ class ZarrDriverSpec const DimensionIndex original_rank = metadata_constraints.shape->size(); IndexDomainBuilder builder(original_rank + 1); - // Set original dimensions from metadata - for (DimensionIndex i = 0; i < original_rank; ++i) { - builder.origin()[i] = 0; - builder.shape()[i] = (*metadata_constraints.shape)[i]; - } - - // Add bytes dimension - builder.origin()[original_rank] = 0; + // Set original dimensions from metadata (all origins are 0) + std::fill_n(builder.origin().begin(), original_rank + 1, Index{0}); + std::copy_n(metadata_constraints.shape->begin(), original_rank, + builder.shape().begin()); builder.shape()[original_rank] = bytes_per_elem; - // Set implicit bounds: array dims are implicit, bytes dim is explicit - DimensionSet implicit_lower(false); - DimensionSet implicit_upper(false); - for (DimensionIndex i = 0; i < original_rank; ++i) { - implicit_upper[i] = true; // Array dimensions are resizable - } - 
builder.implicit_lower_bounds(implicit_lower); - builder.implicit_upper_bounds(implicit_upper); + // Set implicit bounds: array dims are implicit (resizable), bytes dim is explicit + builder.implicit_lower_bounds(DimensionSet(false)); + builder.implicit_upper_bounds(DimensionSet::UpTo(original_rank)); // Copy dimension names if available if (metadata_constraints.dimension_names) { @@ -769,13 +760,8 @@ class ZarrDataCache : public ChunkCacheImpl, public DataCacheBase { // Set implicit bounds: array dims have implicit upper bounds (resizable), // bytes dim has explicit bounds (fixed size). - DimensionSet implicit_lower_bounds(false); - DimensionSet implicit_upper_bounds(false); - for (DimensionIndex i = 0; i < rank; ++i) { - implicit_upper_bounds[i] = true; - } - builder.implicit_lower_bounds(implicit_lower_bounds); - builder.implicit_upper_bounds(implicit_upper_bounds); + builder.implicit_lower_bounds(DimensionSet(false)); + builder.implicit_upper_bounds(DimensionSet::UpTo(rank)); for (DimensionIndex i = 0; i < total_rank; ++i) { builder.output_single_input_dimension(i, i); From ac98313f5b2bb35bad4eaa338ac7285d86701bbd Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 21:59:36 +0000 Subject: [PATCH 48/59] zarr3: Fix open_as_void with sharding for non-structured types The sub-chunk cache in sharding mode uses a grid from the sharding codec state, which doesn't know about void access. This caused issues: 1. Shape mismatch: The grid's component shape was [4, 4] but decoded arrays had shape [4, 4, 4] (with bytes dimension) 2. 
Invalid key generation: The grid's chunk_shape affected cell indexing Fix by: - Add `grid_has_void_dimension_` flag to track whether the grid includes the bytes dimension (false for sub-chunk caches) - For sub-chunk caches with void access on non-structured types, create a modified grid with: - Component chunk_shape including bytes dimension [4, 4, 4] - Grid chunk_shape unchanged [4, 4] (for cell indexing) - Proper chunked_to_cell_dimensions mapping This enables void access to work correctly with sharding codecs. --- tensorstore/driver/zarr3/chunk_cache.cc | 36 ++++++++++++++----- tensorstore/driver/zarr3/chunk_cache.h | 48 ++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 12 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index 9f37934eb..b60963288 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -78,13 +78,15 @@ ZarrChunkCache::~ZarrChunkCache() = default; ZarrLeafChunkCache::ZarrLeafChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr /*data_cache_pool*/, - bool open_as_void, bool original_is_structured, DataType original_dtype) + bool open_as_void, bool original_is_structured, DataType original_dtype, + bool grid_has_void_dimension) : Base(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), open_as_void_(open_as_void), original_is_structured_(original_is_structured), - original_dtype_(original_dtype) {} + original_dtype_(original_dtype), + grid_has_void_dimension_(grid_has_void_dimension) {} void ZarrLeafChunkCache::Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver chunk_indices, // Non-structured types: codec expects original dtype without extra // dimension. Decode, then reinterpret as bytes. 
- const auto& void_chunk_shape = grid().chunk_shape; - std::vector original_chunk_shape( - void_chunk_shape.begin(), - void_chunk_shape.end() - 1); // Strip bytes dimension + // + // For top-level caches, grid().chunk_shape includes bytes dimension. + // For sub-chunk caches (inside sharding), grid() returns the sharding + // codec's sub_chunk_grid which doesn't have bytes dimension. + const Index bytes_per_element = dtype_.bytes_per_outer_element; + const auto& grid_chunk_shape = grid().chunk_shape; + + std::vector original_chunk_shape; + if (grid_has_void_dimension_) { + // Strip the bytes dimension to get original shape + original_chunk_shape.assign(grid_chunk_shape.begin(), + grid_chunk_shape.end() - 1); + } else { + // Sub-chunk cache: grid shape is already the original shape + original_chunk_shape.assign(grid_chunk_shape.begin(), + grid_chunk_shape.end()); + } // Decode using original codec shape TENSORSTORE_ASSIGN_OR_RETURN( @@ -198,9 +213,13 @@ ZarrLeafChunkCache::DecodeChunk(span chunk_indices, assert(IsContiguousLayout(decoded_array.layout(), c_order, decoded_array.dtype().size())); + // Build the void output shape: original_shape + [bytes_per_element] + std::vector void_output_shape = original_chunk_shape; + void_output_shape.push_back(bytes_per_element); + // Reinterpret the decoded array's bytes as [chunk_shape..., bytes_per_elem] auto byte_array = AllocateArray( - void_component_shape, c_order, default_init, + void_output_shape, c_order, default_init, dtype_v); // Copy decoded data to byte array (handles potential layout differences) @@ -351,7 +370,8 @@ kvstore::Driver* ZarrLeafChunkCache::GetKvStoreDriver() { ZarrShardedChunkCache::ZarrShardedChunkCache( kvstore::DriverPtr store, ZarrCodecChain::PreparedState::Ptr codec_state, ZarrDType dtype, internal::CachePool::WeakPtr data_cache_pool, - bool open_as_void, bool original_is_structured, DataType original_dtype) + bool open_as_void, bool original_is_structured, DataType original_dtype, + bool 
/*grid_has_void_dimension*/) : base_kvstore_(std::move(store)), codec_state_(std::move(codec_state)), dtype_(std::move(dtype)), diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index 58b1d4c68..6e5bacdb9 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -18,6 +18,8 @@ #include #include +#include +#include #include #include @@ -161,7 +163,8 @@ class ZarrLeafChunkCache : public internal::KvsBackedChunkCache, internal::CachePool::WeakPtr data_cache_pool, bool open_as_void, bool original_is_structured, - DataType original_dtype); + DataType original_dtype, + bool grid_has_void_dimension = true); void Read(ZarrChunkCache::ReadRequest request, AnyFlowReceiver( @@ -278,9 +283,37 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { ZarrCodecChain::PreparedState::Ptr( sharding_state->sub_chunk_codec_state), std::move(dtype), std::move(data_cache_pool), - open_as_void, original_is_structured, original_dtype), + open_as_void, original_is_structured, original_dtype, + /*grid_has_void_dimension=*/false), sharding_state_(std::move(sharding_state)), - executor_(std::move(executor)) {} + executor_(std::move(executor)), + open_as_void_(open_as_void), + original_is_structured_(original_is_structured), + bytes_per_element_(dtype.bytes_per_outer_element) { + // For void access on non-structured types, create a modified grid + // with the bytes dimension added to the component shape. + // The grid's chunk_shape stays the same (determines cell layout). 
+ if (open_as_void_ && !original_is_structured_) { + const auto& original_grid = *sharding_state_->sub_chunk_grid; + const auto& orig_comp = original_grid.components[0]; + // Component chunk_shape gets bytes dimension, grid chunk_shape doesn't + std::vector void_comp_shape = orig_comp.chunk_shape; + void_comp_shape.push_back(bytes_per_element_); + // Create zero fill value with the void shape + auto fill_value = AllocateArray(void_comp_shape, c_order, value_init, + dtype_v); + // chunked_to_cell_dimensions maps the grid dimensions to cell dimensions + // (the bytes dimension is unchunked, so not included here) + std::vector chunked_to_cell(original_grid.chunk_shape.size()); + std::iota(chunked_to_cell.begin(), chunked_to_cell.end(), 0); + internal::ChunkGridSpecification::ComponentList components; + components.emplace_back( + internal::AsyncWriteArray::Spec{std::move(fill_value), + Box<>(void_comp_shape.size())}, + void_comp_shape, std::move(chunked_to_cell)); + void_grid_.emplace(std::move(components)); + } + } const internal::LexicographicalGridIndexKeyParser& GetChunkStorageKeyParser() override { @@ -288,6 +321,9 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { } const internal::ChunkGridSpecification& grid() const override { + if (void_grid_) { + return *void_grid_; + } return *sharding_state_->sub_chunk_grid; } const Executor& executor() const override { return executor_; } @@ -296,6 +332,10 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { ZarrShardingCodec::PreparedState::Ptr sharding_state_; Executor executor_; + bool open_as_void_; + bool original_is_structured_; + Index bytes_per_element_; + std::optional void_grid_; }; // Creates a `ZarrChunkCache` for the specified `codec_chain`. 
From 2d7f34c90c0ac9fe27872667d3c42222056c4e31 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 26 Jan 2026 22:03:03 +0000 Subject: [PATCH 49/59] zarr3: Remove redundant member variables in ZarrShardSubChunkCache The ZarrShardSubChunkCache template had duplicate member variables (open_as_void_, original_is_structured_, bytes_per_element_) that were already present in the base class ChunkCacheImpl (ZarrLeafChunkCache). Access these through ChunkCacheImpl:: prefix instead to follow DRY principle and maintain consistency with other TensorStore patterns. --- tensorstore/driver/zarr3/chunk_cache.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index 6e5bacdb9..f698ee232 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -286,19 +286,17 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { open_as_void, original_is_structured, original_dtype, /*grid_has_void_dimension=*/false), sharding_state_(std::move(sharding_state)), - executor_(std::move(executor)), - open_as_void_(open_as_void), - original_is_structured_(original_is_structured), - bytes_per_element_(dtype.bytes_per_outer_element) { + executor_(std::move(executor)) { // For void access on non-structured types, create a modified grid // with the bytes dimension added to the component shape. // The grid's chunk_shape stays the same (determines cell layout). 
- if (open_as_void_ && !original_is_structured_) { + if (ChunkCacheImpl::open_as_void_ && + !ChunkCacheImpl::original_is_structured_) { const auto& original_grid = *sharding_state_->sub_chunk_grid; const auto& orig_comp = original_grid.components[0]; // Component chunk_shape gets bytes dimension, grid chunk_shape doesn't std::vector void_comp_shape = orig_comp.chunk_shape; - void_comp_shape.push_back(bytes_per_element_); + void_comp_shape.push_back(ChunkCacheImpl::dtype_.bytes_per_outer_element); // Create zero fill value with the void shape auto fill_value = AllocateArray(void_comp_shape, c_order, value_init, dtype_v); @@ -332,9 +330,6 @@ class ZarrShardSubChunkCache : public ChunkCacheImpl { ZarrShardingCodec::PreparedState::Ptr sharding_state_; Executor executor_; - bool open_as_void_; - bool original_is_structured_; - Index bytes_per_element_; std::optional void_grid_; }; From a20a68657f21fea90170db86089a3d2fe369b510 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 17:16:16 +0000 Subject: [PATCH 50/59] Fix includes. 
Resolves: https://github.com/google/tensorstore/pull/271#discussion_r2757581279, https://github.com/google/tensorstore/pull/271#discussion_r2757585156, https://github.com/google/tensorstore/pull/271#discussion_r2757612952, https://github.com/google/tensorstore/pull/271#discussion_r2757620298, --- tensorstore/driver/zarr3/driver.cc | 2 +- tensorstore/driver/zarr3/dtype.cc | 12 ++++++++++++ tensorstore/driver/zarr3/dtype.h | 7 +++++++ tensorstore/driver/zarr3/metadata.cc | 8 ++++---- tensorstore/driver/zarr3/metadata.h | 6 +++++- 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 61c573381..b55246a62 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -25,6 +25,7 @@ #include #include +#include #include "absl/status/status.h" #include "absl/strings/ascii.h" #include "absl/strings/cord.h" @@ -32,7 +33,6 @@ #include "absl/strings/str_cat.h" #include "absl/time/clock.h" #include "absl/time/time.h" -#include #include "tensorstore/array.h" #include "tensorstore/array_storage_statistics.h" #include "tensorstore/box.h" diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index b8aacaa68..965f2d63a 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -16,15 +16,27 @@ #include +#include +#include +#include #include +#include +#include +#include #include "absl/base/optimization.h" +#include "absl/status/status.h" #include "absl/strings/ascii.h" +#include "absl/strings/numbers.h" #include "tensorstore/data_type.h" +#include "tensorstore/index.h" +#include "tensorstore/internal/integer_overflow.h" #include "tensorstore/internal/json_binding/json_binding.h" #include "tensorstore/util/endian.h" #include "tensorstore/util/extents.h" #include "tensorstore/util/quote_string.h" +#include "tensorstore/util/result.h" +#include "tensorstore/util/span.h" #include "tensorstore/util/str_cat.h" 
namespace tensorstore { diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h index 73a6b0961..d9be43db0 100644 --- a/tensorstore/driver/zarr3/dtype.h +++ b/tensorstore/driver/zarr3/dtype.h @@ -20,9 +20,16 @@ /// See: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#data-type #include +#include +#include +#include + #include +#include "absl/status/status.h" #include "tensorstore/data_type.h" +#include "tensorstore/index.h" #include "tensorstore/internal/json_binding/bindable.h" +#include "tensorstore/json_serialization_options_base.h" #include "tensorstore/util/endian.h" #include "tensorstore/util/result.h" diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index ba4454de4..4f8146f10 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,18 +32,16 @@ #include #include -#include - +#include #include "absl/algorithm/container.h" -#include "absl/strings/escaping.h" #include "absl/base/casts.h" #include "absl/base/optimization.h" #include "absl/meta/type_traits.h" #include "absl/status/status.h" +#include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" -#include #include "tensorstore/array.h" #include "tensorstore/box.h" #include "tensorstore/chunk_layout.h" @@ -78,6 +77,7 @@ #include "tensorstore/serialization/fwd.h" #include "tensorstore/serialization/json_bindable.h" #include "tensorstore/util/constant_vector.h" +#include "tensorstore/util/dimension_set.h" #include "tensorstore/util/iterate.h" #include "tensorstore/util/quote_string.h" #include "tensorstore/util/result.h" diff --git a/tensorstore/driver/zarr3/metadata.h b/tensorstore/driver/zarr3/metadata.h index d091dea22..88961cbb3 100644 --- a/tensorstore/driver/zarr3/metadata.h +++ b/tensorstore/driver/zarr3/metadata.h @@ -19,14 
+19,18 @@ /// Support for encoding/decoding the JSON metadata for zarr arrays /// See: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#metadata +#include + +#include #include #include #include +#include #include #include -#include "absl/status/status.h" #include +#include "absl/status/status.h" #include "tensorstore/array.h" #include "tensorstore/chunk_layout.h" #include "tensorstore/codec_spec.h" From e9ac8286dc87dadd02a69eafa12d966a5739672e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 17:18:17 +0000 Subject: [PATCH 51/59] Fix indentation --- tensorstore/driver/zarr3/dtype.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h index d9be43db0..dc4c8e4f3 100644 --- a/tensorstore/driver/zarr3/dtype.h +++ b/tensorstore/driver/zarr3/dtype.h @@ -137,13 +137,13 @@ absl::Status ValidateDType(ZarrDType& dtype); /// unstructured scalar array, otherwise `std::nullopt`. std::optional GetScalarDataType(const ZarrDType& dtype); - /// Parses a Zarr 3 data type string. - /// - /// \error `absl::StatusCode::kInvalidArgument` if `dtype` is not valid. - Result ParseBaseDType(std::string_view dtype); +/// Parses a Zarr 3 data type string. +/// +/// \error `absl::StatusCode::kInvalidArgument` if `dtype` is not valid. +Result ParseBaseDType(std::string_view dtype); - /// Chooses a zarr data type corresponding to `dtype`. - Result ChooseBaseDType(DataType dtype); +/// Chooses a zarr data type corresponding to `dtype`. +Result ChooseBaseDType(DataType dtype); } // namespace internal_zarr3 } // namespace tensorstore From 858e40d8ea7cb02c041eb6eff4ff25f04d259cf7 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 17:23:39 +0000 Subject: [PATCH 52/59] Fix imports. 
Resolves: https://github.com/google/tensorstore/pull/271#discussion_r2757654099, https://github.com/google/tensorstore/pull/271#discussion_r2757655270, https://github.com/google/tensorstore/pull/271#discussion_r2757659108 --- tensorstore/driver/zarr3/chunk_cache.cc | 5 +++-- tensorstore/driver/zarr3/chunk_cache.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorstore/driver/zarr3/chunk_cache.cc b/tensorstore/driver/zarr3/chunk_cache.cc index b60963288..16942b009 100644 --- a/tensorstore/driver/zarr3/chunk_cache.cc +++ b/tensorstore/driver/zarr3/chunk_cache.cc @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -31,14 +30,16 @@ #include "absl/time/time.h" #include "tensorstore/array.h" #include "tensorstore/array_storage_statistics.h" -#include "tensorstore/contiguous_layout.h" #include "tensorstore/batch.h" #include "tensorstore/box.h" +#include "tensorstore/contiguous_layout.h" +#include "tensorstore/data_type.h" #include "tensorstore/driver/chunk.h" #include "tensorstore/driver/chunk_receiver_utils.h" #include "tensorstore/driver/read_request.h" #include "tensorstore/driver/write_request.h" #include "tensorstore/driver/zarr3/codec/codec.h" +#include "tensorstore/driver/zarr3/dtype.h" #include "tensorstore/index.h" #include "tensorstore/index_interval.h" #include "tensorstore/index_space/index_transform.h" diff --git a/tensorstore/driver/zarr3/chunk_cache.h b/tensorstore/driver/zarr3/chunk_cache.h index f698ee232..b574652a9 100644 --- a/tensorstore/driver/zarr3/chunk_cache.h +++ b/tensorstore/driver/zarr3/chunk_cache.h @@ -29,6 +29,7 @@ #include "absl/synchronization/mutex.h" #include "absl/time/time.h" #include "tensorstore/array.h" +#include "tensorstore/data_type.h" #include "tensorstore/driver/chunk.h" #include "tensorstore/driver/read_request.h" #include "tensorstore/driver/write_request.h" From be1ab7c14836fa282461d12cdca5bdc4e17cbe6f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 19:28:00 
+0000 Subject: [PATCH 53/59] friend inline the equality and inequality operator overloads. Resolves: https://github.com/google/tensorstore/pull/271#discussion_r2757596957 --- tensorstore/driver/zarr3/dtype.cc | 32 -------------------------- tensorstore/driver/zarr3/dtype.h | 38 +++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 41 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 965f2d63a..f48167357 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -296,38 +296,6 @@ Result ParseDType(const nlohmann::json& value) { return dtype; } -bool operator==(const ZarrDType::BaseDType& a, - const ZarrDType::BaseDType& b) { - return a.encoded_dtype == b.encoded_dtype && a.dtype == b.dtype && - a.flexible_shape == b.flexible_shape; -} - -bool operator!=(const ZarrDType::BaseDType& a, - const ZarrDType::BaseDType& b) { - return !(a == b); -} - -bool operator==(const ZarrDType::Field& a, const ZarrDType::Field& b) { - return static_cast(a) == - static_cast(b) && - a.outer_shape == b.outer_shape && a.name == b.name && - a.field_shape == b.field_shape && - a.num_inner_elements == b.num_inner_elements && - a.byte_offset == b.byte_offset && a.num_bytes == b.num_bytes; -} - -bool operator!=(const ZarrDType::Field& a, const ZarrDType::Field& b) { - return !(a == b); -} - -bool operator==(const ZarrDType& a, const ZarrDType& b) { - return a.has_fields == b.has_fields && - a.bytes_per_outer_element == b.bytes_per_outer_element && - a.fields == b.fields; -} - -bool operator!=(const ZarrDType& a, const ZarrDType& b) { return !(a == b); } - void to_json(::nlohmann::json& out, const ZarrDType::Field& field) { using array_t = ::nlohmann::json::array_t; if (field.outer_shape.empty()) { diff --git a/tensorstore/driver/zarr3/dtype.h b/tensorstore/driver/zarr3/dtype.h index dc4c8e4f3..039aaf072 100644 --- a/tensorstore/driver/zarr3/dtype.h +++ b/tensorstore/driver/zarr3/dtype.h @@ -66,6 
+66,14 @@ struct ZarrDType { /// For "flexible" data types that are themselves arrays, this specifies the /// shape. For regular data types, this is empty. std::vector flexible_shape; + + friend bool operator==(const BaseDType& a, const BaseDType& b) { + return a.encoded_dtype == b.encoded_dtype && a.dtype == b.dtype && + a.flexible_shape == b.flexible_shape; + } + friend bool operator!=(const BaseDType& a, const BaseDType& b) { + return !(a == b); + } }; /// Decoded representation of a single field. @@ -92,6 +100,18 @@ struct ZarrDType { /// Number of bytes occupied by this field within an "outer" element /// (derived value). Index num_bytes; + + friend bool operator==(const Field& a, const Field& b) { + return static_cast(a) == + static_cast(b) && + a.outer_shape == b.outer_shape && a.name == b.name && + a.field_shape == b.field_shape && + a.num_inner_elements == b.num_inner_elements && + a.byte_offset == b.byte_offset && a.num_bytes == b.num_bytes; + } + friend bool operator!=(const Field& a, const Field& b) { + return !(a == b); + } }; /// Equal to `true` if the zarr "dtype" was specified as an array, in which @@ -110,16 +130,16 @@ struct ZarrDType { friend void to_json(::nlohmann::json& out, // NOLINT const ZarrDType& dtype); -}; -bool operator==(const ZarrDType::BaseDType& a, - const ZarrDType::BaseDType& b); -bool operator!=(const ZarrDType::BaseDType& a, - const ZarrDType::BaseDType& b); -bool operator==(const ZarrDType::Field& a, const ZarrDType::Field& b); -bool operator!=(const ZarrDType::Field& a, const ZarrDType::Field& b); -bool operator==(const ZarrDType& a, const ZarrDType& b); -bool operator!=(const ZarrDType& a, const ZarrDType& b); + friend bool operator==(const ZarrDType& a, const ZarrDType& b) { + return a.has_fields == b.has_fields && + a.bytes_per_outer_element == b.bytes_per_outer_element && + a.fields == b.fields; + } + friend bool operator!=(const ZarrDType& a, const ZarrDType& b) { + return !(a == b); + } +}; /// Parses a zarr metadata 
"dtype" JSON specification. /// From 24e16c254343428b849288eb1bcb6649a4b24d04 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 19:51:16 +0000 Subject: [PATCH 54/59] Prefer absl::StrFormat over tensorstore::StrCat --- tensorstore/driver/zarr3/dtype.cc | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index f48167357..e9ba761f6 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -28,6 +28,7 @@ #include "absl/status/status.h" #include "absl/strings/ascii.h" #include "absl/strings/numbers.h" +#include "absl/strings/str_format.h" #include "tensorstore/data_type.h" #include "tensorstore/index.h" #include "tensorstore/internal/integer_overflow.h" @@ -77,9 +78,10 @@ Result ParseBaseDType(std::string_view dtype) { if (!absl::SimpleAtoi(suffix, &num_bits) || num_bits == 0 || num_bits % 8 != 0) { - return absl::InvalidArgumentError(tensorstore::StrCat( - dtype, " data type is invalid; expected r where N is a positive " - "multiple of 8")); + return absl::InvalidArgumentError(absl::StrFormat( + "%s data type is invalid; expected r where N is a positive " + "multiple of 8", + dtype)); } Index num_bytes = num_bits / 8; return ZarrDType::BaseDType{std::string(dtype), @@ -89,18 +91,18 @@ Result ParseBaseDType(std::string_view dtype) { // Handle bare "r" - must have a number after it if (dtype.size() >= 1 && dtype[0] == 'r') { - return absl::InvalidArgumentError(tensorstore::StrCat( - dtype, " data type is invalid; expected r where N is a positive " - "multiple of 8")); + return absl::InvalidArgumentError(absl::StrFormat( + "%s data type is invalid; expected r where N is a positive " + "multiple of 8", + dtype)); } constexpr std::string_view kSupported = "bool, uint8, uint16, uint32, uint64, int8, int16, int32, int64, " "bfloat16, float16, float32, float64, complex64, complex128, r"; - return 
absl::InvalidArgumentError( - tensorstore::StrCat(dtype, " data type is not one of the supported " - "data types: ", - kSupported)); + return absl::InvalidArgumentError(absl::StrFormat( + "%s data type is not one of the supported data types: %s", dtype, + kSupported)); } namespace { From f9c675057e4257654c3a0dfdf0c66cbc463c10a0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 20:46:38 +0000 Subject: [PATCH 55/59] Add return type annotation to lambdas. Resolves: https://github.com/google/tensorstore/pull/271#discussion_r2757673423 --- tensorstore/driver/zarr3/driver.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index b55246a62..b9c39cc97 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -118,7 +118,7 @@ class ZarrDriverSpec static inline const auto default_json_binder = jb::Sequence( jb::Validate( - [](const auto& options, auto* obj) { + [](const auto& options, auto* obj) -> absl::Status { if (obj->schema.dtype().valid()) { return ValidateDataType(obj->schema.dtype()); } @@ -128,7 +128,7 @@ class ZarrDriverSpec jb::Member( "metadata", jb::Validate( - [](const auto& options, auto* obj) { + [](const auto& options, auto* obj) -> absl::Status { if (obj->metadata_constraints.data_type) { if (auto dtype = GetScalarDataType( *obj->metadata_constraints.data_type)) { @@ -153,7 +153,7 @@ class ZarrDriverSpec jb::Member("open_as_void", jb::Projection<&ZarrDriverSpec::open_as_void>( jb::DefaultValue( [](auto* v) { *v = false; }))), - jb::Initialize([](auto* obj) { + jb::Initialize([](auto* obj) -> absl::Status { // Validate that field and open_as_void are mutually exclusive if (obj->open_as_void && !obj->selected_field.empty()) { return absl::InvalidArgumentError( From 6a773e27badcc64ba6bc3883e2622d741f739ae0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 3 Feb 2026 21:15:14 +0000 Subject: [PATCH 56/59] Update 
key generation to handle grid indices safely by ensuring the subspan does not exceed the grid size. This prevents potential out-of-bounds access when generating keys. Resolves: https://github.com/google/tensorstore/pull/271#issuecomment-3839662461 --- tensorstore/driver/zarr3/driver.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index b9c39cc97..2255abb3c 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -557,7 +557,9 @@ class DataCacheBase [](std::string& out, DimensionIndex dim, Index grid_index) { absl::StrAppend(&out, grid_index); }, - rank, grid_indices.subspan(0, rank)); + rank, + grid_indices.subspan( + 0, std::min(grid_indices.size(), rank))); return key; } From 6028b5b7dbc77c63bf7d757ac146a149ecf38983 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 4 Feb 2026 15:17:47 +0000 Subject: [PATCH 57/59] Prefer `empty()` over `size()` Resolves: https://github.com/google/tensorstore/pull/271#discussion_r2761074528, https://github.com/google/tensorstore/pull/271#discussion_r2761076335 --- tensorstore/driver/zarr3/driver.cc | 2 +- tensorstore/driver/zarr3/dtype.cc | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index 2255abb3c..bbb5f29f3 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -998,7 +998,7 @@ class ZarrDriver::OpenState : public ZarrDriver::OpenStateBase { // Get the original dtype for void access encoding (needed by EncodeChunk). // For non-structured types, this is the single field's dtype. - DataType original_dtype = metadata.data_type.fields.size() > 0 + DataType original_dtype = !metadata.data_type.fields.empty() ? 
metadata.data_type.fields[0].dtype : DataType{}; diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index e9ba761f6..b48e8bc33 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -72,7 +72,8 @@ Result ParseBaseDType(std::string_view dtype) { return make_dtype(dtype_v<::tensorstore::dtypes::complex128_t>); // Handle r raw bits type where N is number of bits (must be multiple of 8) - if (dtype.size() > 1 && dtype[0] == 'r' && absl::ascii_isdigit(dtype[1])) { + if (!dtype.empty() && dtype[0] == 'r' && dtype.size() > 1 && + absl::ascii_isdigit(dtype[1])) { std::string_view suffix = dtype.substr(1); Index num_bits = 0; if (!absl::SimpleAtoi(suffix, &num_bits) || @@ -90,7 +91,7 @@ Result ParseBaseDType(std::string_view dtype) { } // Handle bare "r" - must have a number after it - if (dtype.size() >= 1 && dtype[0] == 'r') { + if (!dtype.empty() && dtype[0] == 'r') { return absl::InvalidArgumentError(absl::StrFormat( "%s data type is invalid; expected r where N is a positive " "multiple of 8", From 0cb06370de34ec2c644a32005cb4f7b45b460b2a Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 6 Feb 2026 15:54:32 +0000 Subject: [PATCH 58/59] Refactor `tensorstore::StrCat` to `absl::StrFormat`. 
Resolves https://github.com/google/tensorstore/pull/271#discussion_r2761080035 with TODO question pending --- tensorstore/driver/zarr3/codec/blosc.cc | 6 +- tensorstore/driver/zarr3/codec/bytes.cc | 21 ++++-- .../driver/zarr3/codec/codec_chain_spec.cc | 12 ++-- .../driver/zarr3/codec/sharding_indexed.cc | 4 ++ tensorstore/driver/zarr3/codec/transpose.cc | 5 ++ tensorstore/driver/zarr3/driver.cc | 7 +- tensorstore/driver/zarr3/dtype.cc | 23 +++--- tensorstore/driver/zarr3/metadata.cc | 72 ++++++++++--------- 8 files changed, 86 insertions(+), 64 deletions(-) diff --git a/tensorstore/driver/zarr3/codec/blosc.cc b/tensorstore/driver/zarr3/codec/blosc.cc index ea8718d85..b11677411 100644 --- a/tensorstore/driver/zarr3/codec/blosc.cc +++ b/tensorstore/driver/zarr3/codec/blosc.cc @@ -160,9 +160,9 @@ constexpr auto CodecBinder() { return jb::Validate([](const auto& options, std::string* cname) { if (cname->find('\0') != std::string::npos || blosc_compname_to_compcode(cname->c_str()) == -1) { - return absl::InvalidArgumentError( - tensorstore::StrCat("Expected one of ", blosc_list_compressors(), - " but received: ", QuoteString(*cname))); + return absl::InvalidArgumentError(absl::StrFormat( + "Expected one of %s but received: %s", blosc_list_compressors(), + QuoteString(*cname))); } return absl::OkStatus(); }); diff --git a/tensorstore/driver/zarr3/codec/bytes.cc b/tensorstore/driver/zarr3/codec/bytes.cc index cb3c62934..c8c4de059 100644 --- a/tensorstore/driver/zarr3/codec/bytes.cc +++ b/tensorstore/driver/zarr3/codec/bytes.cc @@ -22,6 +22,7 @@ #include #include "absl/status/status.h" +#include "absl/strings/str_format.h" #include "riegeli/bytes/reader.h" #include "riegeli/bytes/writer.h" #include "tensorstore/array.h" @@ -52,8 +53,8 @@ namespace internal_zarr3 { namespace { absl::Status InvalidDataTypeError(DataType dtype) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Data type ", dtype, " not compatible with \"bytes\" codec")); + return 
absl::InvalidArgumentError(absl::StrFormat( + "Data type %v not compatible with \"bytes\" codec", dtype)); } class BytesCodec : public ZarrArrayToBytesCodec { @@ -118,23 +119,27 @@ Result BytesCodecSpec::Resolve( const bool is_endian_invariant = internal::IsEndianInvariantDataType(decoded.dtype); if (!options.constraints && !is_endian_invariant && !options.endianness) { - return absl::InvalidArgumentError( - tensorstore::StrCat("\"bytes\" codec requires that \"endian\" option " - "is specified for data type ", - decoded.dtype)); + return absl::InvalidArgumentError(absl::StrFormat( + "\"bytes\" codec requires that \"endian\" option is specified for " + "data type %v", + decoded.dtype)); } encoded.item_bits = decoded.dtype.size() * 8; DimensionIndex rank = decoded.rank; if (decoded.codec_chunk_shape) { + // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has + // AbslStringify support, allowing use of %v format specifier. return absl::InvalidArgumentError(tensorstore::StrCat( "\"bytes\" codec does not support codec_chunk_shape (", span(decoded.codec_chunk_shape->data(), rank), - " was specified")); + " was specified)")); } if (decoded.inner_order) { auto& decoded_inner_order = *decoded.inner_order; for (DimensionIndex i = 0; i < rank; ++i) { if (decoded_inner_order[i] != i) { + // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has + // AbslStringify support, allowing use of %v format specifier. return absl::InvalidArgumentError(tensorstore::StrCat( "\"bytes\" codec does not support inner_order of ", span(decoded_inner_order.data(), rank))); @@ -206,6 +211,8 @@ Result BytesCodec::Prepare( int64_t bytes = dtype_.size(); for (auto size : decoded_shape) { if (internal::MulOverflow(size, bytes, &bytes)) { + // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has + // AbslStringify support, allowing use of %v format specifier. 
return absl::OutOfRangeError(tensorstore::StrCat( "Integer overflow computing encoded size of array of shape ", decoded_shape)); diff --git a/tensorstore/driver/zarr3/codec/codec_chain_spec.cc b/tensorstore/driver/zarr3/codec/codec_chain_spec.cc index c3bacd6cd..7dc9b6ced 100644 --- a/tensorstore/driver/zarr3/codec/codec_chain_spec.cc +++ b/tensorstore/driver/zarr3/codec/codec_chain_spec.cc @@ -132,8 +132,8 @@ constexpr auto ZarrCodecChainSpecJsonBinderImpl = jb::Compose< } for (; it != end; ++it) { if ((*it)->kind() != ZarrCodecKind::kBytesToBytes) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Expected bytes -> bytes codec, but received: ", + return absl::InvalidArgumentError(absl::StrFormat( + "Expected bytes -> bytes codec, but received: %s", jb::ToJson(*it, ZarrCodecJsonBinder).value().dump())); } obj->bytes_to_bytes.push_back( @@ -164,16 +164,16 @@ Result GetDefaultArrayToBytesCodecSpec( if (internal::IsTrivialDataType(decoded.dtype)) { return DefaultBytesCodec(); } - return absl::InternalError(tensorstore::StrCat( - "No default codec defined for data type ", decoded.dtype)); + return absl::InternalError(absl::StrFormat( + "No default codec defined for data type %v", decoded.dtype)); } absl::Status CodecResolveError(const ZarrCodecSpec& codec_spec, std::string_view message, const absl::Status& status) { return tensorstore::MaybeAnnotateStatus( - status, tensorstore::StrCat( - "Error ", message, " through ", + status, absl::StrFormat( + "Error %s through %s", message, jb::ToJson(&codec_spec, ZarrCodecJsonBinder).value().dump())); } } // namespace diff --git a/tensorstore/driver/zarr3/codec/sharding_indexed.cc b/tensorstore/driver/zarr3/codec/sharding_indexed.cc index 3f11298c2..2700f9887 100644 --- a/tensorstore/driver/zarr3/codec/sharding_indexed.cc +++ b/tensorstore/driver/zarr3/codec/sharding_indexed.cc @@ -60,6 +60,8 @@ namespace tensorstore { namespace internal_zarr3 { +// TODO(BrianMichell): Convert to absl::StrFormat once 
tensorstore::span has +// AbslStringify support, allowing use of %v format specifier. absl::Status SubChunkRankMismatch(span sub_chunk_shape, DimensionIndex outer_rank) { return absl::InvalidArgumentError(tensorstore::StrCat( @@ -67,6 +69,8 @@ absl::Status SubChunkRankMismatch(span sub_chunk_shape, " is not compatible with array of rank ", outer_rank)); } +// TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has +// AbslStringify support, allowing use of %v format specifier. absl::Status SubChunkShapeMismatch(span sub_chunk_shape, span chunk_shape) { return absl::InvalidArgumentError(tensorstore::StrCat( diff --git a/tensorstore/driver/zarr3/codec/transpose.cc b/tensorstore/driver/zarr3/codec/transpose.cc index f52609c9b..13e05ae17 100644 --- a/tensorstore/driver/zarr3/codec/transpose.cc +++ b/tensorstore/driver/zarr3/codec/transpose.cc @@ -50,6 +50,9 @@ namespace internal_zarr3 { namespace { namespace jb = ::tensorstore::internal_json_binding; + +// TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has +// AbslStringify support, allowing use of %v format specifier. absl::Status InvalidPermutationError(span order, DimensionIndex rank) { return absl::InvalidArgumentError(tensorstore::StrCat( @@ -62,6 +65,8 @@ constexpr auto OrderJsonBinder() { jb::Validate( [](const auto& options, auto* obj) { if (!IsValidPermutation(*obj)) { + // TODO(BrianMichell): Convert to absl::StrFormat once + // tensorstore::span has AbslStringify support. 
return absl::InvalidArgumentError( tensorstore::StrCat(span(*obj), " is not a valid permutation")); diff --git a/tensorstore/driver/zarr3/driver.cc b/tensorstore/driver/zarr3/driver.cc index bbb5f29f3..36ea61e20 100644 --- a/tensorstore/driver/zarr3/driver.cc +++ b/tensorstore/driver/zarr3/driver.cc @@ -31,6 +31,7 @@ #include "absl/strings/cord.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "tensorstore/array.h" @@ -385,9 +386,9 @@ class DataCacheBase auto existing_key = existing_metadata.GetCompatibilityKey(); auto new_key = new_metadata.GetCompatibilityKey(); if (existing_key == new_key) return absl::OkStatus(); - return absl::FailedPreconditionError(tensorstore::StrCat( - "Updated zarr metadata ", new_key, - " is incompatible with existing metadata ", existing_key)); + return absl::FailedPreconditionError(absl::StrFormat( + "Updated zarr metadata %s is incompatible with existing metadata %s", + new_key, existing_key)); } void GetChunkGridBounds(const void* metadata_ptr, MutableBoxView<> bounds, diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index b48e8bc33..31fb3644d 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -132,8 +132,9 @@ absl::Status ParseFieldsArray(const nlohmann::json& fields_json, x, [&](ptrdiff_t size) { if (size < 2 || size > 3) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Expected array of size 2 or 3, but received: ", x.dump())); + return absl::InvalidArgumentError(absl::StrFormat( + "Expected array of size 2 or 3, but received: %s", + x.dump())); } return absl::OkStatus(); }, @@ -143,8 +144,8 @@ absl::Status ParseFieldsArray(const nlohmann::json& fields_json, if (internal_json::JsonRequireValueAs(v, &field.name).ok()) { if (!field.name.empty()) return absl::OkStatus(); } - return 
absl::InvalidArgumentError(tensorstore::StrCat( - "Expected non-empty string, but received: ", v.dump())); + return absl::InvalidArgumentError(absl::StrFormat( + "Expected non-empty string, but received: %s", v.dump())); case 1: { std::string dtype_string; TENSORSTORE_RETURN_IF_ERROR( @@ -238,9 +239,9 @@ Result ParseDTypeNoDerived(const nlohmann::json& value) { ParseBaseDType(type_name)); return out; } - return absl::InvalidArgumentError(tensorstore::StrCat( + return absl::InvalidArgumentError(absl::StrFormat( "Expected string, array, or object with 'name' and 'configuration', " - "but received: ", + "but received: %s", value.dump())); } // Handle array format: [["field1", "type1"], ["field2", "type2"], ...] @@ -257,17 +258,19 @@ absl::Status ValidateDType(ZarrDType& dtype) { if (std::any_of( dtype.fields.begin(), dtype.fields.begin() + field_i, [&](const ZarrDType::Field& f) { return f.name == field.name; })) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Field name ", QuoteString(field.name), " occurs more than once")); + return absl::InvalidArgumentError(absl::StrFormat( + "Field name %s occurs more than once", QuoteString(field.name))); } field.field_shape.resize(field.flexible_shape.size() + - field.outer_shape.size()); + field.outer_shape.size());ß std::copy(field.flexible_shape.begin(), field.flexible_shape.end(), std::copy(field.outer_shape.begin(), field.outer_shape.end(), field.field_shape.begin())); field.num_inner_elements = ProductOfExtents(span(field.field_shape)); if (field.num_inner_elements == std::numeric_limits::max()) { + // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has + // AbslStringify support, allowing use of %v format specifier. 
return absl::InvalidArgumentError(tensorstore::StrCat( "Product of dimensions ", span(field.field_shape), " is too large")); } @@ -378,7 +381,7 @@ Result ChooseBaseDType(DataType dtype) { return base_dtype; } return absl::InvalidArgumentError( - tensorstore::StrCat("Data type not supported: ", dtype)); + absl::StrFormat("Data type not supported: %v", dtype)); } } // namespace internal_zarr3 diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 4f8146f10..29652f911 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -116,9 +116,9 @@ std::string GetSupportedDataTypes() { absl::Status ValidateDataType(DataType dtype) { if (!absl::c_linear_search(kSupportedDataTypes, dtype.id())) { - return absl::InvalidArgumentError(tensorstore::StrCat( - dtype, " data type is not one of the supported data types: ", - GetSupportedDataTypes())); + return absl::InvalidArgumentError(absl::StrFormat( + "%v data type is not one of the supported data types: %s", + dtype, GetSupportedDataTypes())); } return absl::OkStatus(); } @@ -296,17 +296,17 @@ absl::Status FillValueJsonBinder::operator()( } std::string b64_decoded; if (!absl::Base64Unescape(j->get(), &b64_decoded)) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Expected valid base64-encoded fill value, but received: ", + return absl::InvalidArgumentError(absl::StrFormat( + "Expected valid base64-encoded fill value, but received: %s", j->dump())); } // Verify size matches expected byte array size Index expected_size = dtype.fields[0].num_inner_elements; if (static_cast(b64_decoded.size()) != expected_size) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Expected ", expected_size, - " base64-encoded bytes for fill_value, but received ", - b64_decoded.size(), " bytes")); + return absl::InvalidArgumentError(absl::StrFormat( + "Expected %d base64-encoded bytes for fill_value, but received " + "%d bytes", + expected_size, 
b64_decoded.size())); } // Create fill value array auto fill_arr = AllocateArray(dtype.fields[0].field_shape, c_order, @@ -323,17 +323,17 @@ absl::Status FillValueJsonBinder::operator()( // Decode base64-encoded fill value for entire struct std::string b64_decoded; if (!absl::Base64Unescape(j->get(), &b64_decoded)) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Expected valid base64-encoded fill value, but received: ", + return absl::InvalidArgumentError(absl::StrFormat( + "Expected valid base64-encoded fill value, but received: %s", j->dump())); } // Verify size matches expected struct size if (static_cast(b64_decoded.size()) != dtype.bytes_per_outer_element) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "Expected ", dtype.bytes_per_outer_element, - " base64-encoded bytes for fill_value, but received ", - b64_decoded.size(), " bytes")); + return absl::InvalidArgumentError(absl::StrFormat( + "Expected %d base64-encoded bytes for fill_value, but received " + "%d bytes", + dtype.bytes_per_outer_element, b64_decoded.size())); } // Extract per-field fill values from decoded bytes for (size_t i = 0; i < dtype.fields.size(); ++i) { @@ -347,7 +347,7 @@ absl::Status FillValueJsonBinder::operator()( } else if (j->is_array()) { if (j->size() != dtype.fields.size()) { return internal_json::ExpectedError( - *j, tensorstore::StrCat("array of size ", dtype.fields.size())); + *j, absl::StrFormat("array of size %d", dtype.fields.size())); } for (size_t i = 0; i < dtype.fields.size(); ++i) { TENSORSTORE_RETURN_IF_ERROR( @@ -480,9 +480,10 @@ constexpr auto UnknownExtensionAttributesJsonBinder = continue; } } - return absl::InvalidArgumentError(tensorstore::StrCat( - "Unsupported metadata field ", tensorstore::QuoteString(key), - " is not marked {\"must_understand\": false}")); + return absl::InvalidArgumentError(absl::StrFormat( + "Unsupported metadata field %s is not marked " + "{\"must_understand\": false}", + tensorstore::QuoteString(key))); } return 
absl::OkStatus(); }); @@ -813,23 +814,23 @@ Result GetFieldIndex(const ZarrDType& dtype, if (selected_field.empty()) { if (dtype.fields.size() != 1) { - return absl::FailedPreconditionError(tensorstore::StrCat( - "Must specify a \"field\" that is one of: ", GetFieldNames(dtype))); + return absl::FailedPreconditionError(absl::StrFormat( + "Must specify a \"field\" that is one of: %s", GetFieldNames(dtype))); } return 0; } if (!dtype.has_fields) { - return absl::FailedPreconditionError( - tensorstore::StrCat("Requested field ", QuoteString(selected_field), - " but dtype does not have named fields")); + return absl::FailedPreconditionError(absl::StrFormat( + "Requested field %s but dtype does not have named fields", + QuoteString(selected_field))); } for (size_t field_index = 0; field_index < dtype.fields.size(); ++field_index) { if (dtype.fields[field_index].name == selected_field) return field_index; } - return absl::FailedPreconditionError( - tensorstore::StrCat("Requested field ", QuoteString(selected_field), - " is not one of: ", GetFieldNames(dtype))); + return absl::FailedPreconditionError(absl::StrFormat( + "Requested field %s is not one of: %s", QuoteString(selected_field), + GetFieldNames(dtype))); } SpecRankAndFieldInfo GetSpecRankAndFieldInfo(const ZarrMetadata& metadata, @@ -1056,10 +1057,10 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, const auto& field = metadata.data_type.fields[field_index]; if (!RankConstraint::EqualOrUnspecified(schema.rank(), info.chunked_rank)) { - return absl::FailedPreconditionError(tensorstore::StrCat( - "Rank specified by schema (", schema.rank(), - ") does not match rank specified by metadata (", info.chunked_rank, - ")")); + return absl::FailedPreconditionError(absl::StrFormat( + "Rank specified by schema (%d) does not match rank specified by " + "metadata (%d)", + schema.rank(), info.chunked_rank)); } if (schema.domain().valid()) { @@ -1075,9 +1076,9 @@ absl::Status ValidateMetadataSchema(const 
ZarrMetadata& metadata, if (auto dtype = schema.dtype(); !IsPossiblySameDataType(field.dtype, dtype)) { - return absl::FailedPreconditionError( - tensorstore::StrCat("data_type from metadata (", field.dtype, - ") does not match dtype in schema (", dtype, ")")); + return absl::FailedPreconditionError(absl::StrFormat( + "data_type from metadata (%v) does not match dtype in schema (%v)", + field.dtype, dtype)); } if (schema.chunk_layout().rank() != dynamic_rank) { @@ -1103,7 +1104,8 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, skip_repeated_elements, field.dtype)); if (!AreArraysIdenticallyEqual(converted_fill_value, fill_value)) { auto binder = FillValueJsonBinder{metadata.data_type}; - // Error message generation might be tricky with binder + // TODO(BrianMichellß): Convert to absl::StrFormat once SharedArray has + // AbslStringify support, allowing use of %v format specifier. return absl::FailedPreconditionError(tensorstore::StrCat( "Invalid fill_value: schema requires fill value of ", schema_fill_value, ", but metadata specifies fill value of ", From 6ba070f9fd9e8b2b16f9fffdffb07427b760eb25 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 6 Feb 2026 17:26:08 +0000 Subject: [PATCH 59/59] Use stringify for spans as well. 
Resolves: https://github.com/google/tensorstore/pull/271/#discussion_r2774836435 --- tensorstore/driver/zarr3/codec/bytes.cc | 27 ++++++++----------- .../driver/zarr3/codec/sharding_indexed.cc | 20 +++++++------- tensorstore/driver/zarr3/dtype.cc | 11 ++++---- tensorstore/driver/zarr3/metadata.cc | 18 ++++++------- 4 files changed, 34 insertions(+), 42 deletions(-) diff --git a/tensorstore/driver/zarr3/codec/bytes.cc b/tensorstore/driver/zarr3/codec/bytes.cc index abc3f8909..95633f446 100644 --- a/tensorstore/driver/zarr3/codec/bytes.cc +++ b/tensorstore/driver/zarr3/codec/bytes.cc @@ -127,22 +127,19 @@ Result BytesCodecSpec::Resolve( encoded.item_bits = decoded.dtype.size() * 8; DimensionIndex rank = decoded.rank; if (decoded.codec_chunk_shape) { - // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has - // AbslStringify support, allowing use of %v format specifier. - return absl::InvalidArgumentError(tensorstore::StrCat( - "\"bytes\" codec does not support codec_chunk_shape (", - span(decoded.codec_chunk_shape->data(), rank), - " was specified)")); + return absl::InvalidArgumentError(absl::StrFormat( + "\"bytes\" codec does not support codec_chunk_shape (%s was specified)", + absl::FormatStreamed( + span(decoded.codec_chunk_shape->data(), rank)))); } if (decoded.inner_order) { auto& decoded_inner_order = *decoded.inner_order; for (DimensionIndex i = 0; i < rank; ++i) { if (decoded_inner_order[i] != i) { - // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has - // AbslStringify support, allowing use of %v format specifier. 
- return absl::InvalidArgumentError(tensorstore::StrCat( - "\"bytes\" codec does not support inner_order of ", - span(decoded_inner_order.data(), rank))); + return absl::InvalidArgumentError(absl::StrFormat( + "\"bytes\" codec does not support inner_order of %s", + absl::FormatStreamed( + span(decoded_inner_order.data(), rank)))); } } } @@ -211,11 +208,9 @@ Result BytesCodec::Prepare( int64_t bytes = dtype_.size(); for (auto size : decoded_shape) { if (internal::MulOverflow(size, bytes, &bytes)) { - // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has - // AbslStringify support, allowing use of %v format specifier. - return absl::OutOfRangeError(tensorstore::StrCat( - "Integer overflow computing encoded size of array of shape ", - decoded_shape)); + return absl::OutOfRangeError(absl::StrFormat( + "Integer overflow computing encoded size of array of shape %s", + absl::FormatStreamed(decoded_shape))); } } auto state = internal::MakeIntrusivePtr(); diff --git a/tensorstore/driver/zarr3/codec/sharding_indexed.cc b/tensorstore/driver/zarr3/codec/sharding_indexed.cc index 2700f9887..453a9168d 100644 --- a/tensorstore/driver/zarr3/codec/sharding_indexed.cc +++ b/tensorstore/driver/zarr3/codec/sharding_indexed.cc @@ -24,6 +24,7 @@ #include #include "absl/status/status.h" +#include "absl/strings/str_format.h" #include "riegeli/bytes/reader.h" #include "riegeli/bytes/writer.h" #include "tensorstore/array.h" @@ -60,22 +61,21 @@ namespace tensorstore { namespace internal_zarr3 { -// TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has -// AbslStringify support, allowing use of %v format specifier. 
absl::Status SubChunkRankMismatch(span sub_chunk_shape, DimensionIndex outer_rank) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "sharding_indexed sub-chunk shape of ", sub_chunk_shape, - " is not compatible with array of rank ", outer_rank)); + return absl::InvalidArgumentError(absl::StrFormat( + "sharding_indexed sub-chunk shape of %s is not compatible with array of " + "rank %d", + absl::FormatStreamed(sub_chunk_shape), outer_rank)); } -// TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has -// AbslStringify support, allowing use of %v format specifier. absl::Status SubChunkShapeMismatch(span sub_chunk_shape, span chunk_shape) { - return absl::InvalidArgumentError(tensorstore::StrCat( - "sharding_indexed sub-chunk shape of ", sub_chunk_shape, - " does not evenly divide chunk shape of ", chunk_shape)); + return absl::InvalidArgumentError(absl::StrFormat( + "sharding_indexed sub-chunk shape of %s does not evenly divide chunk " + "shape of %s", + absl::FormatStreamed(sub_chunk_shape), + absl::FormatStreamed(chunk_shape))); } namespace { diff --git a/tensorstore/driver/zarr3/dtype.cc b/tensorstore/driver/zarr3/dtype.cc index 31fb3644d..c799fff2f 100644 --- a/tensorstore/driver/zarr3/dtype.cc +++ b/tensorstore/driver/zarr3/dtype.cc @@ -259,20 +259,19 @@ absl::Status ValidateDType(ZarrDType& dtype) { dtype.fields.begin(), dtype.fields.begin() + field_i, [&](const ZarrDType::Field& f) { return f.name == field.name; })) { return absl::InvalidArgumentError(absl::StrFormat( - "Field name %s occurs more than once", QuoteString(field.name))); + "Field name %v occurs more than once", QuoteString(field.name))); } field.field_shape.resize(field.flexible_shape.size() + - field.outer_shape.size());ß + field.outer_shape.size()); std::copy(field.flexible_shape.begin(), field.flexible_shape.end(), std::copy(field.outer_shape.begin(), field.outer_shape.end(), field.field_shape.begin())); field.num_inner_elements = 
ProductOfExtents(span(field.field_shape)); if (field.num_inner_elements == std::numeric_limits::max()) { - // TODO(BrianMichell): Convert to absl::StrFormat once tensorstore::span has - // AbslStringify support, allowing use of %v format specifier. - return absl::InvalidArgumentError(tensorstore::StrCat( - "Product of dimensions ", span(field.field_shape), " is too large")); + return absl::InvalidArgumentError(absl::StrFormat( + "Product of dimensions %s is too large", + absl::FormatStreamed(span(field.field_shape)))); } if (internal::MulOverflow(field.num_inner_elements, static_cast(field.dtype->size), diff --git a/tensorstore/driver/zarr3/metadata.cc b/tensorstore/driver/zarr3/metadata.cc index 29652f911..92f01368c 100644 --- a/tensorstore/driver/zarr3/metadata.cc +++ b/tensorstore/driver/zarr3/metadata.cc @@ -481,7 +481,7 @@ constexpr auto UnknownExtensionAttributesJsonBinder = } } return absl::InvalidArgumentError(absl::StrFormat( - "Unsupported metadata field %s is not marked " + "Unsupported metadata field %v is not marked " "{\"must_understand\": false}", tensorstore::QuoteString(key))); } @@ -821,7 +821,7 @@ Result GetFieldIndex(const ZarrDType& dtype, } if (!dtype.has_fields) { return absl::FailedPreconditionError(absl::StrFormat( - "Requested field %s but dtype does not have named fields", + "Requested field %v but dtype does not have named fields", QuoteString(selected_field))); } for (size_t field_index = 0; field_index < dtype.fields.size(); @@ -829,7 +829,7 @@ Result GetFieldIndex(const ZarrDType& dtype, if (dtype.fields[field_index].name == selected_field) return field_index; } return absl::FailedPreconditionError(absl::StrFormat( - "Requested field %s is not one of: %s", QuoteString(selected_field), + "Requested field %v is not one of: %s", QuoteString(selected_field), GetFieldNames(dtype))); } @@ -1103,13 +1103,11 @@ absl::Status ValidateMetadataSchema(const ZarrMetadata& metadata, tensorstore::MakeCopy(std::move(broadcast_fill_value), 
skip_repeated_elements, field.dtype)); if (!AreArraysIdenticallyEqual(converted_fill_value, fill_value)) { - auto binder = FillValueJsonBinder{metadata.data_type}; - // TODO(BrianMichellß): Convert to absl::StrFormat once SharedArray has - // AbslStringify support, allowing use of %v format specifier. - return absl::FailedPreconditionError(tensorstore::StrCat( - "Invalid fill_value: schema requires fill value of ", - schema_fill_value, ", but metadata specifies fill value of ", - fill_value)); + return absl::FailedPreconditionError(absl::StrFormat( + "Invalid fill_value: schema requires fill value of %s, but metadata " + "specifies fill value of %s", + absl::FormatStreamed(schema_fill_value), + absl::FormatStreamed(fill_value))); } }