38 changes: 38 additions & 0 deletions .github/workflows/ci_docs_publish.yml
@@ -0,0 +1,38 @@
name: Publish Documentation

on:
  push:
    branches: main

permissions:
  contents: write

jobs:
  deploy:
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4

      - name: Configure Git Credentials
        run: |
          git config user.name github-actions[bot]
          git config user.email 41898282+github-actions[bot]@users.noreply.github.com

      - name: Install extra dependencies for a python install
        run: |
          sudo apt-get update
          sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev

      - name: Install asdf cli
        uses: asdf-vm/actions/setup@b7bcd026f18772e44fe1026d729e1611cc435d47

      - name: Install software through asdf
        uses: asdf-vm/actions/install@b7bcd026f18772e44fe1026d729e1611cc435d47

      - name: reshim asdf
        run: asdf reshim

      - name: ensure poetry using desired python version
        run: poetry env use $(asdf which python)

      - run: mkdocs gh-deploy --force
16 changes: 3 additions & 13 deletions .github/workflows/ci_linting.yml
@@ -3,8 +3,6 @@ name: CI Formatting & Linting
on:
pull_request:
types: [opened, reopened, synchronize]
branches:
- main


jobs:
@@ -19,28 +17,20 @@ jobs:
sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev

- name: Install asdf cli
uses: asdf-vm/actions/setup@v4
uses: asdf-vm/actions/setup@b7bcd026f18772e44fe1026d729e1611cc435d47

- name: Install software through asdf
uses: asdf-vm/actions/install@v4
uses: asdf-vm/actions/install@b7bcd026f18772e44fe1026d729e1611cc435d47

- name: reshim asdf
run: asdf reshim

- name: ensure poetry using desired python version
run: poetry env use $(asdf which python)

- name: Cache Poetry virtualenv
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-

- name: Install lint dependencies
run: |
make install
poetry install --sync --no-interaction --with lint

- name: Run black
run: poetry run black src
14 changes: 3 additions & 11 deletions .github/workflows/ci_testing.yml
@@ -20,28 +20,20 @@ jobs:
sudo apt -y install --no-install-recommends liblzma-dev libbz2-dev libreadline-dev libxml2-utils

- name: Install asdf cli
uses: asdf-vm/actions/setup@v4
uses: asdf-vm/actions/setup@b7bcd026f18772e44fe1026d729e1611cc435d47

- name: Install software through asdf
uses: asdf-vm/actions/install@v4
uses: asdf-vm/actions/install@b7bcd026f18772e44fe1026d729e1611cc435d47

- name: reshim asdf
run: asdf reshim

- name: ensure poetry using desired python version
run: poetry env use $(asdf which python)

- name: Cache Poetry virtualenv
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-

- name: Install test dependencies
run: |
make install
poetry install --sync --no-interaction --with test

- name: Run pytest and coverage
run: |
1 change: 0 additions & 1 deletion .gitignore
@@ -32,7 +32,6 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
poetry.lock

# PyInstaller
# Usually these files are written by a python script from a template
Empty file.
5 changes: 5 additions & 0 deletions docs/advanced_guidance/code_docs/core_engine.md
@@ -0,0 +1,5 @@
::: dve.core_engine.engine.CoreEngine
    handler: python
    options:
      show_root_heading: false
      show_source: true
Binary file added docs/assets/images/favicon.ico
Binary file not shown.
4 changes: 4 additions & 0 deletions docs/assets/images/favicon.svg
Binary file added docs/assets/images/nhsuk-icon-180.png
Binary file added docs/assets/images/nhsuk-icon-192.png
Binary file added docs/assets/images/nhsuk-icon-512.png
3 changes: 3 additions & 0 deletions docs/assets/images/nhsuk-icon-mask.svg
Binary file added docs/assets/images/nhsuk-opengraph-image.png
42 changes: 42 additions & 0 deletions docs/assets/stylesheets/extra.css
@@ -0,0 +1,42 @@
:root {
  --nhs-blue: #005EB8
}

.md-footer-meta {
  background-color: transparent !important;
  box-shadow: none;
}

.md-footer-meta::before {
  content: "";
  display: block;
  height: 3px;
  background-color: var(--nhs-blue);
}

.md-footer-meta.md-typeset a svg {
  transition: transform 0.2s ease, filter 0.2s ease;
}

.md-footer-meta.md-typeset a:hover svg {
  transform: scale(1.12);
  filter: brightness(1.2) drop-shadow(0 0 6px rgba(30, 136, 229, 0.7));
}

/* Light mode */
[data-md-color-scheme="default"] {
  --md-primary-fg-color: var(--nhs-blue);
  --md-footer-bg-color: var(--md-default-bg-color);
  --md-footer-fg-color: #000000;
  --md-footer-fg-color--light: #333333;
  --md-footer-fg-color--lighter: #555555;
}

/* Dark mode */
[data-md-color-scheme="slate"] {
  --md-primary-fg-color: var(--nhs-blue);
  --md-footer-bg-color: var(--md-default-bg-color);
  --md-footer-fg-color: #e0e0e0;
  --md-footer-fg-color--light: #bdbdbd;
  --md-footer-fg-color--lighter: #9e9e9e;
}
1 change: 0 additions & 1 deletion docs/detailed_guidance/feedback_messages.md

This file was deleted.

59 changes: 34 additions & 25 deletions docs/README.md → docs/index.md
@@ -1,16 +1,24 @@
---
title: Data Validation Engine
tags:
- Home
---

# Data Validation Engine

The Data Validation Engine (DVE) is a configuration-driven data validation library.

There are 3 core steps within the DVE:

1. [File transformation](./detailed_guidance/file_transformation.md) - Parsing files from their submitted format into a common format.
2. [Data contract](./detailed_guidance/data_contract.md) - Validating the types that have been submitted and casting them.
3. [Business rules](./detailed_guidance/business_rules.md) - Performing more complex validations such as comparisons between fields and tables.
1. [File transformation](user_guidance/file_transformation.md){ data-preview } - Parsing files from their submitted format into a common format.
2. [Data contract](user_guidance/data_contract.md){ data-preview } - Validating the types that have been submitted and casting them.
3. [Business rules](user_guidance/business_rules.md){ data-preview } - Performing more complex validations such as comparisons between fields and tables.

with a 4th step that is important but more variable depending on platform and users:

4. [Error reports](./detailed_guidance/feedback_messages.md) - Compiles the errors generated from the previous stages and presents them within an Excel report. However, this could be reconfigured to meet the needs of your users.
4. [Error reports](user_guidance/feedback_messages.md){ data-preview } - Compiles the errors generated from the previous stages and presents them within an Excel report. However, this could be reconfigured to meet the needs of your users.

Each of these steps produce a list of [Feedback message](details/Feedback%20message.md) objects which can be reported back to the user for them to fix any issues.
Each of these steps produces a list of [Feedback message](user_guidance/feedback_messages.md){ data-preview } objects which can be reported back to the user for them to fix any issues.

DVE configuration can be instantiated from a json (dischema) file which might be structured like this:

@@ -83,7 +91,7 @@ DVE configuration can be instantiated from a json (dischema) file which might be
}
}
```
"Contract" is where [Data Contract](./detailed_guidance/data_contract.md) and [File Transformation](./detailed_guidance/file_transformation.md) (in the reader configs) are configured, and (due to legacy naming) transformations are where [Business rules](./detailed_guidance/business_rules.md) are configured.
"Contract" is where [Data Contract](user_guidance/data_contract.md) and [File Transformation](user_guidance/file_transformation.md) (in the reader configs) are configured, and (due to legacy naming) transformations are where [Business rules](user_guidance/business_rules.md) are configured.

## Quick start
In the code example shared above we have a json file named `cwt_example.dischema.json` and an xml file with the following structure:
@@ -103,7 +111,7 @@ We can see in `config.contract.datasets` that there is a `CWTHeader` entity decl

`version` is declared to be a `constr`, which is the constrained string type from the Pydantic library. Therefore, any keyword arguments that `constr` accepts can be passed as `constraints` here. In this case we are constraining it to a regex of 1-2 digits, followed by a literal period, followed by 1-2 digits. This should match a `max n2` data type.

`periodStartDate` on the other hand is a `conformatteddate`, this type is one that's defined in the DVE library as a `domain_type` see [Domain types](./detailed_guidance/domain_types.md). The output of a `conformatteddate` is a date type.
`periodStartDate`, on the other hand, is a `conformatteddate`; this type is defined in the DVE library as a `domain_type` (see [Domain types](user_guidance/domain_types.md)). The output of a `conformatteddate` is a date type.

This means that after the data contract step the resulting data will have the types: `version::string` and `periodStartDate::date`.
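As a rough illustration of the constrained-string idea, an equivalent check in plain Pydantic might look like this (a sketch only; Pydantic v2 syntax is shown, v1 takes `regex=` instead of `pattern=`, and the exact pattern used by the dischema config is an assumption):

```python
from pydantic import BaseModel, ValidationError, constr


class CWTHeaderSketch(BaseModel):
    # 1-2 digits, a literal period, then 1-2 digits, e.g. "1.2" or "12.34"
    version: constr(pattern=r"^\d{1,2}\.\d{1,2}$")


CWTHeaderSketch(version="1.2")  # accepted

try:
    CWTHeaderSketch(version="1.234")  # rejected: too many digits after the period
except ValidationError as exc:
    print(exc)
```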

Expand Down Expand Up @@ -155,8 +163,8 @@ readers = {"XMLStreamReader": SparkXMLStreamReader}
# File transformation step here
entities = {}
for entity in data_contract_config.schemas:
# get config based on file type you're parsing
ext_config = reader_configs[entity][".xml"]
# get config based on file type you're parsing
ext_config = reader_configs[entity][".xml"]
reader = readers[ext_config.reader](**ext_config.parameters)
df = reader.read_to_dataframe(
"cwt_example.xml", entity, stringify_model(data_contract_config.schemas[entity])
@@ -187,6 +195,7 @@ from the top down we
**data contract**
- instantiate the SparkDataContract class with a spark session
- apply the data contract to the dict of entities, returning the entities in the correct types, any validation messages, and a success bool (a rough sketch follows after this list)
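A minimal sketch of those two bullets (the import path and the `apply` method name are assumptions inferred from this walkthrough, not a confirmed API):

```python
from pyspark.sql import SparkSession

# Assumed import path for the Spark backend; the real module layout may differ.
from dve.core_engine.backends.implementations.spark import SparkDataContract

spark = SparkSession.builder.getOrCreate()

# Instantiate the data contract with a spark session...
data_contract = SparkDataContract(spark)

# ...then apply it to the dict of entities produced by the file transformation
# step. Per the description above this yields the typed entities, any
# validation messages, and a success bool (method and return shape assumed).
typed_entities, contract_messages, contract_success = data_contract.apply(
    entities, data_contract_config
)
```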

### Business rules

Now that we have typed entities, we can apply business rules to them. We need a step implementation; we'll import that from the spark rules backend.
@@ -200,10 +209,10 @@ business_rule_config = config.get_rule_metadata()
messages = business_rules.apply_rules(entities, business_rule_config)
```

There we go. Messages is a list of [Feedback message](./detailed_guidance/feedback_messages.md) for every failed rule.
There we go. `messages` is a list of [Feedback message](user_guidance/feedback_messages.md) objects, one for every failed rule.
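For instance, you could separate informational notices from hard failures like this (a minimal sketch; the attribute names follow the `FeedbackMessage` example shown further down this page):

```python
# Keep only the messages that represent genuine failures.
hard_failures = [m for m in messages if not m.is_informational]

for message in hard_failures:
    print(message.error_code, message.reporting_field, message.error_message)
```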

### Utilising the Pipeline objects to run the DVE
Within the DVE package, we have also created the ability to build pipeline objects to help orchestrate the running of the DVE from start to finish. We currently have an implementation for `Spark` and `DuckDB`. These pipeline objects abstract some of the complexity described above and only requires you to supply a few objects to run the DVE from start (file transformation) to finish (error reports). These can be read in further detail [here](../src/pipeline/) and we have tests [here](../tests/test_pipeline/) to ensure they are working as expected. Furthermore, if you have a situation where maybe you only want to run the Data Contract, then you can utilise the pipeline objects in a way that only runs the specific stages that you want. Below will showcase an example where the full e2e pipeline is run and how you can trigger the stages that you want.
Within the DVE package, we have also created the ability to build pipeline objects to help orchestrate the running of the DVE from start to finish. We currently have an implementation for `Spark` and `DuckDB`. These pipeline objects abstract some of the complexity described above and only require you to supply a few objects to run the DVE from start (file transformation) to finish (error reports). They are described in further detail [here](https://github.com/NHSDigital/data-validation-engine/tree/main/src/dve/pipeline) and we have tests [here](https://github.com/NHSDigital/data-validation-engine/tree/main/tests/test_pipeline) to ensure they are working as expected. Furthermore, if you only want to run the Data Contract, you can utilise the pipeline objects in a way that only runs the specific stages that you want. Below we showcase an example where the full e2e pipeline is run and how you can trigger just the stages that you want.

> **Note: in the version that comes from GitLab, the DVE library is spread across a number of modules. We are looking to put this in a top-level `dve` module.**

Expand Down Expand Up @@ -272,7 +281,7 @@ If you'd rather not rely on needing a `metadata.json` associated with your submi

### Mixing backends

The examples shown above are using the Spark Backend. DVE also has a DuckDB backend found at [core_engine.backends.implementations.duckdb](../src/core_engine/backends/implementations/duckdb/). In order to mix the two you will need to convert from one type of entity to the other. For example from a spark `Dataframe` to DuckDB `relation`. The easiest way to do this is to use the `write_parquet` method from one backend and use `read_parquet` from another backend.
The examples shown above are using the Spark backend. DVE also has a DuckDB backend found at [core_engine.backends.implementations.duckdb](https://github.com/NHSDigital/data-validation-engine/tree/main/src/dve/core_engine/backends/implementations/duckdb). In order to mix the two you will need to convert from one type of entity to the other, for example from a Spark `DataFrame` to a DuckDB `relation`. The easiest way to do this is to use the `write_parquet` method from one backend and `read_parquet` from the other.
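For example, a hand-off via parquet might look like the following (a sketch using the plain PySpark and DuckDB APIs rather than the backend helper methods; `spark_df` is a placeholder for an entity produced by the Spark backend):

```python
import duckdb

# Persist the Spark entity as parquet...
spark_df.write.mode("overwrite").parquet("/tmp/apc_activity")

# ...and pick it up as a DuckDB relation for the next stage.
con = duckdb.connect()
relation = con.read_parquet("/tmp/apc_activity/*.parquet")
```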

Currently the configuration isn't backend-agnostic for applying business rules, so if you want to swap between Spark and DuckDB, the business rules need to be written using only features that are common to both backends. For example, a regex check in Spark would be something along the lines of...
```sql
@@ -285,20 +294,20 @@ regexp_matches(nhsnumber, '^\d{10}$')
Failures in parsing the expressions lead to failure messages such as
```python
FeedbackMessage(
entity=None,
record=None,
failure_type='integrity',
is_informational=False,
error_type=None,
error_location=None,
error_message="Unexpected error (AnalysisException: Undefined function: 'regexp_matches'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 5) in transformations (rule: root; step: 0; id: None)",
error_code=None,
reporting_field=None,
reporting_field_name=None,
value=None,
category=None
entity=None,
record=None,
failure_type='integrity',
is_informational=False,
error_type=None,
error_location=None,
error_message="Unexpected error (AnalysisException: Undefined function: 'regexp_matches'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 5) in transformations (rule: root; step: 0; id: None)",
error_code=None,
reporting_field=None,
reporting_field_name=None,
value=None,
category=None
)
```

# Extra information
Thanks for reading the documentation and looking into utilising the DVE. If you need more information on any of the steps you can find the following guidance below. If you need additional support, please raise an issue ([see guidance here](../CONTRIBUTE.md)) and we will try and respond to you as quickly as possible.
Thanks for reading the documentation and looking into utilising the DVE. If you need more information on any of the steps you can find the following guidance below. If you need additional support, please raise an issue ([see guidance here](https://github.com/NHSDigital/data-validation-engine/blob/main/CONTRIBUTE.md)) and we will try and respond to you as quickly as possible.
@@ -28,3 +28,6 @@ For autocomplete support in VS Code, alter `settings.json` and add new entries t

Data Ingest JSON schemas (when saved with file_name `dataset.dischema.json`) should then have
autocomplete support.

# Components
[https://github.com/NHSDigital/data-validation-engine/tree/main/docs/json_schemas](https://github.com/NHSDigital/data-validation-engine/tree/main/docs/json_schemas)
5 changes: 5 additions & 0 deletions docs/tags.md
@@ -0,0 +1,5 @@
# Tags

Following is a list of relevant tags:

<!-- material/tags -->
@@ -1,5 +1,5 @@
# Business Rules
Business rules are defined in the `transformations` section of the config. There are 6 keys within the json document that we will discuss in more detail throughout this document.
Business rules are defined within the `transformations` section of the config. There are 6 keys within the json document that we will discuss in more detail throughout this document.

## Keys
| Key | Purpose |
@@ -15,19 +15,20 @@ Business rules are defined in the `transformations` section of the config. There
These are the simplest of the business rules. They are defined as a json object with the following structure:
```json
{
"entity": "APCActivity",
"name": "EpiNo_is_valid",
"expression": "EpiNo IS NULL OR EpiNo RLIKE '^(0[1-9]|[1-7][0-9]|8[0-7]|9[89])$'",
"failure_type": "submission",
"failure_message": "is invalid",
"error_code": "1203",
"reporting_field": "EpiNo",
"is_informational" : false,
"category": "Bad value"
"entity": "APCActivity",
"name": "EpiNo_is_valid",
"expression": "EpiNo IS NULL OR EpiNo RLIKE '^(0[1-9]|[1-7][0-9]|8[0-7]|9[89])$'",
"failure_type": "submission",
"failure_message": "is invalid",
"error_code": "1203",
"reporting_field": "EpiNo",
"is_informational" : false,
"category": "Bad value"
}
```
This rule checks that, when EpiNo is present, its value is 01-87, 98, or 99. If EpiNo is missing, this rule doesn't fire (to prevent double-dinging a missing value). Any EpiNo values that are present but not one of the expected values will raise a 1203 error with the message "is invalid".
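As a quick sanity check of what that pattern accepts, the same regex can be exercised with plain Python (a sketch; Spark's `RLIKE` applies the equivalent match):

```python
import re

# Same pattern as the rule expression above.
pattern = re.compile(r"^(0[1-9]|[1-7][0-9]|8[0-7]|9[89])$")

for value in ["01", "42", "87", "88", "98", "99"]:
    print(value, bool(pattern.match(value)))
# 01, 42, 87, 98 and 99 match; 88 does not.
```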
Let's break it down:

| Key | Purpose |
| --- | ------- |
| `entity` | This is the name of the entity to perform the filter on. In this case, the `APCActivity` dataframe |
Expand Down Expand Up @@ -220,7 +221,7 @@ We've covered adding filters to complex rules, but we can add rules to them aswe
- used for things like checking that all dates submitted in a file match the header
- one_to_one_join
- join to another entity expecting no change in the number of rows. integrity check can be toggled off
> see [json_schemas/transformations](../json_schemas/transformations/) for expected fields for each operation
> see [json_schemas/transformations](../json_schemas/jsonschemas.md) for expected fields for each operation

Rules are executed in the order they are put into the array. So a join then select should be implemented in that order.
