From bebe60c66f28c9d85d92049d3165deffb037c631 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:35:12 +0000
Subject: [PATCH 01/18] chore: add pandas and sort alphabetically

---
 requirements-test.txt | 4 ++--
 requirements.txt      | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/requirements-test.txt b/requirements-test.txt
index b30f9dc..517e35c 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,3 +1,3 @@
-pytest==7.4.3
+pytest-emoji==0.2.0
 pytest-md==0.2.0
-pytest-emoji==0.2.0
\ No newline at end of file
+pytest==7.4.3
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 64993a3..d0ec330 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 click==8.1.7
+pandas==2.2.3
+pdf2image==1.17.0
 python-docx==1.1.0
 python-pptx==0.6.23
 xlrd==2.0.1
 xlutils==2.0.0
-pdf2image==1.17.0

From 2d84df795dbfe8bac062fbd6dc2549c3a98abf1f Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:36:00 +0000
Subject: [PATCH 02/18] feat: add function to normalize column indices

---
 pronto/pronto.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index 3f87cbb..c5ff8e1 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -1,6 +1,7 @@
 import glob
 import logging
 import os
+import pandas
 
 # get tumor mutational burden label
 def get_tmb_string(val):
@@ -27,3 +28,15 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
 	else:
 		logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
 		raise ValueError
+
+def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
+	# determine current, missing and additional column indices
+	curr_col_idx = df.columns.tolist()
+	miss_col_idx = list(set(exp_col_idx) - set(curr_col_idx))
+	add_col_idx = list(set(curr_col_idx) - set(exp_col_idx))
+	# add missing column indices
+	for i in miss_col_idx:
+		df[i] = ' '
+	# combine expected with additional to get all present column indices and rearrange columns accordingly, additional columns are moved to the right
+	all_col_idx = exp_col_idx + add_col_idx
+	return df[all_col_idx]
\ No newline at end of file

From fd2e0acfcd06c69bc49ac8fd23f4439aeba10b07 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:36:41 +0000
Subject: [PATCH 03/18] test: add unittests for column index normalization
 function

---
 pronto/tests/pronto_test.py | 83 +++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index c0e6aa7..9213355 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -1,3 +1,4 @@
+import pandas
 import pytest
 import pronto.pronto
 
@@ -99,3 +100,85 @@ def test_get_tmb_string(input, exception, want):
 def test_glob_tsoppi_file(inputs, exception, want):
     with exception:
         assert pronto.pronto.glob_tsoppi_file(*inputs) == want
+
+@pytest.mark.parametrize(
+    "inputs, exception, want",
+    [
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "three": [5, 6],
+                    "four": [7, 8],
+                }),
+                ["one", "two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": [3, 4],
+                "three": [5, 6],
+                "four": [7, 8],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "four": [7, 8],
+                }),
+                ["one", "two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": [3, 4],
+                "three": [' ', ' '],
+                "four": [7, 8],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "three": [5, 6],
+                    "four": [7, 8],
+                }),
+                ["two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "two": [3, 4],
+                "three": [5, 6],
+                "four": [7, 8],
+                "one": [1, 2],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                    "four": [7, 8],
+                    "five": [9, 10],
+                }),
+                ["one", "two", "three", "four"],
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": [3, 4],
+                "three": [' ', ' '],
+                "four": [7, 8],
+                "five": [9, 10],
+            }),
+        ),
+    ]
+)
+def test_normalize_column_index(inputs, exception, want):
+    with exception:
+        get = pronto.pronto.normalize_column_index(*inputs)
+        assert want.equals(get)
\ No newline at end of file

From 09e6aee619e1d6b958cef5aab48f21c3680d8488 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:42:39 +0000
Subject: [PATCH 04/18] feat: use column index normalizing function in main
 script, add comments and clean up

---
 Script/PRONTO.py | 56 +++++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 31 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 3aa3c4a..2efa4be 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -28,6 +28,7 @@
 from decimal import Decimal
 from copy import deepcopy
 import pronto.pronto as pronto
+import pandas
 from pdf2image import convert_from_path
 
 runID = ""
@@ -731,45 +732,38 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
 	ppt.save(output_ppt_file)
 
 
-def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
-	table_file = open(table_data_file)
-	lines = table_file.readlines()
-	if not lines:
+def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
+
+	# load table data
+	try:
+		table_data = pandas.read_csv(table_file, sep='\t')
+	except pandas.errors.EmptyDataError:
+		logging.warning("The file is empty.")
 		return
-	first_line = lines[0]
-	first_line_cells = first_line.split('\t')
+	
+	# add empty columns for missing header columns and move additional columns to the right
+	table_data = pronto.normalize_column_index(table_data, table_header)
+
+	# determine column and row number
 	cols = len(table_header)
-	header_not_exist_in_table = []
-	for n in range(len(table_header)):
-		if_exist = False
-		if(table_header[n] in first_line_cells):
-			if_exist = True
-		if not if_exist:
-			header_not_exist_in_table.append(n)
-	data_rows = []
-	for line in lines[1:]:
-		line_cells =  line.split('\t')
-		if header_not_exist_in_table:
-			for num in header_not_exist_in_table:
-				line_cells.insert(num," ")
-		row_data = [cell.strip() for cell in line.split('\t')]
-		data_rows.append(row_data)
-	total_rows = len(data_rows)
+	rows = len(table_data)
 
-	ppt = Presentation(output_ppt_file)
-	if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide):
+	# how many slides, rows per slide, and start slide index
+	if(table_max_rows_per_slide is None or rows <= table_max_rows_per_slide):
 		total_slides_needed = 1
-		rows_per_page = total_rows
+		rows_per_page = rows
 		start_slide_index = slide_n
 	else:
-		total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
+		total_slides_needed = (rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
 		rows_per_page = table_max_rows_per_slide
 		start_slide_index = None
 
+	ppt = Presentation(output_ppt_file)
 	for page_num in range(total_slides_needed):
 		start_idx = page_num * rows_per_page
-		end_idx = min(start_idx + rows_per_page, total_rows)
-		current_page_data = data_rows[start_idx:end_idx]
+		end_idx = min(start_idx + rows_per_page, rows)
+		data_rows = table_data.values.tolist()
+		current_page_data = data_rows[start_idx:end_idx] # use df
 		current_page_rows = len(current_page_data)
 		if(start_slide_index is not None and page_num == 0):
 			slide = ppt.slides[slide_n - 1]
@@ -797,9 +791,9 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
 			if(table_max_rows_per_slide is not None):
-				tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
+				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
 			else:
-				tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")"
+				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
 		else:
 			tf.paragraphs[0].text = table_name
 		tf.paragraphs[0].font.size = Pt(8)
@@ -807,7 +801,7 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
 		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
 
 	ppt.save(output_ppt_file)
-	return total_rows
+	return rows
 
 
 def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):

From 22edc4ba97d3151fb7e1996bf38ddb8df223120e Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 13:43:54 +0000
Subject: [PATCH 05/18] feat: make warning on empty file more specific

---
 Script/PRONTO.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 2efa4be..4d861fb 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -738,7 +738,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	try:
 		table_data = pandas.read_csv(table_file, sep='\t')
 	except pandas.errors.EmptyDataError:
-		logging.warning("The file is empty.")
+		logging.warning("{} is empty".format(table_file))
 		return
 	
 	# add empty columns for missing header columns and move additional columns to the right

From 7f5509ac63491077e045db2c57aacbe4a3238992 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 29 Jan 2026 14:24:11 +0000
Subject: [PATCH 06/18] feat: simplify variable setting via if statement

---
 Script/PRONTO.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 4d861fb..5fcb630 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -29,6 +29,7 @@
 from copy import deepcopy
 import pronto.pronto as pronto
 import pandas
+import math
 from pdf2image import convert_from_path
 
 runID = ""
@@ -748,20 +749,16 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	cols = len(table_header)
 	rows = len(table_data)
 
-	# how many slides, rows per slide, and start slide index
-	if(table_max_rows_per_slide is None or rows <= table_max_rows_per_slide):
-		total_slides_needed = 1
-		rows_per_page = rows
-		start_slide_index = slide_n
-	else:
-		total_slides_needed = (rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
-		rows_per_page = table_max_rows_per_slide
-		start_slide_index = None
+	# how many slides, and start slide index
+	if not table_max_rows_per_slide:
+		table_max_rows_per_slide = rows
+	total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
+	start_slide_index = None if total_slides_needed > 1 else slide_n
 
 	ppt = Presentation(output_ppt_file)
 	for page_num in range(total_slides_needed):
-		start_idx = page_num * rows_per_page
-		end_idx = min(start_idx + rows_per_page, rows)
+		start_idx = page_num * table_max_rows_per_slide
+		end_idx = min(start_idx + table_max_rows_per_slide, rows)
 		data_rows = table_data.values.tolist()
 		current_page_data = data_rows[start_idx:end_idx] # use df
 		current_page_rows = len(current_page_data)
@@ -790,7 +787,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
-			if(table_max_rows_per_slide is not None):
+			if(total_slides_needed > 1):
 				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
 			else:
 				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"

From 282a54ec29d9bda552fcf74aeafd8623bd709649 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Tue, 3 Feb 2026 14:26:50 +0000
Subject: [PATCH 07/18] feat: round floats in AF_tumor_DNA to 2 decimal places

---
 Script/PRONTO.py | 3 +++
 pronto/pronto.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 5fcb630..23c543a 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -745,6 +745,9 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	# add empty columns for missing header columns and move additional columns to the right
 	table_data = pronto.normalize_column_index(table_data, table_header)
 
+	# round floats to 2 decimal places
+	table_data = pronto.set_column_to_2_decimals(table_data, "AF_tumor_DNA")
+
 	# determine column and row number
 	cols = len(table_header)
 	rows = len(table_data)
diff --git a/pronto/pronto.py b/pronto/pronto.py
index c5ff8e1..ca34994 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -39,4 +39,11 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 		df[i] = ' '
 	# combine expected with additional to get all present column indices and rearrange columns accordingly, additional columns are moved to the right
 	all_col_idx = exp_col_idx + add_col_idx
-	return df[all_col_idx]
\ No newline at end of file
+	return df[all_col_idx]
+
+def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
+	if col_name in df.columns:
+		df[col_name] = df[col_name].map('{:.2f}'.format)
+	else:
+		logging.info("Column {} not found in dataframe".format(col_name))
+	return df
\ No newline at end of file

From b88f6523cdddd49eb0b6b1a580147f610d20c7d7 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Tue, 3 Feb 2026 14:27:12 +0000
Subject: [PATCH 08/18] test: add unittests for rounding function

---
 pronto/tests/pronto_test.py | 50 +++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index 9213355..d8ebbf2 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -181,4 +181,54 @@ def test_glob_tsoppi_file(inputs, exception, want):
 def test_normalize_column_index(inputs, exception, want):
     with exception:
         get = pronto.pronto.normalize_column_index(*inputs)
+        assert want.equals(get)
+
+@pytest.mark.parametrize(
+    "inputs, exception, want",
+    [
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3.333, 4.444],
+                }),
+                "two",
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": ["3.33", "4.44"],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3.666, 4.777],
+                }),
+                "two",
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+                "two": ["3.67", "4.78"],
+            }),
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                }),
+                "two",
+            ),
+            does_not_raise(),
+            pandas.DataFrame({
+                "one": [1, 2],
+            }),
+        ),
+    ]
+)
+def test_set_column_to_2_decimals(inputs, exception, want):
+    with exception:
+        get = pronto.pronto.set_column_to_2_decimals(*inputs)
         assert want.equals(get)
\ No newline at end of file

From 2ec761f8c21aca43ecf7ee0909b749163fb7cbd3 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Wed, 4 Feb 2026 08:16:54 +0000
Subject: [PATCH 09/18] style: be consistent with using idx for index

---
 Script/PRONTO.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index 23c543a..eed437d 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -756,7 +756,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	if not table_max_rows_per_slide:
 		table_max_rows_per_slide = rows
 	total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
-	start_slide_index = None if total_slides_needed > 1 else slide_n
+	start_slide_idx = None if total_slides_needed > 1 else slide_n
 
 	ppt = Presentation(output_ppt_file)
 	for page_num in range(total_slides_needed):
@@ -765,7 +765,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		data_rows = table_data.values.tolist()
 		current_page_data = data_rows[start_idx:end_idx] # use df
 		current_page_rows = len(current_page_data)
-		if(start_slide_index is not None and page_num == 0):
+		if(start_slide_idx is not None and slide_idx == 0):
 			slide = ppt.slides[slide_n - 1]
 		else:
 			slide = ppt.slides.add_slide(ppt.slide_layouts[6])

From 7f080ea093e3356d94ec2e21ee22db82d8690a3e Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Wed, 4 Feb 2026 08:22:22 +0000
Subject: [PATCH 10/18] chore: rename page_num to slide_idx to be consistent
 with naming

---
 Script/PRONTO.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index eed437d..bf57686 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -759,7 +759,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	start_slide_idx = None if total_slides_needed > 1 else slide_n
 
 	ppt = Presentation(output_ppt_file)
-	for page_num in range(total_slides_needed):
+	for slide_idx in range(total_slides_needed):
 		start_idx = page_num * table_max_rows_per_slide
 		end_idx = min(start_idx + table_max_rows_per_slide, rows)
 		data_rows = table_data.values.tolist()
@@ -791,7 +791,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 		tf = textbox.text_frame
 		if(if_print_rowNo == True):
 			if(total_slides_needed > 1):
-				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
+				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(slide_idx+1) + "/" + str(total_slides_needed) + ")"
 			else:
 				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
 		else:

From a81fa30c6c2bf436cac8d9c43ac22ef880a5911b Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 13:56:22 +0000
Subject: [PATCH 11/18] fix: handle rounding if type is string

---
 pronto/pronto.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index ca34994..f91a22e 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -43,6 +43,8 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 
 def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 	if col_name in df.columns:
+		if df[col_name].dtype != float:
+			df[col_name] = df[col_name].astype(float)
 		df[col_name] = df[col_name].map('{:.2f}'.format)
 	else:
 		logging.info("Column {} not found in dataframe".format(col_name))

From afdbf800146a0e58ff74a964db06ab3ec3a098f9 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 13:57:10 +0000
Subject: [PATCH 12/18] test: include test case for decimal rounding that
 contains strings

---
 pronto/tests/pronto_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index d8ebbf2..3bbc96d 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -204,7 +204,7 @@ def test_normalize_column_index(inputs, exception, want):
             (
                 pandas.DataFrame({
                     "one": [1, 2],
-                    "two": [3.666, 4.777],
+                    "two": ['3.666', '4.777'],
                 }),
                 "two",
             ),

From 976c0ca1dcd4d4bb8634dced6dcdcf130c178966 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 13:59:45 +0000
Subject: [PATCH 13/18] feat: introduce functions to get table data per slide
 and add table name to slide

---
 pronto/pronto.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index f91a22e..94b5809 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -2,6 +2,7 @@
 import logging
 import os
 import pandas
+import pptx
 
 # get tumor mutational burden label
 def get_tmb_string(val):
@@ -48,4 +49,31 @@ def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 		df[col_name] = df[col_name].map('{:.2f}'.format)
 	else:
 		logging.info("Column {} not found in dataframe".format(col_name))
-	return df
\ No newline at end of file
+	return df
+
+# get data fitting on one slide based on slide index and max rows per slide
+def get_slide_table_data(df: pandas.DataFrame, slide_idx: int, max_rows: int):
+	start = slide_idx * max_rows
+	stop = min(start + max_rows, len(df))
+	if start >= len(df):
+		return []
+	table = df.values.tolist()
+	header = [df.columns.tolist()]
+	table_data = header + table[start:stop]
+	return table_data
+
+# add constructed table name to slide and format the textbox
+def add_table_name(shapes: pptx.shapes.shapetree.SlideShapes, table_name: str, left: float, top: float, width: float, height: float, font_size: float, print_row_num: bool, slide_idx: int, total_slides: int, rows: int):
+
+	# add textbox to slide
+	paragraph = shapes.add_textbox(pptx.util.Inches(left), pptx.util.Inches(top), pptx.util.Inches(width), pptx.util.Inches(height)).text_frame.paragraphs[0]
+
+	# construct table name with optional row number and slide count
+	part_1 = ", Page {}/{}".format(slide_idx + 1, total_slides) if total_slides > 1 else ''
+	part_2 = " (N={}{})".format(rows, part_1) if print_row_num else ''
+	paragraph.text = "{}{}".format(table_name, part_2)
+
+	# font formatting and placement
+	paragraph.font.size = pptx.util.Pt(font_size)
+	paragraph.font.bold = True
+	paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER

From 51628f8cc34c1648b7bb783049ab22edfb2c338c Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 14:00:06 +0000
Subject: [PATCH 14/18] style: add comments to code

---
 pronto/pronto.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index 94b5809..3712a9a 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -30,6 +30,7 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
 		logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
 		raise ValueError
 
+# normalize dataframe to expected column indices
 def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 	# determine current, missing and additional column indices
 	curr_col_idx = df.columns.tolist()
@@ -42,6 +43,7 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 	all_col_idx = exp_col_idx + add_col_idx
 	return df[all_col_idx]
 
+# set dataframe column format to 2 decimal points
 def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 	if col_name in df.columns:
 		if df[col_name].dtype != float:

From a686731ffa14ce6b471747e011c7d79aea9c922c Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 14:00:34 +0000
Subject: [PATCH 15/18] test: add tests for table data and table name functions

---
 pronto/tests/pronto_test.py | 186 +++++++++++++++++++++++++++++++++++-
 1 file changed, 185 insertions(+), 1 deletion(-)

diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index 3bbc96d..7192c32 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -1,4 +1,5 @@
 import pandas
+import pptx
 import pytest
 import pronto.pronto
 
@@ -231,4 +232,187 @@ def test_normalize_column_index(inputs, exception, want):
 def test_set_column_to_2_decimals(inputs, exception, want):
     with exception:
         get = pronto.pronto.set_column_to_2_decimals(*inputs)
-        assert want.equals(get)
\ No newline at end of file
+        assert want.equals(get)
+
+def list_of_lists_equal(list1, list2):
+    if len(list1) != len(list2):
+        return False
+    for sublist1, sublist2 in zip(list1, list2):
+        print(sublist1, sublist2)
+        if sublist1 != sublist2:
+            return False
+    return True
+
+@pytest.mark.parametrize(
+    "inputs, exception, want",
+    [
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2],
+                    "two": [3, 4],
+                }),
+                0,
+                3,
+            ),
+            does_not_raise(),
+            [
+                ["one", "two"],
+                [1, 3],
+                [2, 4],
+            ],
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2, 3 ,4],
+                    "two": [5, 6, 7, 8],
+                }),
+                1,
+                2,
+            ),
+            does_not_raise(),
+            [
+                ["one", "two"],
+                [3, 7],
+                [4, 8],
+            ],
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2, 3],
+                    "two": [5, 6, 7],
+                }),
+                1,
+                2,
+            ),
+            does_not_raise(),
+            [
+                ["one", "two"],
+                [3, 7],
+            ],
+        ),
+        (
+            (
+                pandas.DataFrame({
+                    "one": [1, 2, 3],
+                    "two": [5, 6, 7],
+                }),
+                2,
+                2,
+            ),
+            does_not_raise(),
+            [],
+        ),
+    ]
+)
+def test_get_slide_table_data(inputs, exception, want):
+    with exception:
+        get = pronto.pronto.get_slide_table_data(*inputs)
+        assert list_of_lists_equal(get, want)
+
+def check_shape(shape, want_left, want_top, want_width, want_height):
+    assert shape.left == pptx.util.Inches(want_left)
+    assert shape.top == pptx.util.Inches(want_top)
+    assert shape.width == pptx.util.Inches(want_width)
+    assert shape.height == pptx.util.Inches(want_height)
+
+def check_paragraph(paragraph, want_text, want_font_size, want_bold, want_alignment):
+    assert paragraph.text == want_text
+    assert paragraph.font.size.pt == want_font_size
+    assert paragraph.font.bold == want_bold
+    assert paragraph.alignment == want_alignment
+
+@pytest.mark.parametrize(
+    "inputs, exception, want_shape, want_paragraph",
+    [
+        (
+            (
+                'Test',
+                0.5,
+                0.5,
+                4,
+                1,
+                12,
+                True,
+                0,
+                3,
+                4,
+            ),
+            does_not_raise(),
+            (
+                0.5,
+                0.5,
+                4,
+                1,
+            ),
+            (
+                'Test (N=4, Page 1/3)',
+                12.0,
+                True,
+                pptx.enum.text.PP_ALIGN.CENTER,
+            )
+        ),
+        (
+            (
+                'Test',
+                0.5,
+                0.5,
+                4,
+                1,
+                12,
+                True,
+                0,
+                1,
+                4,
+            ),
+            does_not_raise(),
+            (
+                0.5,
+                0.5,
+                4,
+                1,
+            ),
+            (
+                'Test (N=4)',
+                12.0,
+                True,
+                pptx.enum.text.PP_ALIGN.CENTER,
+            )
+        ),
+        (
+            (
+                'Test',
+                0.5,
+                0.5,
+                4,
+                1,
+                12,
+                False,
+                0,
+                3,
+                4,
+            ),
+            does_not_raise(),
+            (
+                0.5,
+                0.5,
+                4,
+                1,
+            ),
+            (
+                'Test',
+                12.0,
+                True,
+                pptx.enum.text.PP_ALIGN.CENTER,
+            )
+        ),
+    ]
+)
+def test_add_table_name(inputs, exception, want_shape, want_paragraph):
+    with exception:
+        shapes = pptx.Presentation().slides.add_slide(pptx.Presentation().slide_layouts[6]).shapes
+        pronto.pronto.add_table_name(shapes, *inputs)
+        check_shape(shapes[0], *want_shape)
+        check_paragraph(shapes[0].text_frame.paragraphs[0], *want_paragraph)
\ No newline at end of file

From bca5f7e78329d0f996b0075bc219163fa44f3276 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 5 Feb 2026 14:02:19 +0000
Subject: [PATCH 16/18] feat: refactor and simplify insert_table_to_ppt

---
 Script/PRONTO.py | 60 +++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 36 deletions(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index bf57686..c5be538 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -733,7 +733,7 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
 	ppt.save(output_ppt_file)
 
 
-def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
+def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,print_row_num,table_column_width,table_max_rows_per_slide):
 
 	# load table data
 	try:
@@ -752,53 +752,41 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 	cols = len(table_header)
 	rows = len(table_data)
 
-	# how many slides, and start slide index
+	# how many slides are required
 	if not table_max_rows_per_slide:
 		table_max_rows_per_slide = rows
 	total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
-	start_slide_idx = None if total_slides_needed > 1 else slide_n
 
+	# Add data to ppt
 	ppt = Presentation(output_ppt_file)
 	for slide_idx in range(total_slides_needed):
-		start_idx = page_num * table_max_rows_per_slide
-		end_idx = min(start_idx + table_max_rows_per_slide, rows)
-		data_rows = table_data.values.tolist()
-		current_page_data = data_rows[start_idx:end_idx] # use df
-		current_page_rows = len(current_page_data)
-		if(start_slide_idx is not None and slide_idx == 0):
-			slide = ppt.slides[slide_n - 1]
+		current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide)
+		if(total_slides_needed == 1):
+			shapes = ppt.slides[slide_n - 1].shapes
 		else:
-			slide = ppt.slides.add_slide(ppt.slide_layouts[6])
-		shapes = slide.shapes
+			shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes
+
+		# create new table on slide
 		left = Inches(left_t)
 		top = Inches(top_t)
 		width = Inches(width_t)
 		height = Inches(height_t)
-		table_rows = current_page_rows + 1
+		table_rows = len(current_slide_data)
 		table = shapes.add_table(table_rows,cols,left,top,width,height).table
-		for c in range(cols):
-			if table_column_width:
-				table.columns[c].width = Inches(table_column_width[c])
-			table.cell(0,c).text = table_header[c]
-			table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)
-
-		for row_idx, row_data in enumerate(current_page_data, start=1):
-			for col_idx in range(cols):
-				table.cell(row_idx,col_idx).text = str(row_data[col_idx])
-				table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size)
-
-		textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
-		tf = textbox.text_frame
-		if(if_print_rowNo == True):
-			if(total_slides_needed > 1):
-				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(slide_idx+1) + "/" + str(total_slides_needed) + ")"
-			else:
-				tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
-		else:
-			tf.paragraphs[0].text = table_name
-		tf.paragraphs[0].font.size = Pt(8)
-		tf.paragraphs[0].font.bold = True
-		tf.paragraphs[0].alignment = PP_ALIGN.CENTER
+
+		# set column widths only if one width per column is given (table_column_width may be None or empty)
+		if table_column_width and len(table_column_width) == cols:
+			for col_idx, col_width in enumerate(table_column_width):
+				table.columns[col_idx].width = Inches(col_width)
+		
+		# fill in the table data (converted to str, since python-pptx only accepts text for cell.text) and set font size
+		for row_idx, row in enumerate(table.rows):
+			for col_idx, cell in enumerate(row.cells):
+				cell.text = str(current_slide_data[row_idx][col_idx])
+				cell.text_frame.paragraphs[0].font.size = Pt(font_size)
+
+		# add table title
+		pronto.add_table_title(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)
 
 	ppt.save(output_ppt_file)
 	return rows

From ce3dcedfcefef7dc3691a2bb3df6a2fa843739a2 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Thu, 12 Feb 2026 12:01:02 +0000
Subject: [PATCH 17/18] fix: use correct function name, thanks @xiaoliz0

---
 Script/PRONTO.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Script/PRONTO.py b/Script/PRONTO.py
index c5be538..2062067 100755
--- a/Script/PRONTO.py
+++ b/Script/PRONTO.py
@@ -786,7 +786,7 @@ def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_
 				cell.text_frame.paragraphs[0].font.size = Pt(font_size)
 
 		# add table title
-		pronto.add_table_title(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)
+		pronto.add_table_name(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)
 
 	ppt.save(output_ppt_file)
 	return rows

From bb2daccad7a32f0cd91a175bcfc0558186ef5910 Mon Sep 17 00:00:00 2001
From: Martin Rippin <marrip@ihelse.net>
Date: Fri, 13 Feb 2026 12:40:50 +0000
Subject: [PATCH 18/18] fix: handle strings with % and floats in
 set_column_to_2_decimals

---
 pronto/pronto.py            | 7 +++----
 pronto/tests/pronto_test.py | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/pronto/pronto.py b/pronto/pronto.py
index 3712a9a..fc22dde 100644
--- a/pronto/pronto.py
+++ b/pronto/pronto.py
@@ -43,12 +43,11 @@ def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
 	all_col_idx = exp_col_idx + add_col_idx
 	return df[all_col_idx]
 
-# set dataframe column format to 2 decimal points
+# format dataframe column to 2 decimal places if it has float dtype; other dtypes (e.g. '21.0%' strings) are left unchanged
 def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
 	if col_name in df.columns:
-		if df[col_name].dtype != float:
-			df[col_name] = df[col_name].astype(float)
-		df[col_name] = df[col_name].map('{:.2f}'.format)
+		if df[col_name].dtype == float:
+			df[col_name] = df[col_name].map('{:.2f}'.format)
 	else:
 		logging.info("Column {} not found in dataframe".format(col_name))
 	return df
diff --git a/pronto/tests/pronto_test.py b/pronto/tests/pronto_test.py
index 7192c32..e58028e 100644
--- a/pronto/tests/pronto_test.py
+++ b/pronto/tests/pronto_test.py
@@ -205,14 +205,14 @@ def test_normalize_column_index(inputs, exception, want):
             (
                 pandas.DataFrame({
                     "one": [1, 2],
-                    "two": ['3.666', '4.777'],
+                    "two": ['21.0%', '0.5%'],
                 }),
                 "two",
             ),
             does_not_raise(),
             pandas.DataFrame({
                 "one": [1, 2],
-                "two": ["3.67", "4.78"],
+                "two": ["21.0%", "0.5%"],
             }),
         ),
         (