Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 47 additions & 65 deletions Script/PRONTO.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from decimal import Decimal
from copy import deepcopy
import pronto.pronto as pronto
import pandas
import math
from pdf2image import convert_from_path

runID = ""
Expand Down Expand Up @@ -731,83 +733,63 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
ppt.save(output_ppt_file)


def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
table_file = open(table_data_file)
lines = table_file.readlines()
if not lines:
def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,print_row_num,table_column_width,table_max_rows_per_slide):

# load table data
try:
table_data = pandas.read_csv(table_file, sep='\t')
except pandas.errors.EmptyDataError:
logging.warning("{} is empty".format(table_file))
return
first_line = lines[0]
first_line_cells = first_line.split('\t')

# add empty columns for missing header columns and move additional columns to the right
table_data = pronto.normalize_column_index(table_data, table_header)

# round floats to 2 decimal places
table_data = pronto.set_column_to_2_decimals(table_data, "AF_tumor_DNA")

# determine column and row number
cols = len(table_header)
header_not_exist_in_table = []
for n in range(len(table_header)):
if_exist = False
if(table_header[n] in first_line_cells):
if_exist = True
if not if_exist:
header_not_exist_in_table.append(n)
data_rows = []
for line in lines[1:]:
line_cells = line.split('\t')
if header_not_exist_in_table:
for num in header_not_exist_in_table:
line_cells.insert(num," ")
row_data = [cell.strip() for cell in line.split('\t')]
data_rows.append(row_data)
total_rows = len(data_rows)
rows = len(table_data)

# how many slides are required
if not table_max_rows_per_slide:
table_max_rows_per_slide = rows
total_slides_needed = math.ceil(rows / table_max_rows_per_slide)

# Add data to ppt
ppt = Presentation(output_ppt_file)
if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide):
total_slides_needed = 1
rows_per_page = total_rows
start_slide_index = slide_n
else:
total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
rows_per_page = table_max_rows_per_slide
start_slide_index = None

for page_num in range(total_slides_needed):
start_idx = page_num * rows_per_page
end_idx = min(start_idx + rows_per_page, total_rows)
current_page_data = data_rows[start_idx:end_idx]
current_page_rows = len(current_page_data)
if(start_slide_index is not None and page_num == 0):
slide = ppt.slides[slide_n - 1]
for slide_idx in range(total_slides_needed):
current_slide_data = pronto.get_slide_table_data(table_data, slide_idx, table_max_rows_per_slide)
if(total_slides_needed == 1):
shapes = ppt.slides[slide_n - 1].shapes
else:
slide = ppt.slides.add_slide(ppt.slide_layouts[6])
shapes = slide.shapes
shapes = ppt.slides.add_slide(ppt.slide_layouts[6]).shapes

# create new table on slide
left = Inches(left_t)
top = Inches(top_t)
width = Inches(width_t)
height = Inches(height_t)
table_rows = current_page_rows + 1
table_rows = len(current_slide_data)
table = shapes.add_table(table_rows,cols,left,top,width,height).table
for c in range(cols):
if table_column_width:
table.columns[c].width = Inches(table_column_width[c])
table.cell(0,c).text = table_header[c]
table.cell(0,c).text_frame.paragraphs[0].font.size = Pt(font_size)

for row_idx, row_data in enumerate(current_page_data, start=1):
for col_idx in range(cols):
table.cell(row_idx,col_idx).text = str(row_data[col_idx])
table.cell(row_idx,col_idx).text_frame.paragraphs[0].font.size = Pt(font_size)

textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
tf = textbox.text_frame
if(if_print_rowNo == True):
if(table_max_rows_per_slide is not None):
tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
else:
tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")"
else:
tf.paragraphs[0].text = table_name
tf.paragraphs[0].font.size = Pt(8)
tf.paragraphs[0].font.bold = True
tf.paragraphs[0].alignment = PP_ALIGN.CENTER

# if table_column_width is provided, set the column width
if len(table_column_width) == cols:
for col_idx, width in enumerate(table_column_width):
table.columns[col_idx].width = Inches(width)

# fill in the table data and set font size
for row_idx, row in enumerate(table.rows):
for col_idx, cell in enumerate(row.cells):
cell.text = current_slide_data[row_idx][col_idx]
cell.text_frame.paragraphs[0].font.size = Pt(font_size)

# add table title
pronto.add_table_name(shapes, table_name, left_h, top_h, width_h, 0.25, 8, print_row_num, slide_idx, total_slides_needed, rows)

ppt.save(output_ppt_file)
return total_rows
return rows


def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
Expand Down
51 changes: 51 additions & 0 deletions pronto/pronto.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import glob
import logging
import os
import pandas
import pptx

# get tumor mutational burden label
def get_tmb_string(val):
Expand All @@ -27,3 +29,52 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
else:
logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
raise ValueError

# normalize dataframe to expected column indices
def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list):
    """Return a new dataframe whose leading columns are ``exp_col_idx``.

    Expected columns that are absent from ``df`` are created and filled with
    a single space (' '). Columns of ``df`` that are not expected are kept
    and moved to the right of the expected ones, in the order in which they
    appear in ``df``. The dataframe passed in is not modified.

    Args:
        df: table to normalize.
        exp_col_idx: expected column labels, in the desired order.

    Returns:
        A dataframe with columns ``exp_col_idx`` followed by the additional
        columns of ``df``.
    """
    exp_col_idx = list(exp_col_idx)
    expected = set(exp_col_idx)
    present = set(df.columns)
    # walk the labels in order (instead of subtracting sets) so the resulting
    # column order is the same on every run; dict.fromkeys drops repeated labels
    add_col_idx = list(dict.fromkeys(c for c in df.columns if c not in expected))
    miss_col_idx = [c for c in exp_col_idx if c not in present]
    # work on a copy so the caller's dataframe does not silently gain columns
    normalized = df.copy()
    for col in miss_col_idx:
        normalized[col] = ' '
    # expected columns first, additional columns are moved to the right
    return normalized[exp_col_idx + add_col_idx]

# set dataframe column format to 2 decimal points if float type
def set_column_to_2_decimals(df: pandas.DataFrame, col_name: str):
    """Format a float column of ``df`` as strings with two decimals.

    The column is replaced in place on ``df`` and the same dataframe is
    returned. Columns of any float width (float32, float64, ...) are
    formatted; columns of other types are left unchanged. If the column does
    not exist, an info message is logged and ``df`` is returned as is.

    Note: missing values (NaN) in a float column come out as the string
    'nan', because that is what the '{:.2f}' format produces for them.

    Args:
        df: table holding the column.
        col_name: label of the column to format.

    Returns:
        ``df`` itself.
    """
    if col_name not in df.columns:
        logging.info("Column {} not found in dataframe".format(col_name))
        return df
    # is_float_dtype matches every float width; comparing the dtype with the
    # builtin ``float`` would only match float64
    if pandas.api.types.is_float_dtype(df[col_name]):
        df[col_name] = df[col_name].map('{:.2f}'.format)
    return df

# get data fitting on one slide based on slide index and max rows per slide
def get_slide_table_data(df: pandas.DataFrame, slide_idx: int, max_rows: int):
    """Return the header plus the rows of ``df`` that belong on one slide.

    Args:
        df: full table.
        slide_idx: zero-based index of the slide.
        max_rows: maximum number of data rows per slide.

    Returns:
        A list of rows (each a list of cell values) whose first entry is the
        list of column labels, followed by the data rows for this slide.
        An empty list is returned when the slide starts at or beyond the
        last row of ``df``.
    """
    n_rows = len(df)
    first = slide_idx * max_rows
    last = min(first + max_rows, n_rows)
    # nothing left for this slide: not even the header is returned
    if first >= n_rows:
        return []
    header_row = df.columns.tolist()
    body_rows = df.values.tolist()[first:last]
    return [header_row] + body_rows

# add constructed table name to slide and format the textbox
def add_table_name(shapes: pptx.shapes.shapetree.SlideShapes, table_name: str, left: float, top: float, width: float, height: float, font_size: float, print_row_num: bool, slide_idx: int, total_slides: int, rows: int):
    """Add a bold, centered textbox with the table title to ``shapes``.

    The title is ``table_name``; when ``print_row_num`` is true it is followed
    by " (N=<rows>)", and if there is more than one slide the page is added
    as well: " (N=<rows>, Page <slide_idx + 1>/<total_slides>)".

    Args:
        shapes: shape collection of the slide that receives the textbox.
        table_name: title text.
        left, top, width, height: textbox position and size in inches.
        font_size: title font size in points.
        print_row_num: whether to append the row count (and page) to the title.
        slide_idx: zero-based index of the current slide of this table.
        total_slides: number of slides the table is spread over.
        rows: total number of rows reported as N.
    """
    inches = pptx.util.Inches

    # add textbox to slide and take its first paragraph
    textbox = shapes.add_textbox(inches(left), inches(top), inches(width), inches(height))
    paragraph = textbox.text_frame.paragraphs[0]

    # page counter is only shown when the table spans several slides
    if total_slides > 1:
        page_part = ", Page {}/{}".format(slide_idx + 1, total_slides)
    else:
        page_part = ''

    # optional row number (with page counter) behind the table name
    if print_row_num:
        count_part = " (N={}{})".format(rows, page_part)
    else:
        count_part = ''
    paragraph.text = "{}{}".format(table_name, count_part)

    # font formatting and placement
    title_font = paragraph.font
    title_font.size = pptx.util.Pt(font_size)
    title_font.bold = True
    paragraph.alignment = pptx.enum.text.PP_ALIGN.CENTER
Loading