From 5b9eafdf6a92c3e52b8d461304da660266bdf955 Mon Sep 17 00:00:00 2001 From: Maurizio Dipierro Date: Wed, 19 Mar 2025 16:34:02 +0100 Subject: [PATCH] fix end of string --- app.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 10bdf28..be9ee73 100644 --- a/app.py +++ b/app.py @@ -116,11 +116,24 @@ def model_inference( buffer += html.escape(new_text) yield buffer + # After finishing the streamer loop: cleaned_output = full_output.replace("", "").strip() + final_output = cleaned_output - if cleaned_output: + # If markdown conversion is needed, combine it with cleaned_output + if any(tag in cleaned_output for tag in ["", "", "", "", ""]): doctag_output = cleaned_output - yield cleaned_output + if "" in doctag_output: + doctag_output = doctag_output.replace("", "").replace("", "") + doctag_output = re.sub(r'()(?!.*)<[^>]+>', r'\1', doctag_output) + doc = DoclingDocument(name="Document") + doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctag_output], images) + doc.load_from_doctags(doctags_doc) + final_output += "\n\n**MD Output:**\n\n" + doc.export_to_markdown() + + # Yield the final combined output only once + yield final_output + if any(tag in doctag_output for tag in ["", "", "", "", ""]): doc = DoclingDocument(name="Document")