From 61303b8155cb6d389cc4453ca83cc6a01934667d Mon Sep 17 00:00:00 2001
From: Ahmed Nassar <asnassar@users.noreply.huggingface.co>
Date: Mon, 17 Mar 2025 13:20:29 +0000
Subject: [PATCH] Update app.py: drop no_repeat_ngram_size, clean final output

---
 app.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/app.py b/app.py
index f467107..b857022 100644
--- a/app.py
+++ b/app.py
@@ -99,7 +99,6 @@ def model_inference(
         "pixel_values": inputs.pixel_values,
         "attention_mask": inputs.attention_mask,
         "num_return_sequences": 1,
-        "no_repeat_ngram_size": 10,
         "max_new_tokens": 8192,
     }
 
@@ -111,24 +110,30 @@ def model_inference(
 
     yield "..."
     buffer = ""
-    doctag_output = ""
+    full_output = ""  # raw, unescaped model text; parsed for doctags below
 
     for new_text in streamer:
-        if new_text != "<end_of_utterance>":
-            buffer += html.escape(new_text)
-            doctag_output += new_text
+        full_output += new_text
+        buffer += html.escape(new_text)  # HTML-escaped copy that is streamed out
         yield buffer
 
-    if any(tag in doctag_output for tag in ["<doctag>", "<otsl>", "<code>", "<formula>", "<chart>"]):
-        # final_output = buffer
-        # cleaned_output = final_output[len(inputs.input_ids):] if len(final_output) > prompt_length else final_output
+    cleaned_output = full_output.replace("<end_of_utterance>", "").strip()
+    doctag_output = cleaned_output  # bound even when the model emitted nothing
+
+    if cleaned_output:
+        yield html.escape(cleaned_output)  # escaped, like the streamed buffer
+
+    if any(tag in doctag_output for tag in ["<doctag>", "<otsl>", "<code>", "<chart>", "<formula>"]):
         doc = DoclingDocument(name="Document")
         if "<chart>" in doctag_output:
             doctag_output = doctag_output.replace("<chart>", "<otsl>").replace("</chart>", "</otsl>")
             doctag_output = re.sub(r'(<loc_500>)(?!.*<loc_500>)<[^>]+>', r'\1', doctag_output)
-   
+
+        print(doctag_output)
+
         doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctag_output], images)
         doc.load_from_doctags(doctags_doc)
+        print(doc)
         yield f"**MD Output:**\n\n{doc.export_to_markdown()}"
 
 examples=[[{"text": "Convert this page to docling.", "files": ["example_images/2d0fbcc50e88065a040a537b717620e964fb4453314b71d83f3ed3425addcef6.png"]}],