Update app.py
This commit is contained in:
parent
4ff2750a5c
commit
61303b8155
23
app.py
23
app.py
|
|
@@ -99,7 +99,6 @@ def model_inference(
|
||||||
"pixel_values": inputs.pixel_values,
|
"pixel_values": inputs.pixel_values,
|
||||||
"attention_mask": inputs.attention_mask,
|
"attention_mask": inputs.attention_mask,
|
||||||
"num_return_sequences": 1,
|
"num_return_sequences": 1,
|
||||||
"no_repeat_ngram_size": 10,
|
|
||||||
"max_new_tokens": 8192,
|
"max_new_tokens": 8192,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -111,24 +110,30 @@ def model_inference(
|
||||||
|
|
||||||
yield "..."
|
yield "..."
|
||||||
buffer = ""
|
buffer = ""
|
||||||
doctag_output = ""
|
full_output = ""
|
||||||
|
|
||||||
for new_text in streamer:
|
for new_text in streamer:
|
||||||
if new_text != "<end_of_utterance>":
|
full_output += new_text
|
||||||
buffer += html.escape(new_text)
|
buffer += html.escape(new_text)
|
||||||
doctag_output += new_text
|
|
||||||
yield buffer
|
yield buffer
|
||||||
|
|
||||||
if any(tag in doctag_output for tag in ["<doctag>", "<otsl>", "<code>", "<formula>", "<chart>"]):
|
cleaned_output = full_output.replace("<end_of_utterance>", "").strip()
|
||||||
# final_output = buffer
|
|
||||||
# cleaned_output = final_output[len(inputs.input_ids):] if len(final_output) > prompt_length else final_output
|
if cleaned_output:
|
||||||
|
doctag_output = cleaned_output
|
||||||
|
yield cleaned_output
|
||||||
|
|
||||||
|
if any(tag in doctag_output for tag in ["<doctag>", "<otsl>", "<code>", "<chart>", "<formula>"]):
|
||||||
doc = DoclingDocument(name="Document")
|
doc = DoclingDocument(name="Document")
|
||||||
if "<chart>" in doctag_output:
|
if "<chart>" in doctag_output:
|
||||||
doctag_output = doctag_output.replace("<chart>", "<otsl>").replace("</chart>", "</otsl>")
|
doctag_output = doctag_output.replace("<chart>", "<otsl>").replace("</chart>", "</otsl>")
|
||||||
doctag_output = re.sub(r'(<loc_500>)(?!.*<loc_500>)<[^>]+>', r'\1', doctag_output)
|
doctag_output = re.sub(r'(<loc_500>)(?!.*<loc_500>)<[^>]+>', r'\1', doctag_output)
|
||||||
|
|
||||||
|
print(doctag_output)
|
||||||
|
|
||||||
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctag_output], images)
|
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctag_output], images)
|
||||||
doc.load_from_doctags(doctags_doc)
|
doc.load_from_doctags(doctags_doc)
|
||||||
|
print(doc)
|
||||||
yield f"**MD Output:**\n\n{doc.export_to_markdown()}"
|
yield f"**MD Output:**\n\n{doc.export_to_markdown()}"
|
||||||
|
|
||||||
examples=[[{"text": "Convert this page to docling.", "files": ["example_images/2d0fbcc50e88065a040a537b717620e964fb4453314b71d83f3ed3425addcef6.png"]}],
|
examples=[[{"text": "Convert this page to docling.", "files": ["example_images/2d0fbcc50e88065a040a537b717620e964fb4453314b71d83f3ed3425addcef6.png"]}],
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue