From f142dc8663a97890715ae9336833655ba88f5b84 Mon Sep 17 00:00:00 2001 From: Marin <3746994+petricevich@users.noreply.github.com> Date: Mon, 4 May 2026 15:03:29 +0200 Subject: [PATCH] fix quadratic complexity in fragments_join When emphasis/strikethrough postprocessing leaves a long run of adjacent text tokens (e.g. unmatched intraword `_` delimiters), fragments_join merged them via pairwise `a + b` concatenation. Each step rebuilds the growing prefix, costing O(L*k) per run, where L is the total content length of the run and k is the number of text tokens in it. Walk the whole run once, collect content into a list, and "".join into the last token, making the work O(L). The kept token is still the last in the run so its non-content attributes (markup, etc.) are preserved. --- markdown_it/rules_inline/fragments_join.py | 25 ++++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/markdown_it/rules_inline/fragments_join.py b/markdown_it/rules_inline/fragments_join.py index f795c136..5eb88a14 100644 --- a/markdown_it/rules_inline/fragments_join.py +++ b/markdown_it/rules_inline/fragments_join.py @@ -29,14 +29,25 @@ def fragments_join(state: StateInline) -> None: and curr + 1 < maximum and state.tokens[curr + 1].type == "text" ): - # collapse two adjacent text nodes - state.tokens[curr + 1].content = ( - state.tokens[curr].content + state.tokens[curr + 1].content - ) - else: - if curr != last: - state.tokens[last] = state.tokens[curr] + # Collapse a run of adjacent text nodes in a single join, instead + # of pairwise `a + b` concatenation. The pairwise form is O(L*k) + # in the size of the run because each step rebuilds the growing + # prefix; "".join is O(L). 
+ parts = [state.tokens[curr].content] + curr += 1 + while curr < maximum and state.tokens[curr].type == "text": + parts.append(state.tokens[curr].content) + curr += 1 + merged = state.tokens[curr - 1] + merged.content = "".join(parts) + merged.level = level + state.tokens[last] = merged last += 1 + continue + + if curr != last: + state.tokens[last] = state.tokens[curr] + last += 1 curr += 1 if curr != last: