diff --git a/plugin/markdown/markdown.go b/plugin/markdown/markdown.go index c6498beb4..e74b7e99a 100644 --- a/plugin/markdown/markdown.go +++ b/plugin/markdown/markdown.go @@ -389,15 +389,18 @@ func uniqueLowercase(strs []string) []string { } // truncateAtWord truncates a string at the last word boundary before maxLength. +// maxLength is treated as a rune (character) count to properly handle UTF-8 multi-byte characters. func truncateAtWord(s string, maxLength int) string { - if len(s) <= maxLength { + // Convert to runes to properly handle multi-byte UTF-8 characters + runes := []rune(s) + if len(runes) <= maxLength { return s } - // Truncate to max length - truncated := s[:maxLength] + // Truncate to max length (by character count, not byte count) + truncated := string(runes[:maxLength]) - // Find last space + // Find last space to avoid cutting in the middle of a word lastSpace := strings.LastIndexAny(truncated, " \t\n\r") if lastSpace > 0 { truncated = truncated[:lastSpace] diff --git a/plugin/markdown/markdown_test.go b/plugin/markdown/markdown_test.go index 21a9f08cf..01d87a7cc 100644 --- a/plugin/markdown/markdown_test.go +++ b/plugin/markdown/markdown_test.go @@ -382,6 +382,18 @@ func TestTruncateAtWord(t *testing.T) { maxLength: 10, expected: "supercalif ...", }, + { + name: "CJK characters without spaces", + input: "这是一个很长的中文句子没有空格的情况下也要正确处理", + maxLength: 15, + expected: "这是一个很长的中文句子没有空格 ...", + }, + { + name: "mixed CJK and Latin", + input: "这是中文mixed with English文字", + maxLength: 10, + expected: "这是中文mixed ...", + }, } for _, tt := range tests {