package markdown import ( "bytes" "strings" "github.com/yuin/goldmark" gast "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/extension" east "github.com/yuin/goldmark/extension/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" mast "github.com/usememos/memos/plugin/markdown/ast" "github.com/usememos/memos/plugin/markdown/extensions" "github.com/usememos/memos/plugin/markdown/renderer" storepb "github.com/usememos/memos/proto/gen/store" ) // ExtractedData contains all metadata extracted from markdown in a single pass type ExtractedData struct { Tags []string Property *storepb.MemoPayload_Property References []string } // Service handles markdown metadata extraction. // It uses goldmark to parse markdown and extract tags, properties, and snippets. // HTML rendering is primarily done on frontend using markdown-it, but backend provides // RenderHTML for RSS feeds and other server-side rendering needs. type Service interface { // ExtractAll extracts tags, properties, and references in a single parse (most efficient) ExtractAll(content []byte) (*ExtractedData, error) // ExtractTags returns all #tags found in content ExtractTags(content []byte) ([]string, error) // ExtractProperties computes boolean properties ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) // ExtractReferences returns all wikilink references ([[...]]) found in content ExtractReferences(content []byte) ([]string, error) // RenderMarkdown renders goldmark AST back to markdown text RenderMarkdown(content []byte) (string, error) // RenderHTML renders markdown content to HTML RenderHTML(content []byte) (string, error) // GenerateSnippet creates plain text summary GenerateSnippet(content []byte, maxLength int) (string, error) // ValidateContent checks for syntax errors ValidateContent(content []byte) error // RenameTag renames all occurrences of oldTag to newTag in content RenameTag(content []byte, oldTag, newTag string) (string, error) } // service implements the Service interface type service struct { md goldmark.Markdown } // Option configures the markdown service type Option func(*config) type config struct { enableTags bool enableWikilink bool } // WithTagExtension enables #tag parsing func WithTagExtension() Option { return func(c *config) { c.enableTags = true } } // WithWikilinkExtension enables [[wikilink]] parsing func WithWikilinkExtension() Option { return func(c *config) { c.enableWikilink = true } } // NewService creates a new markdown service with the given options func NewService(opts ...Option) Service { cfg := &config{} for _, opt := range opts { opt(cfg) } exts := []goldmark.Extender{ extension.GFM, // GitHub Flavored Markdown (tables, strikethrough, task lists, autolinks) } // Add custom extensions based on config if cfg.enableTags { exts = append(exts, extensions.TagExtension) } if cfg.enableWikilink { exts = append(exts, extensions.WikilinkExtension) } md := goldmark.New( goldmark.WithExtensions(exts...), goldmark.WithParserOptions( parser.WithAutoHeadingID(), // Generate heading IDs ), ) return &service{ md: md, } } // parse is an internal helper to parse content into AST func (s *service) parse(content []byte) (gast.Node, error) { reader := text.NewReader(content) doc := s.md.Parser().Parse(reader) return doc, nil } // ExtractTags returns all #tags found in content func (s *service) ExtractTags(content []byte) ([]string, error) { root, err := s.parse(content) if err != nil { return nil, err } var tags []string // Walk the AST to find tag nodes err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { if !entering { return gast.WalkContinue, nil } // Check for custom TagNode if tagNode, ok := n.(*mast.TagNode); ok { tags = append(tags, string(tagNode.Tag)) } return gast.WalkContinue, nil }) if err != nil { return nil, err } // Deduplicate and normalize tags return uniqueLowercase(tags), nil } // ExtractProperties computes boolean properties about the content func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) { root, err := s.parse(content) if err != nil { return nil, err } prop := &storepb.MemoPayload_Property{} err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { if !entering { return gast.WalkContinue, nil } switch n.Kind() { case gast.KindLink, mast.KindWikilink: prop.HasLink = true case mast.KindWikilink: prop.HasLink = true case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan: prop.HasCode = true case gast.KindCodeSpan: prop.HasCode = true case east.KindTaskCheckBox: prop.HasTaskList = true if checkBox, ok := n.(*east.TaskCheckBox); ok { if !checkBox.IsChecked { prop.HasIncompleteTasks = true } } } return gast.WalkContinue, nil }) if err != nil { return nil, err } return prop, nil } // ExtractReferences returns all wikilink references found in content func (s *service) ExtractReferences(content []byte) ([]string, error) { root, err := s.parse(content) if err != nil { return nil, err } references := []string{} // Initialize to empty slice, not nil // Walk the AST to find wikilink nodes err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { if !entering { return gast.WalkContinue, nil } // Check for custom WikilinkNode if wikilinkNode, ok := n.(*mast.WikilinkNode); ok { references = append(references, string(wikilinkNode.Target)) } return gast.WalkContinue, nil }) if err != nil { return nil, err } return references, nil } // RenderMarkdown renders goldmark AST back to markdown text func (s *service) RenderMarkdown(content []byte) (string, error) { root, err := s.parse(content) if err != nil { return "", err } mdRenderer := renderer.NewMarkdownRenderer() return mdRenderer.Render(root, content), nil } // RenderHTML renders markdown content to HTML using goldmark's built-in HTML renderer func (s *service) RenderHTML(content []byte) (string, error) { var buf bytes.Buffer if err := s.md.Convert(content, &buf); err != nil { return "", err } return buf.String(), nil } // GenerateSnippet creates a plain text summary from markdown content func (s *service) GenerateSnippet(content []byte, maxLength int) (string, error) { root, err := s.parse(content) if err != nil { return "", err } var buf strings.Builder var lastNodeWasBlock bool err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { if entering { // Skip code blocks and code spans entirely switch n.Kind() { case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan: return gast.WalkSkipChildren, nil } // Add space before block elements (except first) switch n.Kind() { case gast.KindParagraph, gast.KindHeading, gast.KindListItem: if buf.Len() > 0 && lastNodeWasBlock { buf.WriteByte(' ') } } } if !entering { // Mark that we just exited a block element switch n.Kind() { case gast.KindParagraph, gast.KindHeading, gast.KindListItem: lastNodeWasBlock = true } return gast.WalkContinue, nil } lastNodeWasBlock = false // Only extract plain text nodes if textNode, ok := n.(*gast.Text); ok { segment := textNode.Segment buf.Write(segment.Value(content)) // Add space if this is a soft line break if textNode.SoftLineBreak() { buf.WriteByte(' ') } } // Stop walking if we've exceeded double the max length // (we'll truncate precisely later) if buf.Len() > maxLength*2 { return gast.WalkStop, nil } return gast.WalkContinue, nil }) if err != nil { return "", err } snippet := buf.String() // Truncate at word boundary if needed if len(snippet) > maxLength { snippet = truncateAtWord(snippet, maxLength) } return strings.TrimSpace(snippet), nil } // ValidateContent checks if the markdown content is valid func (s *service) ValidateContent(content []byte) error { // Try to parse the content _, err := s.parse(content) return err } // ExtractAll extracts tags, properties, and references in a single parse for efficiency func (s *service) ExtractAll(content []byte) (*ExtractedData, error) { root, err := s.parse(content) if err != nil { return nil, err } data := &ExtractedData{ Tags: []string{}, Property: &storepb.MemoPayload_Property{}, References: []string{}, } // Single walk to collect all data err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { if !entering { return gast.WalkContinue, nil } // Extract tags if tagNode, ok := n.(*mast.TagNode); ok { data.Tags = append(data.Tags, string(tagNode.Tag)) } // Extract references (wikilinks) if wikilinkNode, ok := n.(*mast.WikilinkNode); ok { data.References = append(data.References, string(wikilinkNode.Target)) } // Extract properties based on node kind switch n.Kind() { case gast.KindLink, mast.KindWikilink: data.Property.HasLink = true case mast.KindWikilink: data.Property.HasLink = true case gast.KindCodeBlock, gast.KindFencedCodeBlock, gast.KindCodeSpan: data.Property.HasCode = true case gast.KindCodeSpan: data.Property.HasCode = true case east.KindTaskCheckBox: data.Property.HasTaskList = true if checkBox, ok := n.(*east.TaskCheckBox); ok { if !checkBox.IsChecked { data.Property.HasIncompleteTasks = true } } } return gast.WalkContinue, nil }) if err != nil { return nil, err } // Deduplicate and normalize tags data.Tags = uniqueLowercase(data.Tags) return data, nil } // RenameTag renames all occurrences of oldTag to newTag in content func (s *service) RenameTag(content []byte, oldTag, newTag string) (string, error) { root, err := s.parse(content) if err != nil { return "", err } // Walk the AST to find and rename tag nodes err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) { if !entering { return gast.WalkContinue, nil } // Check for custom TagNode and rename if it matches if tagNode, ok := n.(*mast.TagNode); ok { if string(tagNode.Tag) == oldTag { tagNode.Tag = []byte(newTag) } } return gast.WalkContinue, nil }) if err != nil { return "", err } // Render back to markdown using the already-parsed AST mdRenderer := renderer.NewMarkdownRenderer() return mdRenderer.Render(root, content), nil } // uniqueLowercase returns unique lowercase strings from input func uniqueLowercase(strs []string) []string { seen := make(map[string]bool) var result []string for _, s := range strs { lower := strings.ToLower(s) if !seen[lower] { seen[lower] = true result = append(result, lower) } } return result } // truncateAtWord truncates a string at the last word boundary before maxLength func truncateAtWord(s string, maxLength int) string { if len(s) <= maxLength { return s } // Truncate to max length truncated := s[:maxLength] // Find last space lastSpace := strings.LastIndexAny(truncated, " \t\n\r") if lastSpace > 0 { truncated = truncated[:lastSpace] } return truncated + " ..." }