feat: extract title from first H1 heading into memo property (#5726)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
memoclaw 2026-03-16 11:05:03 +08:00 committed by GitHub
parent b8e9ee2b26
commit 1e688b2a5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 154 additions and 11 deletions

View File

@ -138,6 +138,26 @@ func (s *service) ExtractTags(content []byte) ([]string, error) {
return uniquePreserveCase(tags), nil
}
// extractHeadingText returns the plain-text content of a heading node.
//
// Every inline descendant of n is visited in document order and its text is
// concatenated, so inline markup such as emphasis is flattened to its textual
// content. Line breaks inside the heading become single spaces, and leading or
// trailing whitespace is trimmed from the result.
//
// source must be the byte slice the node tree was parsed from, because text
// nodes only store offsets into it.
func extractHeadingText(n gast.Node, source []byte) string {
	var buf strings.Builder
	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
		extractTextFromNode(child, source, &buf)
	}
	// Trim so that a separator emitted for a line break after the final text
	// segment never leaks into the returned title.
	return strings.TrimSpace(buf.String())
}

// extractTextFromNode appends the plain text of n and all of its descendants
// to buf.
//
// Text nodes contribute the bytes of their source segment; a soft or hard line
// break following the segment is written as a single space so that words on
// adjacent lines (e.g. in a multi-line setext heading) are not glued together.
// Autolink nodes contribute their label, since their text is exposed through
// Label rather than through child nodes. Any other node contributes only what
// its children contribute.
func extractTextFromNode(n gast.Node, source []byte, buf *strings.Builder) {
	switch node := n.(type) {
	case *gast.Text:
		buf.Write(node.Segment.Value(source))
		if node.SoftLineBreak() || node.HardLineBreak() {
			buf.WriteByte(' ')
		}
		return
	case *gast.AutoLink:
		buf.Write(node.Label(source))
		return
	}
	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
		extractTextFromNode(child, source, buf)
	}
}
// ExtractProperties computes boolean properties about the content.
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
root, err := s.parse(content)
@ -146,12 +166,21 @@ func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Proper
}
prop := &storepb.MemoPayload_Property{}
firstBlockChecked := false
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering {
return gast.WalkContinue, nil
}
// Check if the first block-level child of the document is an H1 heading.
if !firstBlockChecked && n.Parent() != nil && n.Parent().Kind() == gast.KindDocument {
firstBlockChecked = true
if heading, ok := n.(*gast.Heading); ok && heading.Level == 1 {
prop.Title = extractHeadingText(n, content)
}
}
switch n.Kind() {
case gast.KindLink:
prop.HasLink = true
@ -302,6 +331,8 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
Property: &storepb.MemoPayload_Property{},
}
firstBlockChecked := false
// Single walk to collect all data
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
if !entering {
@ -313,6 +344,14 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
data.Tags = append(data.Tags, string(tagNode.Tag))
}
// Check if the first block-level child of the document is an H1 heading.
if !firstBlockChecked && n.Parent() != nil && n.Parent().Kind() == gast.KindDocument {
firstBlockChecked = true
if heading, ok := n.(*gast.Heading); ok && heading.Level == 1 {
data.Property.Title = extractHeadingText(n, content)
}
}
// Extract properties based on node kind
switch n.Kind() {
case gast.KindLink:

View File

@ -190,6 +190,7 @@ func TestExtractProperties(t *testing.T) {
hasCode bool
hasTasks bool
hasInc bool
title string
}{
{
name: "plain text",
@ -198,6 +199,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false,
hasTasks: false,
hasInc: false,
title: "",
},
{
name: "with link",
@ -206,6 +208,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false,
hasTasks: false,
hasInc: false,
title: "",
},
{
name: "with inline code",
@ -214,6 +217,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: true,
hasTasks: false,
hasInc: false,
title: "",
},
{
name: "with code block",
@ -222,6 +226,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: true,
hasTasks: false,
hasInc: false,
title: "",
},
{
name: "with completed task",
@ -230,6 +235,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false,
hasTasks: true,
hasInc: false,
title: "",
},
{
name: "with incomplete task",
@ -238,6 +244,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false,
hasTasks: true,
hasInc: true,
title: "",
},
{
name: "mixed tasks",
@ -246,6 +253,7 @@ func TestExtractProperties(t *testing.T) {
hasCode: false,
hasTasks: true,
hasInc: true,
title: "",
},
{
name: "everything",
@ -254,6 +262,32 @@ func TestExtractProperties(t *testing.T) {
hasCode: true,
hasTasks: true,
hasInc: true,
title: "Title",
},
{
name: "h1 as first node extracts title",
content: "# My Article Title\n\nBody text here.",
title: "My Article Title",
},
{
name: "h2 as first node does not extract title",
content: "## Sub Heading\n\nBody text.",
title: "",
},
{
name: "h1 not first node does not extract title",
content: "Some text\n\n# Heading Later",
title: "",
},
{
name: "h1 with inline formatting extracts plain text",
content: "# Title with **bold** and *italic*\n\nBody.",
title: "Title with bold and italic",
},
{
name: "empty content has no title",
content: "",
title: "",
},
}
@ -267,6 +301,41 @@ func TestExtractProperties(t *testing.T) {
assert.Equal(t, tt.hasCode, props.HasCode, "HasCode")
assert.Equal(t, tt.hasTasks, props.HasTaskList, "HasTaskList")
assert.Equal(t, tt.hasInc, props.HasIncompleteTasks, "HasIncompleteTasks")
assert.Equal(t, tt.title, props.Title, "Title")
})
}
}
// TestExtractAllTitle checks the Title property produced by ExtractAll: it is
// set when the content starts with an H1 heading and left empty when there is
// no H1 or the H1 is not the first block.
func TestExtractAllTitle(t *testing.T) {
	type titleCase struct {
		name    string
		content string
		title   string
	}

	cases := []titleCase{
		{name: "h1 first node", content: "# Article Title\n\nContent with #tag", title: "Article Title"},
		{name: "no h1", content: "Just text with #tag", title: ""},
		{name: "h1 not first", content: "Intro\n\n# Late Heading", title: ""},
	}

	svc := NewService(WithTagExtension())
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			extracted, err := svc.ExtractAll([]byte(tc.content))
			require.NoError(t, err)

			got := extracted.Property.Title
			assert.Equal(t, tc.title, got, "Title")
		})
	}
}

View File

@ -227,6 +227,8 @@ message Memo {
bool has_task_list = 2;
bool has_code = 3;
bool has_incomplete_tasks = 4;
// The title extracted from the first H1 heading, if present.
string title = 5;
}
}

View File

@ -1679,8 +1679,10 @@ type Memo_Property struct {
HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"`
HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"`
HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
// The title extracted from the first H1 heading, if present.
Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Memo_Property) Reset() {
@ -1741,6 +1743,13 @@ func (x *Memo_Property) GetHasIncompleteTasks() bool {
return false
}
// GetTitle returns the Title field of x, or the empty string when x is nil,
// so it is safe to call on a nil receiver.
func (x *Memo_Property) GetTitle() string {
	if x == nil {
		return ""
	}
	return x.Title
}
// Memo reference in relations.
type MemoRelation_Memo struct {
state protoimpl.MessageState `protogen:"open.v1"`
@ -1812,7 +1821,7 @@ const file_api_v1_memo_service_proto_rawDesc = "" +
"\rreaction_type\x18\x04 \x01(\tB\x03\xe0A\x02R\freactionType\x12@\n" +
"\vcreate_time\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampB\x03\xe0A\x03R\n" +
"createTime:X\xeaAU\n" +
"\x15memos.api.v1/Reaction\x12!memos/{memo}/reactions/{reaction}\x1a\x04name*\treactions2\breaction\"\xd8\b\n" +
"\x15memos.api.v1/Reaction\x12!memos/{memo}/reactions/{reaction}\x1a\x04name*\treactions2\breaction\"\xee\b\n" +
"\x04Memo\x12\x17\n" +
"\x04name\x18\x01 \x01(\tB\x03\xe0A\bR\x04name\x12.\n" +
"\x05state\x18\x02 \x01(\x0e2\x13.memos.api.v1.StateB\x03\xe0A\x02R\x05state\x123\n" +
@ -1837,12 +1846,13 @@ const file_api_v1_memo_service_proto_rawDesc = "" +
"\x06parent\x18\x10 \x01(\tB\x19\xe0A\x03\xfaA\x13\n" +
"\x11memos.api.v1/MemoH\x00R\x06parent\x88\x01\x01\x12\x1d\n" +
"\asnippet\x18\x11 \x01(\tB\x03\xe0A\x03R\asnippet\x12<\n" +
"\blocation\x18\x12 \x01(\v2\x16.memos.api.v1.LocationB\x03\xe0A\x01H\x01R\blocation\x88\x01\x01\x1a\x96\x01\n" +
"\blocation\x18\x12 \x01(\v2\x16.memos.api.v1.LocationB\x03\xe0A\x01H\x01R\blocation\x88\x01\x01\x1a\xac\x01\n" +
"\bProperty\x12\x19\n" +
"\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" +
"\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" +
"\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" +
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks:7\xeaA4\n" +
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x12\x14\n" +
"\x05title\x18\x05 \x01(\tR\x05title:7\xeaA4\n" +
"\x11memos.api.v1/Memo\x12\fmemos/{memo}\x1a\x04name*\x05memos2\x04memoB\t\n" +
"\a_parentB\v\n" +
"\t_location\"u\n" +

View File

@ -2590,6 +2590,9 @@ components:
type: boolean
hasIncompleteTasks:
type: boolean
title:
type: string
description: The title extracted from the first H1 heading, if present.
description: Computed properties of a memo.
OAuth2Config:
type: object

View File

@ -88,8 +88,10 @@ type MemoPayload_Property struct {
HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"`
HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"`
HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
// The title extracted from the first H1 heading, if present.
Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *MemoPayload_Property) Reset() {
@ -150,6 +152,13 @@ func (x *MemoPayload_Property) GetHasIncompleteTasks() bool {
return false
}
// GetTitle returns the Title field of x, or the empty string when x is nil,
// so it is safe to call on a nil receiver.
func (x *MemoPayload_Property) GetTitle() string {
	if x == nil {
		return ""
	}
	return x.Title
}
type MemoPayload_Location struct {
state protoimpl.MessageState `protogen:"open.v1"`
Placeholder string `protobuf:"bytes,1,opt,name=placeholder,proto3" json:"placeholder,omitempty"`
@ -214,16 +223,17 @@ var File_store_memo_proto protoreflect.FileDescriptor
const file_store_memo_proto_rawDesc = "" +
"\n" +
"\x10store/memo.proto\x12\vmemos.store\"\xa0\x03\n" +
"\x10store/memo.proto\x12\vmemos.store\"\xb6\x03\n" +
"\vMemoPayload\x12=\n" +
"\bproperty\x18\x01 \x01(\v2!.memos.store.MemoPayload.PropertyR\bproperty\x12=\n" +
"\blocation\x18\x02 \x01(\v2!.memos.store.MemoPayload.LocationR\blocation\x12\x12\n" +
"\x04tags\x18\x03 \x03(\tR\x04tags\x1a\x96\x01\n" +
"\x04tags\x18\x03 \x03(\tR\x04tags\x1a\xac\x01\n" +
"\bProperty\x12\x19\n" +
"\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" +
"\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" +
"\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" +
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x1af\n" +
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x12\x14\n" +
"\x05title\x18\x05 \x01(\tR\x05title\x1af\n" +
"\bLocation\x12 \n" +
"\vplaceholder\x18\x01 \x01(\tR\vplaceholder\x12\x1a\n" +
"\blatitude\x18\x02 \x01(\x01R\blatitude\x12\x1c\n" +

View File

@ -17,6 +17,8 @@ message MemoPayload {
bool has_task_list = 2;
bool has_code = 3;
bool has_incomplete_tasks = 4;
// The title extracted from the first H1 heading, if present.
string title = 5;
}
message Location {

View File

@ -192,6 +192,7 @@ func convertMemoPropertyFromStore(property *storepb.MemoPayload_Property) *v1pb.
HasTaskList: property.HasTaskList,
HasCode: property.HasCode,
HasIncompleteTasks: property.HasIncompleteTasks,
Title: property.Title,
}
}

File diff suppressed because one or more lines are too long