mirror of https://github.com/usememos/memos.git
feat: extract title from first H1 heading into memo property (#5726)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b8e9ee2b26
commit
1e688b2a5d
|
|
@ -138,6 +138,26 @@ func (s *service) ExtractTags(content []byte) ([]string, error) {
|
|||
return uniquePreserveCase(tags), nil
|
||||
}
|
||||
|
||||
// extractHeadingText extracts plain text content from a heading node.
|
||||
func extractHeadingText(n gast.Node, source []byte) string {
|
||||
var buf strings.Builder
|
||||
for child := n.FirstChild(); child != nil; child = child.NextSibling() {
|
||||
extractTextFromNode(child, source, &buf)
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// extractTextFromNode recursively extracts plain text from a node and its children.
|
||||
func extractTextFromNode(n gast.Node, source []byte, buf *strings.Builder) {
|
||||
if textNode, ok := n.(*gast.Text); ok {
|
||||
buf.Write(textNode.Segment.Value(source))
|
||||
return
|
||||
}
|
||||
for child := n.FirstChild(); child != nil; child = child.NextSibling() {
|
||||
extractTextFromNode(child, source, buf)
|
||||
}
|
||||
}
|
||||
|
||||
// ExtractProperties computes properties about the content: boolean flags and the title taken from a leading H1 heading.
|
||||
func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Property, error) {
|
||||
root, err := s.parse(content)
|
||||
|
|
@ -146,12 +166,21 @@ func (s *service) ExtractProperties(content []byte) (*storepb.MemoPayload_Proper
|
|||
}
|
||||
|
||||
prop := &storepb.MemoPayload_Property{}
|
||||
firstBlockChecked := false
|
||||
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return gast.WalkContinue, nil
|
||||
}
|
||||
|
||||
// Check if the first block-level child of the document is an H1 heading.
|
||||
if !firstBlockChecked && n.Parent() != nil && n.Parent().Kind() == gast.KindDocument {
|
||||
firstBlockChecked = true
|
||||
if heading, ok := n.(*gast.Heading); ok && heading.Level == 1 {
|
||||
prop.Title = extractHeadingText(n, content)
|
||||
}
|
||||
}
|
||||
|
||||
switch n.Kind() {
|
||||
case gast.KindLink:
|
||||
prop.HasLink = true
|
||||
|
|
@ -302,6 +331,8 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
|
|||
Property: &storepb.MemoPayload_Property{},
|
||||
}
|
||||
|
||||
firstBlockChecked := false
|
||||
|
||||
// Single walk to collect all data
|
||||
err = gast.Walk(root, func(n gast.Node, entering bool) (gast.WalkStatus, error) {
|
||||
if !entering {
|
||||
|
|
@ -313,6 +344,14 @@ func (s *service) ExtractAll(content []byte) (*ExtractedData, error) {
|
|||
data.Tags = append(data.Tags, string(tagNode.Tag))
|
||||
}
|
||||
|
||||
// Check if the first block-level child of the document is an H1 heading.
|
||||
if !firstBlockChecked && n.Parent() != nil && n.Parent().Kind() == gast.KindDocument {
|
||||
firstBlockChecked = true
|
||||
if heading, ok := n.(*gast.Heading); ok && heading.Level == 1 {
|
||||
data.Property.Title = extractHeadingText(n, content)
|
||||
}
|
||||
}
|
||||
|
||||
// Extract properties based on node kind
|
||||
switch n.Kind() {
|
||||
case gast.KindLink:
|
||||
|
|
|
|||
|
|
@ -190,6 +190,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode bool
|
||||
hasTasks bool
|
||||
hasInc bool
|
||||
title string
|
||||
}{
|
||||
{
|
||||
name: "plain text",
|
||||
|
|
@ -198,6 +199,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: false,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "with link",
|
||||
|
|
@ -206,6 +208,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: false,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "with inline code",
|
||||
|
|
@ -214,6 +217,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: true,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "with code block",
|
||||
|
|
@ -222,6 +226,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: true,
|
||||
hasTasks: false,
|
||||
hasInc: false,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "with completed task",
|
||||
|
|
@ -230,6 +235,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: false,
|
||||
hasTasks: true,
|
||||
hasInc: false,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "with incomplete task",
|
||||
|
|
@ -238,6 +244,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: false,
|
||||
hasTasks: true,
|
||||
hasInc: true,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "mixed tasks",
|
||||
|
|
@ -246,6 +253,7 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: false,
|
||||
hasTasks: true,
|
||||
hasInc: true,
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "everything",
|
||||
|
|
@ -254,6 +262,32 @@ func TestExtractProperties(t *testing.T) {
|
|||
hasCode: true,
|
||||
hasTasks: true,
|
||||
hasInc: true,
|
||||
title: "Title",
|
||||
},
|
||||
{
|
||||
name: "h1 as first node extracts title",
|
||||
content: "# My Article Title\n\nBody text here.",
|
||||
title: "My Article Title",
|
||||
},
|
||||
{
|
||||
name: "h2 as first node does not extract title",
|
||||
content: "## Sub Heading\n\nBody text.",
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "h1 not first node does not extract title",
|
||||
content: "Some text\n\n# Heading Later",
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "h1 with inline formatting extracts plain text",
|
||||
content: "# Title with **bold** and *italic*\n\nBody.",
|
||||
title: "Title with bold and italic",
|
||||
},
|
||||
{
|
||||
name: "empty content has no title",
|
||||
content: "",
|
||||
title: "",
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -267,6 +301,41 @@ func TestExtractProperties(t *testing.T) {
|
|||
assert.Equal(t, tt.hasCode, props.HasCode, "HasCode")
|
||||
assert.Equal(t, tt.hasTasks, props.HasTaskList, "HasTaskList")
|
||||
assert.Equal(t, tt.hasInc, props.HasIncompleteTasks, "HasIncompleteTasks")
|
||||
assert.Equal(t, tt.title, props.Title, "Title")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractAllTitle(t *testing.T) {
|
||||
svc := NewService(WithTagExtension())
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
content string
|
||||
title string
|
||||
}{
|
||||
{
|
||||
name: "h1 first node",
|
||||
content: "# Article Title\n\nContent with #tag",
|
||||
title: "Article Title",
|
||||
},
|
||||
{
|
||||
name: "no h1",
|
||||
content: "Just text with #tag",
|
||||
title: "",
|
||||
},
|
||||
{
|
||||
name: "h1 not first",
|
||||
content: "Intro\n\n# Late Heading",
|
||||
title: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
data, err := svc.ExtractAll([]byte(tt.content))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tt.title, data.Property.Title, "Title")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -227,6 +227,8 @@ message Memo {
|
|||
bool has_task_list = 2;
|
||||
bool has_code = 3;
|
||||
bool has_incomplete_tasks = 4;
|
||||
// The title extracted from the first H1 heading, if present.
|
||||
string title = 5;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1679,8 +1679,10 @@ type Memo_Property struct {
|
|||
HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"`
|
||||
HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"`
|
||||
HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
// The title extracted from the first H1 heading, if present.
|
||||
Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *Memo_Property) Reset() {
|
||||
|
|
@ -1741,6 +1743,13 @@ func (x *Memo_Property) GetHasIncompleteTasks() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (x *Memo_Property) GetTitle() string {
|
||||
if x != nil {
|
||||
return x.Title
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Memo reference in relations.
|
||||
type MemoRelation_Memo struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
|
|
@ -1812,7 +1821,7 @@ const file_api_v1_memo_service_proto_rawDesc = "" +
|
|||
"\rreaction_type\x18\x04 \x01(\tB\x03\xe0A\x02R\freactionType\x12@\n" +
|
||||
"\vcreate_time\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampB\x03\xe0A\x03R\n" +
|
||||
"createTime:X\xeaAU\n" +
|
||||
"\x15memos.api.v1/Reaction\x12!memos/{memo}/reactions/{reaction}\x1a\x04name*\treactions2\breaction\"\xd8\b\n" +
|
||||
"\x15memos.api.v1/Reaction\x12!memos/{memo}/reactions/{reaction}\x1a\x04name*\treactions2\breaction\"\xee\b\n" +
|
||||
"\x04Memo\x12\x17\n" +
|
||||
"\x04name\x18\x01 \x01(\tB\x03\xe0A\bR\x04name\x12.\n" +
|
||||
"\x05state\x18\x02 \x01(\x0e2\x13.memos.api.v1.StateB\x03\xe0A\x02R\x05state\x123\n" +
|
||||
|
|
@ -1837,12 +1846,13 @@ const file_api_v1_memo_service_proto_rawDesc = "" +
|
|||
"\x06parent\x18\x10 \x01(\tB\x19\xe0A\x03\xfaA\x13\n" +
|
||||
"\x11memos.api.v1/MemoH\x00R\x06parent\x88\x01\x01\x12\x1d\n" +
|
||||
"\asnippet\x18\x11 \x01(\tB\x03\xe0A\x03R\asnippet\x12<\n" +
|
||||
"\blocation\x18\x12 \x01(\v2\x16.memos.api.v1.LocationB\x03\xe0A\x01H\x01R\blocation\x88\x01\x01\x1a\x96\x01\n" +
|
||||
"\blocation\x18\x12 \x01(\v2\x16.memos.api.v1.LocationB\x03\xe0A\x01H\x01R\blocation\x88\x01\x01\x1a\xac\x01\n" +
|
||||
"\bProperty\x12\x19\n" +
|
||||
"\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" +
|
||||
"\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" +
|
||||
"\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" +
|
||||
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks:7\xeaA4\n" +
|
||||
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x12\x14\n" +
|
||||
"\x05title\x18\x05 \x01(\tR\x05title:7\xeaA4\n" +
|
||||
"\x11memos.api.v1/Memo\x12\fmemos/{memo}\x1a\x04name*\x05memos2\x04memoB\t\n" +
|
||||
"\a_parentB\v\n" +
|
||||
"\t_location\"u\n" +
|
||||
|
|
|
|||
|
|
@ -2590,6 +2590,9 @@ components:
|
|||
type: boolean
|
||||
hasIncompleteTasks:
|
||||
type: boolean
|
||||
title:
|
||||
type: string
|
||||
description: The title extracted from the first H1 heading, if present.
|
||||
description: Computed properties of a memo.
|
||||
OAuth2Config:
|
||||
type: object
|
||||
|
|
|
|||
|
|
@ -88,8 +88,10 @@ type MemoPayload_Property struct {
|
|||
HasTaskList bool `protobuf:"varint,2,opt,name=has_task_list,json=hasTaskList,proto3" json:"has_task_list,omitempty"`
|
||||
HasCode bool `protobuf:"varint,3,opt,name=has_code,json=hasCode,proto3" json:"has_code,omitempty"`
|
||||
HasIncompleteTasks bool `protobuf:"varint,4,opt,name=has_incomplete_tasks,json=hasIncompleteTasks,proto3" json:"has_incomplete_tasks,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
// The title extracted from the first H1 heading, if present.
|
||||
Title string `protobuf:"bytes,5,opt,name=title,proto3" json:"title,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
|
||||
func (x *MemoPayload_Property) Reset() {
|
||||
|
|
@ -150,6 +152,13 @@ func (x *MemoPayload_Property) GetHasIncompleteTasks() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (x *MemoPayload_Property) GetTitle() string {
|
||||
if x != nil {
|
||||
return x.Title
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type MemoPayload_Location struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Placeholder string `protobuf:"bytes,1,opt,name=placeholder,proto3" json:"placeholder,omitempty"`
|
||||
|
|
@ -214,16 +223,17 @@ var File_store_memo_proto protoreflect.FileDescriptor
|
|||
|
||||
const file_store_memo_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"\x10store/memo.proto\x12\vmemos.store\"\xa0\x03\n" +
|
||||
"\x10store/memo.proto\x12\vmemos.store\"\xb6\x03\n" +
|
||||
"\vMemoPayload\x12=\n" +
|
||||
"\bproperty\x18\x01 \x01(\v2!.memos.store.MemoPayload.PropertyR\bproperty\x12=\n" +
|
||||
"\blocation\x18\x02 \x01(\v2!.memos.store.MemoPayload.LocationR\blocation\x12\x12\n" +
|
||||
"\x04tags\x18\x03 \x03(\tR\x04tags\x1a\x96\x01\n" +
|
||||
"\x04tags\x18\x03 \x03(\tR\x04tags\x1a\xac\x01\n" +
|
||||
"\bProperty\x12\x19\n" +
|
||||
"\bhas_link\x18\x01 \x01(\bR\ahasLink\x12\"\n" +
|
||||
"\rhas_task_list\x18\x02 \x01(\bR\vhasTaskList\x12\x19\n" +
|
||||
"\bhas_code\x18\x03 \x01(\bR\ahasCode\x120\n" +
|
||||
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x1af\n" +
|
||||
"\x14has_incomplete_tasks\x18\x04 \x01(\bR\x12hasIncompleteTasks\x12\x14\n" +
|
||||
"\x05title\x18\x05 \x01(\tR\x05title\x1af\n" +
|
||||
"\bLocation\x12 \n" +
|
||||
"\vplaceholder\x18\x01 \x01(\tR\vplaceholder\x12\x1a\n" +
|
||||
"\blatitude\x18\x02 \x01(\x01R\blatitude\x12\x1c\n" +
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ message MemoPayload {
|
|||
bool has_task_list = 2;
|
||||
bool has_code = 3;
|
||||
bool has_incomplete_tasks = 4;
|
||||
// The title extracted from the first H1 heading, if present.
|
||||
string title = 5;
|
||||
}
|
||||
|
||||
message Location {
|
||||
|
|
|
|||
|
|
@ -192,6 +192,7 @@ func convertMemoPropertyFromStore(property *storepb.MemoPayload_Property) *v1pb.
|
|||
HasTaskList: property.HasTaskList,
|
||||
HasCode: property.HasCode,
|
||||
HasIncompleteTasks: property.HasIncompleteTasks,
|
||||
Title: property.Title,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue