diff --git a/plugin/httpgetter/first_image.go b/plugin/httpgetter/first_image.go
new file mode 100644
index 000000000..53c038b0f
--- /dev/null
+++ b/plugin/httpgetter/first_image.go
@@ -0,0 +1,60 @@
+package httpgetter
+
+import (
+	"io"
+	"strings"
+
+	"github.com/pkg/errors"
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
+)
+
+// GetFirstImageURL fetches urlStr and returns the src attribute of the first
+// <img> tag on the page that has a non-empty src. The value is not resolved
+// against urlStr, so it may be relative. An empty string with a nil error
+// means the page has no such tag. An error is returned when validateURL
+// rejects urlStr, the request fails, the response status is 400 or above, or
+// the body cannot be read to the end.
+func GetFirstImageURL(urlStr string) (string, error) {
+	if err := validateURL(urlStr); err != nil {
+		return "", err
+	}
+
+	resp, err := httpClient.Get(urlStr)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 400 {
+		return "", errors.Errorf("failed to fetch page: status %d", resp.StatusCode)
+	}
+
+	// Tokenize incrementally so scanning stops at the first match.
+	tokenizer := html.NewTokenizer(resp.Body)
+	for {
+		tt := tokenizer.Next()
+		if tt == html.ErrorToken {
+			break
+		}
+		if tt != html.StartTagToken && tt != html.SelfClosingTagToken {
+			continue
+		}
+		token := tokenizer.Token()
+		if token.DataAtom != atom.Img {
+			continue
+		}
+		for _, attr := range token.Attr {
+			if strings.EqualFold(attr.Key, "src") && attr.Val != "" {
+				return attr.Val, nil
+			}
+		}
+	}
+
+	// io.EOF is the normal end of the document; any other error means the
+	// body could not be fully read and must not be reported as "no image".
+	if err := tokenizer.Err(); err != nil && err != io.EOF {
+		return "", errors.Wrap(err, "failed to read page")
+	}
+	return "", nil
+}
diff --git a/plugin/httpgetter/html_meta.go b/plugin/httpgetter/html_meta.go
index 3ac719487..5201a124e 100644
--- a/plugin/httpgetter/html_meta.go
+++ b/plugin/httpgetter/html_meta.go
@@ -43,6 +43,10 @@ func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
 	}
 	defer response.Body.Close()
 
+	if response.StatusCode >= 400 {
+		return nil, errors.Errorf("failed to fetch page: status %d", response.StatusCode)
+	}
+
 	mediatype, err := getMediatype(response)
 	if err != nil {
 		return nil, err
diff --git a/proto/api/v1/link_service.proto b/proto/api/v1/link_service.proto
new file mode 100644
index 000000000..118bb5ade
--- /dev/null
+++ b/proto/api/v1/link_service.proto
@@ -0,0 +1,61 @@
+syntax = "proto3";
+
+package memos.api.v1;
+
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+
+option go_package = "gen/api/v1";
+
+service LinkService {
+  // GetLinkPreview fetches preview metadata for a URL (title, description, image).
+  rpc GetLinkPreview(GetLinkPreviewRequest) returns (GetLinkPreviewResponse) {
+    option (google.api.http) = {
+      get: "/api/v1/link:preview"
+    };
+    option (google.api.method_signature) = "url";
+  }
+}
+
+message LinkPreview {
+  option (google.api.resource) = {
+    type: "memos.api.v1/LinkPreview"
+    pattern: "linkPreviews/{link_preview}"
+    name_field: "name"
+    singular: "linkPreview"
+    plural: "linkPreviews"
+  };
+
+  // Resource name of the preview (server generated).
+  // Format: linkPreviews/{link_preview}
+  string name = 1 [
+    (google.api.field_behavior) = OUTPUT_ONLY,
+    (google.api.field_behavior) = IDENTIFIER
+  ];
+
+  // The original URL that was fetched.
+  string url = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // Extracted title of the page.
+  string title = 3;
+
+  // Extracted description of the page.
+  string description = 4;
+
+  // Resolved image URL for preview.
+  string image_url = 5;
+
+  // Human readable site/host name.
+  string site_name = 6;
+}
+
+message GetLinkPreviewRequest {
+  // URL to fetch metadata from.
+  string url = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+message GetLinkPreviewResponse {
+  LinkPreview preview = 1 [(google.api.field_behavior) = REQUIRED];
+}
diff --git a/server/router/api/v1/link_service.go b/server/router/api/v1/link_service.go
new file mode 100644
index 000000000..1ff5d9e05
--- /dev/null
+++ b/server/router/api/v1/link_service.go
@@ -0,0 +1,117 @@
+package v1
+
+import (
+	"net/http"
+	"net/url"
+	"path"
+
+	"github.com/labstack/echo/v4"
+
+	"github.com/usememos/memos/plugin/httpgetter"
+)
+
+// RegisterLinkRoutes registers lightweight HTTP routes for link previews.
+// We keep this as a REST handler (not gRPC) to avoid schema churn
+// and to reuse existing safety checks in the httpgetter plugin.
+func (s *APIV1Service) RegisterLinkRoutes(g *echo.Group) {
+	// NOTE(review): echo treats ':' as the start of a path parameter, so this
+	// pattern presumably matches more than the literal "link:preview" suffix;
+	// confirm, and escape the colon if the echo version in use supports it.
+	g.GET("/api/v1/link:preview", s.handleGetLinkPreview)
+}
+
+// linkPreviewResponse is the JSON envelope returned by handleGetLinkPreview.
+type linkPreviewResponse struct {
+	Preview linkPreview `json:"preview"`
+}
+
+// linkPreview holds the preview metadata extracted for a single URL; its JSON
+// tags define the field names clients see in the response body.
+type linkPreview struct {
+	Title       string `json:"title"`
+	Description string `json:"description"`
+	ImageURL    string `json:"imageUrl"`
+	SiteName    string `json:"siteName"`
+	URL         string `json:"url"`
+}
+
+// handleGetLinkPreview serves the link preview route. It reads the page URL
+// from the "url" query parameter and responds with the title and description
+// reported by httpgetter.GetHTMLMeta, an image URL made absolute against the
+// page URL (the meta image, or failing that the first <img> on the page), and
+// the page's hostname as site name. A missing or unparsable url parameter, or
+// a failed metadata fetch, yields 400 Bad Request.
+func (*APIV1Service) handleGetLinkPreview(c echo.Context) error {
+	rawURL := c.QueryParam("url")
+	if rawURL == "" {
+		return echo.NewHTTPError(http.StatusBadRequest, "url is required")
+	}
+
+	// Parse up front so an unparsable URL is rejected before any request is
+	// made and everything below can rely on a non-nil base URL.
+	parsedURL, err := url.Parse(rawURL)
+	if err != nil {
+		return echo.NewHTTPError(http.StatusBadRequest, "invalid url")
+	}
+
+	meta, err := httpgetter.GetHTMLMeta(rawURL)
+	if err != nil {
+		return echo.NewHTTPError(http.StatusBadRequest, err.Error())
+	}
+
+	// The meta image may be relative or protocol-relative; resolve it with the
+	// same helper as the <img> fallback so both follow identical rules.
+	imageURL := toAbsoluteFromBase(parsedURL, meta.Image)
+
+	// If the meta image is missing, try the first <img> on the page. This is
+	// best effort: a failed second fetch simply leaves the image empty.
+	if imageURL == "" {
+		if firstImg, err := httpgetter.GetFirstImageURL(rawURL); err == nil {
+			imageURL = toAbsoluteFromBase(parsedURL, firstImg)
+		}
+	}
+
+	resp := linkPreviewResponse{
+		Preview: linkPreview{
+			Title:       meta.Title,
+			Description: meta.Description,
+			ImageURL:    imageURL,
+			SiteName:    parsedURL.Hostname(),
+			URL:         rawURL,
+		},
+	}
+	return c.JSON(http.StatusOK, resp)
+}
+
+// toAbsoluteFromBase resolves raw, an image reference taken from a page,
+// against base, the URL of that page, and returns the result as a string.
+// Absolute references are returned re-serialized, protocol-relative ones
+// inherit base's scheme, and path references are joined onto base's host and
+// directory. raw is returned unchanged when it is empty, when base is nil, or
+// when raw cannot be parsed as a URL.
+func toAbsoluteFromBase(base *url.URL, raw string) string {
+	if raw == "" || base == nil {
+		return raw
+	}
+	u, err := url.Parse(raw)
+	if err != nil {
+		return raw
+	}
+	if u.IsAbs() {
+		return u.String()
+	}
+	// Protocol-relative //host/path
+	if u.Host != "" {
+		u.Scheme = base.Scheme
+		return u.String()
+	}
+	// Pure relative path
+	u.Scheme = base.Scheme
+	u.Host = base.Host
+	if !path.IsAbs(u.Path) {
+		// Anchor the join at "/" so that an empty base path or leading ".."
+		// segments in raw cannot produce a path that climbs above the root.
+		u.Path = path.Join("/", path.Dir(base.Path), u.Path)
+	}
+	return u.String()
+}
diff --git a/server/server.go b/server/server.go
index ace7415c1..b4f82eb0d 100644
--- a/server/server.go
+++ b/server/server.go
@@ -95,6 +95,8 @@ func NewServer(ctx context.Context, profile *profile.Profile, store *store.Store
 	// Create and register RSS routes (needs markdown service from apiV1Service).
 	rss.NewRSSService(s.Profile, s.Store, apiV1Service.MarkdownService).RegisterRoutes(rootGroup)
 
+	// Link preview helper route (REST).
+	apiV1Service.RegisterLinkRoutes(rootGroup)
 	// Register gRPC gateway as api v1.
 	if err := apiV1Service.RegisterGateway(ctx, echoServer); err != nil {
 		return nil, errors.Wrap(err, "failed to register gRPC gateway")