feat(api): add link preview endpoint with metadata fallback

- add REST GET /api/v1/link:preview to return title/description/image

- resolve relative/protocol-relative image URLs; fallback to first <img> when og:image missing

- add first-image scraper and guard HTML meta fetch against 4xx/5xx responses

- register route in server startup; add LinkService proto stub for future gateway/grpc use (not yet wired)
This commit is contained in:
ayasy-el 2025-12-14 16:26:12 +07:00
parent 65a19df4be
commit bc4ae02bcd
5 changed files with 230 additions and 0 deletions

View File

@ -0,0 +1,46 @@
package httpgetter
import (
	"io"
	"strings"

	"github.com/pkg/errors"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)
// GetFirstImageURL fetches urlStr and returns the src attribute of the first
// <img> element that carries a non-empty src. It returns an empty string and a
// nil error when the document ends without such an element.
//
// An error is returned when validateURL rejects urlStr, when the request
// fails, when the server answers with a status code of 400 or above, or when
// the body cannot be read through to its end. The returned value is the raw
// attribute text: it may be relative and is not resolved against urlStr.
func GetFirstImageURL(urlStr string) (string, error) {
	if err := validateURL(urlStr); err != nil {
		return "", err
	}

	resp, err := httpClient.Get(urlStr)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		return "", errors.Errorf("failed to fetch page: status %d", resp.StatusCode)
	}

	tokenizer := html.NewTokenizer(resp.Body)
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// io.EOF marks the normal end of the document. Any other error
			// means the body was cut short, which must not be reported as
			// "this page has no image".
			if err := tokenizer.Err(); err != io.EOF {
				return "", errors.Wrap(err, "failed to read page")
			}
			return "", nil
		case html.StartTagToken, html.SelfClosingTagToken:
			token := tokenizer.Token()
			if token.DataAtom != atom.Img {
				continue
			}
			for _, attr := range token.Attr {
				// Skip empty src values so a later <img> can still match.
				if strings.EqualFold(attr.Key, "src") && attr.Val != "" {
					return attr.Val, nil
				}
			}
		}
	}
}

View File

@ -43,6 +43,10 @@ func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
}
defer response.Body.Close()
if response.StatusCode >= 400 {
return nil, errors.Errorf("failed to fetch page: status %d", response.StatusCode)
}
mediatype, err := getMediatype(response)
if err != nil {
return nil, err

View File

@ -0,0 +1,61 @@
syntax = "proto3";
package memos.api.v1;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
option go_package = "gen/api/v1";
// LinkService exposes link preview lookups: given a page URL it returns
// display metadata for that page.
service LinkService {
// GetLinkPreview fetches preview metadata for a URL (title, description, image).
rpc GetLinkPreview(GetLinkPreviewRequest) returns (GetLinkPreviewResponse) {
// HTTP binding: a GET on the custom-method path below. The path template
// has no variables, so request fields are not taken from the path.
option (google.api.http) = {
get: "/api/v1/link:preview"
};
// Declares a flattened method signature whose only argument is `url`.
option (google.api.method_signature) = "url";
}
}
// LinkPreview is the preview metadata extracted for a single URL.
message LinkPreview {
// Resource descriptor: instances are named with the pattern
// linkPreviews/{link_preview}, and `name` is the field holding that name.
option (google.api.resource) = {
type: "memos.api.v1/LinkPreview"
pattern: "linkPreviews/{link_preview}"
name_field: "name"
singular: "linkPreview"
plural: "linkPreviews"
};
// Resource name of the preview (server generated).
// Format: linkPreviews/{link_preview}
// Annotated both OUTPUT_ONLY and IDENTIFIER.
string name = 1 [
(google.api.field_behavior) = OUTPUT_ONLY,
(google.api.field_behavior) = IDENTIFIER
];
// The original URL that was fetched.
string url = 2 [(google.api.field_behavior) = REQUIRED];
// Extracted title of the page.
string title = 3;
// Extracted description of the page.
string description = 4;
// Resolved image URL for preview.
string image_url = 5;
// Human readable site/host name.
string site_name = 6;
}
// GetLinkPreviewRequest is the request message for LinkService.GetLinkPreview.
message GetLinkPreviewRequest {
// URL to fetch metadata from.
string url = 1 [(google.api.field_behavior) = REQUIRED];
}
// GetLinkPreviewResponse is the response message for LinkService.GetLinkPreview.
message GetLinkPreviewResponse {
// The preview metadata for the requested URL.
LinkPreview preview = 1 [(google.api.field_behavior) = REQUIRED];
}

View File

@ -0,0 +1,117 @@
package v1
import (
"net/http"
"net/url"
"path"
"github.com/labstack/echo/v4"
"github.com/usememos/memos/plugin/httpgetter"
)
// RegisterLinkRoutes mounts the link preview endpoint on g.
// The endpoint is a plain REST handler rather than a gRPC method, which
// avoids schema churn and lets it reuse the safety checks already present
// in the httpgetter plugin.
func (s *APIV1Service) RegisterLinkRoutes(g *echo.Group) {
	const previewPath = "/api/v1/link:preview"
	g.GET(previewPath, s.handleGetLinkPreview)
}
type (
	// linkPreviewResponse is the JSON envelope written by the link preview
	// handler; the preview itself sits under the "preview" key.
	linkPreviewResponse struct {
		Preview linkPreview `json:"preview"`
	}

	// linkPreview holds the metadata reported for one URL.
	linkPreview struct {
		// Title is the page title.
		Title string `json:"title"`
		// Description is the page description.
		Description string `json:"description"`
		// ImageURL is the preview image URL; empty when none was found.
		ImageURL string `json:"imageUrl"`
		// SiteName is the host name of the requested URL.
		SiteName string `json:"siteName"`
		// URL is the URL exactly as the client supplied it.
		URL string `json:"url"`
	}
)
// handleGetLinkPreview handles GET /api/v1/link:preview.
//
// It reads the target page from the "url" query parameter, loads the page
// metadata through httpgetter.GetHTMLMeta and writes a linkPreviewResponse as
// JSON with status 200. It answers 400 when the parameter is missing and also
// when the metadata fetch returns an error (the error text becomes the
// message).
//
// The preview image is the metadata image when present, otherwise the first
// <img> found on the page. Either one is made absolute against the requested
// URL with toAbsoluteFromBase, so both sources follow the same resolution
// rules.
func (s *APIV1Service) handleGetLinkPreview(c echo.Context) error {
	_ = s // the receiver is not used by this handler

	rawURL := c.QueryParam("url")
	if rawURL == "" {
		return echo.NewHTTPError(http.StatusBadRequest, "url is required")
	}

	meta, err := httpgetter.GetHTMLMeta(rawURL)
	if err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, err.Error())
	}

	// The parse error is deliberately ignored: url.Parse returns a nil URL on
	// failure, and every use below tolerates nil by leaving values as they are.
	parsedURL, _ := url.Parse(rawURL)

	siteName := ""
	if parsedURL != nil {
		siteName = parsedURL.Hostname()
	}

	// toAbsoluteFromBase returns its input unchanged for an empty string or a
	// nil base, so no extra guards are needed here.
	imageURL := toAbsoluteFromBase(parsedURL, meta.Image)

	// If meta image missing, try first <img> on page. This is best effort: a
	// failure of the second fetch only leaves the image empty.
	if imageURL == "" {
		if firstImg, err := httpgetter.GetFirstImageURL(rawURL); err == nil {
			imageURL = toAbsoluteFromBase(parsedURL, firstImg)
		}
	}

	return c.JSON(http.StatusOK, linkPreviewResponse{
		Preview: linkPreview{
			Title:       meta.Title,
			Description: meta.Description,
			ImageURL:    imageURL,
			SiteName:    siteName,
			URL:         rawURL,
		},
	})
}
// toAbsoluteFromBase turns raw into an absolute URL string using base for the
// missing parts.
//
// raw is returned unchanged when it is empty, when base is nil, or when raw
// cannot be parsed as a URL. Otherwise:
//   - a reference that already has a scheme is returned as parsed;
//   - a protocol-relative reference (//host/path) takes the scheme of base;
//   - anything else takes scheme and host from base, and a path that is not
//     rooted is joined onto the directory of base's path with path.Join.
func toAbsoluteFromBase(base *url.URL, raw string) string {
	if base == nil || raw == "" {
		return raw
	}

	ref, parseErr := url.Parse(raw)
	if parseErr != nil {
		return raw
	}

	switch {
	case ref.IsAbs():
		// Already carries a scheme; nothing to fill in.
	case ref.Host != "":
		// Protocol-relative //host/path
		ref.Scheme = base.Scheme
	default:
		// Pure relative path
		ref.Scheme, ref.Host = base.Scheme, base.Host
		if !path.IsAbs(ref.Path) {
			ref.Path = path.Join(path.Dir(base.Path), ref.Path)
		}
	}
	return ref.String()
}

View File

@ -95,6 +95,8 @@ func NewServer(ctx context.Context, profile *profile.Profile, store *store.Store
// Create and register RSS routes (needs markdown service from apiV1Service).
rss.NewRSSService(s.Profile, s.Store, apiV1Service.MarkdownService).RegisterRoutes(rootGroup)
// Link preview helper route (REST).
apiV1Service.RegisterLinkRoutes(rootGroup)
// Register gRPC gateway as api v1.
if err := apiV1Service.RegisterGateway(ctx, echoServer); err != nil {
return nil, errors.Wrap(err, "failed to register gRPC gateway")