mirror of https://github.com/usememos/memos.git
feat(api): add link preview endpoint with metadata fallback
- add REST GET /api/v1/link:preview to return title/description/image
- resolve relative/protocol-relative image URLs; fall back to the first <img> when og:image is missing
- add first-image scraper and guard HTML meta fetch against 4xx/5xx responses
- register route in server startup; add LinkService proto stub for future gateway/gRPC use (not yet wired)
This commit is contained in:
parent
65a19df4be
commit
bc4ae02bcd
|
|
@ -0,0 +1,46 @@
|
||||||
|
package httpgetter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
"golang.org/x/net/html/atom"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetFirstImageURL returns the first <img src> found on the page, or empty string.
|
||||||
|
func GetFirstImageURL(urlStr string) (string, error) {
|
||||||
|
if err := validateURL(urlStr); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := httpClient.Get(urlStr)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode >= 400 {
|
||||||
|
return "", errors.Errorf("failed to fetch page: status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenizer := html.NewTokenizer(resp.Body)
|
||||||
|
for {
|
||||||
|
tt := tokenizer.Next()
|
||||||
|
if tt == html.ErrorToken {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if tt == html.StartTagToken || tt == html.SelfClosingTagToken {
|
||||||
|
token := tokenizer.Token()
|
||||||
|
if token.DataAtom == atom.Img {
|
||||||
|
for _, attr := range token.Attr {
|
||||||
|
if strings.EqualFold(attr.Key, "src") && attr.Val != "" {
|
||||||
|
return attr.Val, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
@ -43,6 +43,10 @@ func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
|
||||||
}
|
}
|
||||||
defer response.Body.Close()
|
defer response.Body.Close()
|
||||||
|
|
||||||
|
if response.StatusCode >= 400 {
|
||||||
|
return nil, errors.Errorf("failed to fetch page: status %d", response.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
mediatype, err := getMediatype(response)
|
mediatype, err := getMediatype(response)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package memos.api.v1;
|
||||||
|
|
||||||
|
import "google/api/annotations.proto";
|
||||||
|
import "google/api/client.proto";
|
||||||
|
import "google/api/field_behavior.proto";
|
||||||
|
import "google/api/resource.proto";
|
||||||
|
|
||||||
|
option go_package = "gen/api/v1";
|
||||||
|
|
||||||
|
// LinkService serves preview metadata for external links.
service LinkService {
  // GetLinkPreview fetches preview metadata for a URL (title, description, image).
  rpc GetLinkPreview(GetLinkPreviewRequest) returns (GetLinkPreviewResponse) {
    option (google.api.http) = {get: "/api/v1/link:preview"};
    option (google.api.method_signature) = "url";
  }
}
|
||||||
|
|
||||||
|
// LinkPreview is the preview metadata extracted for a single URL.
message LinkPreview {
  option (google.api.resource) = {
    type: "memos.api.v1/LinkPreview"
    pattern: "linkPreviews/{link_preview}"
    name_field: "name"
    singular: "linkPreview"
    plural: "linkPreviews"
  };

  // Resource name of the preview (server generated).
  // Format: linkPreviews/{link_preview}
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IDENTIFIER
  ];

  // The original URL that was fetched.
  string url = 2 [(google.api.field_behavior) = REQUIRED];

  // Title extracted from the page.
  string title = 3;

  // Description extracted from the page.
  string description = 4;

  // Resolved image URL to show in the preview.
  string image_url = 5;

  // Human-readable site/host name.
  string site_name = 6;
}
|
||||||
|
|
||||||
|
// GetLinkPreviewRequest is the request message for LinkService.GetLinkPreview.
message GetLinkPreviewRequest {
  // URL of the page to fetch preview metadata from.
  string url = 1 [(google.api.field_behavior) = REQUIRED];
}
|
||||||
|
|
||||||
|
// GetLinkPreviewResponse is the response message for LinkService.GetLinkPreview.
message GetLinkPreviewResponse {
  // The preview built for the requested URL.
  LinkPreview preview = 1 [(google.api.field_behavior) = REQUIRED];
}
|
||||||
|
|
@ -0,0 +1,117 @@
|
||||||
|
package v1
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
|
||||||
|
"github.com/labstack/echo/v4"
|
||||||
|
|
||||||
|
"github.com/usememos/memos/plugin/httpgetter"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RegisterLinkRoutes registers lightweight HTTP routes for link previews.
|
||||||
|
// We keep this as a REST handler (not gRPC) to avoid schema churn
|
||||||
|
// and to reuse existing safety checks in the httpgetter plugin.
|
||||||
|
func (s *APIV1Service) RegisterLinkRoutes(g *echo.Group) {
|
||||||
|
g.GET("/api/v1/link:preview", s.handleGetLinkPreview)
|
||||||
|
}
|
||||||
|
|
||||||
|
// JSON payload types returned by the link preview endpoint.
type (
	// linkPreviewResponse is the top-level response body; the preview itself
	// is nested under the "preview" key.
	linkPreviewResponse struct {
		Preview linkPreview `json:"preview"`
	}

	// linkPreview carries the metadata shown for a previewed link.
	linkPreview struct {
		// Title is the page title.
		Title string `json:"title"`
		// Description is the page description.
		Description string `json:"description"`
		// ImageURL is the preview image URL, or empty when none was found.
		ImageURL string `json:"imageUrl"`
		// SiteName is the host name of the previewed URL.
		SiteName string `json:"siteName"`
		// URL is the URL that was requested, as supplied by the client.
		URL string `json:"url"`
	}
)
|
||||||
|
|
||||||
|
func (s *APIV1Service) handleGetLinkPreview(c echo.Context) error {
|
||||||
|
_ = s
|
||||||
|
rawURL := c.QueryParam("url")
|
||||||
|
if rawURL == "" {
|
||||||
|
return echo.NewHTTPError(http.StatusBadRequest, "url is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
meta, err := httpgetter.GetHTMLMeta(rawURL)
|
||||||
|
if err != nil {
|
||||||
|
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
parsedURL, _ := url.Parse(rawURL)
|
||||||
|
siteName := ""
|
||||||
|
if parsedURL != nil {
|
||||||
|
siteName = parsedURL.Hostname()
|
||||||
|
}
|
||||||
|
|
||||||
|
imageURL := meta.Image
|
||||||
|
if parsedURL != nil && imageURL != "" {
|
||||||
|
if u, err := url.Parse(imageURL); err == nil {
|
||||||
|
if !u.IsAbs() {
|
||||||
|
// handle protocol-relative
|
||||||
|
if u.Host != "" {
|
||||||
|
u.Scheme = parsedURL.Scheme
|
||||||
|
imageURL = u.String()
|
||||||
|
} else {
|
||||||
|
// relative path -> join with base
|
||||||
|
u.Scheme = parsedURL.Scheme
|
||||||
|
u.Host = parsedURL.Host
|
||||||
|
if !path.IsAbs(u.Path) {
|
||||||
|
u.Path = path.Join(parsedURL.Path, "..", u.Path)
|
||||||
|
}
|
||||||
|
imageURL = u.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If meta image missing, try first <img> on page.
|
||||||
|
if imageURL == "" {
|
||||||
|
if firstImg, err := httpgetter.GetFirstImageURL(rawURL); err == nil && firstImg != "" {
|
||||||
|
if parsedURL != nil {
|
||||||
|
imageURL = toAbsoluteFromBase(parsedURL, firstImg)
|
||||||
|
} else {
|
||||||
|
imageURL = firstImg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := linkPreviewResponse{
|
||||||
|
Preview: linkPreview{
|
||||||
|
Title: meta.Title,
|
||||||
|
Description: meta.Description,
|
||||||
|
ImageURL: imageURL,
|
||||||
|
SiteName: siteName,
|
||||||
|
URL: rawURL,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return c.JSON(http.StatusOK, resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
// toAbsoluteFromBase resolves raw, a URL reference found on a page, against the
// page URL base and returns the result as a string.
//
//   - An empty raw, a nil base, or a raw that fails to parse is returned
//     unchanged.
//   - A reference that already has a scheme is returned re-serialized.
//   - A protocol-relative reference (//host/path) inherits base's scheme.
//   - Any other reference inherits base's scheme and host. A path that does
//     not start with "/" is joined onto the directory of base's path; when
//     base has no rooted path, the site root is used, so ".." segments can
//     never climb above "/".
func toAbsoluteFromBase(base *url.URL, raw string) string {
	if raw == "" || base == nil {
		return raw
	}
	u, err := url.Parse(raw)
	if err != nil {
		return raw
	}

	switch {
	case u.IsAbs():
		return u.String()
	case u.Host != "":
		// Protocol-relative: only the scheme is missing.
		u.Scheme = base.Scheme
		return u.String()
	}

	// Path-only reference.
	u.Scheme = base.Scheme
	u.Host = base.Host
	if !path.IsAbs(u.Path) {
		dir := path.Dir(base.Path)
		if !path.IsAbs(dir) {
			// path.Dir yields "." for an empty base path; anchor at the root
			// so that path.Join can collapse leading ".." segments.
			dir = "/"
		}
		u.Path = path.Join(dir, u.Path)
	}
	return u.String()
}
|
||||||
|
|
@ -95,6 +95,8 @@ func NewServer(ctx context.Context, profile *profile.Profile, store *store.Store
|
||||||
|
|
||||||
// Create and register RSS routes (needs markdown service from apiV1Service).
|
// Create and register RSS routes (needs markdown service from apiV1Service).
|
||||||
rss.NewRSSService(s.Profile, s.Store, apiV1Service.MarkdownService).RegisterRoutes(rootGroup)
|
rss.NewRSSService(s.Profile, s.Store, apiV1Service.MarkdownService).RegisterRoutes(rootGroup)
|
||||||
|
// Link preview helper route (REST).
|
||||||
|
apiV1Service.RegisterLinkRoutes(rootGroup)
|
||||||
// Register gRPC gateway as api v1.
|
// Register gRPC gateway as api v1.
|
||||||
if err := apiV1Service.RegisterGateway(ctx, echoServer); err != nil {
|
if err := apiV1Service.RegisterGateway(ctx, echoServer); err != nil {
|
||||||
return nil, errors.Wrap(err, "failed to register gRPC gateway")
|
return nil, errors.Wrap(err, "failed to register gRPC gateway")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue