mirror of https://github.com/usememos/memos.git
feat(api): add link preview endpoint with metadata fallback
- add REST GET /api/v1/link:preview to return title/description/image
- resolve relative/protocol-relative image URLs; fallback to first <img> when og:image missing
- add first-image scraper and guard HTML meta fetch against 4xx/5xx responses
- register route in server startup; add LinkService proto stub for future gateway/grpc use (not yet wired)
This commit is contained in:
parent
65a19df4be
commit
bc4ae02bcd
|
|
@ -0,0 +1,46 @@
|
|||
package httpgetter
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// GetFirstImageURL returns the first <img src> found on the page, or empty string.
|
||||
func GetFirstImageURL(urlStr string) (string, error) {
|
||||
if err := validateURL(urlStr); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
resp, err := httpClient.Get(urlStr)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return "", errors.Errorf("failed to fetch page: status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
tokenizer := html.NewTokenizer(resp.Body)
|
||||
for {
|
||||
tt := tokenizer.Next()
|
||||
if tt == html.ErrorToken {
|
||||
break
|
||||
}
|
||||
if tt == html.StartTagToken || tt == html.SelfClosingTagToken {
|
||||
token := tokenizer.Token()
|
||||
if token.DataAtom == atom.Img {
|
||||
for _, attr := range token.Attr {
|
||||
if strings.EqualFold(attr.Key, "src") && attr.Val != "" {
|
||||
return attr.Val, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", nil
|
||||
}
|
||||
|
|
@ -43,6 +43,10 @@ func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
|
|||
}
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode >= 400 {
|
||||
return nil, errors.Errorf("failed to fetch page: status %d", response.StatusCode)
|
||||
}
|
||||
|
||||
mediatype, err := getMediatype(response)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package memos.api.v1;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
import "google/api/client.proto";
|
||||
import "google/api/field_behavior.proto";
|
||||
import "google/api/resource.proto";
|
||||
|
||||
option go_package = "gen/api/v1";
|
||||
|
||||
// LinkService provides link-preview lookups for arbitrary URLs.
service LinkService {
  // GetLinkPreview returns preview metadata (title, description, image)
  // extracted from the page at the requested URL.
  rpc GetLinkPreview(GetLinkPreviewRequest) returns (GetLinkPreviewResponse) {
    option (google.api.http) = {get: "/api/v1/link:preview"};
    option (google.api.method_signature) = "url";
  }
}
|
||||
|
||||
// LinkPreview is the preview metadata extracted from a fetched page.
message LinkPreview {
  option (google.api.resource) = {
    type: "memos.api.v1/LinkPreview"
    pattern: "linkPreviews/{link_preview}"
    name_field: "name"
    singular: "linkPreview"
    plural: "linkPreviews"
  };

  // Server-generated resource name.
  // Format: linkPreviews/{link_preview}
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY, (google.api.field_behavior) = IDENTIFIER];

  // URL of the page the preview was built from.
  string url = 2 [(google.api.field_behavior) = REQUIRED];

  // Page title.
  string title = 3;

  // Page description.
  string description = 4;

  // Preview image URL, already resolved.
  string image_url = 5;

  // Human-readable site or host name.
  string site_name = 6;
}
|
||||
|
||||
// GetLinkPreviewRequest is the request message for LinkService.GetLinkPreview.
message GetLinkPreviewRequest {
  // The page URL whose metadata should be fetched.
  string url = 1 [
    (google.api.field_behavior) = REQUIRED
  ];
}
|
||||
|
||||
// GetLinkPreviewResponse is the response message for LinkService.GetLinkPreview.
message GetLinkPreviewResponse {
  // The preview built for the requested URL.
  LinkPreview preview = 1 [
    (google.api.field_behavior) = REQUIRED
  ];
}
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
package v1
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
|
||||
"github.com/usememos/memos/plugin/httpgetter"
|
||||
)
|
||||
|
||||
// RegisterLinkRoutes registers lightweight HTTP routes for link previews.
|
||||
// We keep this as a REST handler (not gRPC) to avoid schema churn
|
||||
// and to reuse existing safety checks in the httpgetter plugin.
|
||||
func (s *APIV1Service) RegisterLinkRoutes(g *echo.Group) {
|
||||
g.GET("/api/v1/link:preview", s.handleGetLinkPreview)
|
||||
}
|
||||
|
||||
type (
	// linkPreviewResponse is the JSON envelope written by the link-preview
	// handler; the preview itself sits under the "preview" key.
	linkPreviewResponse struct {
		Preview linkPreview `json:"preview"`
	}

	// linkPreview holds the metadata reported for a single URL.
	linkPreview struct {
		// Title is the page title.
		Title string `json:"title"`
		// Description is the page description.
		Description string `json:"description"`
		// ImageURL is the preview image; empty when none was found.
		ImageURL string `json:"imageUrl"`
		// SiteName is the host name of the requested URL.
		SiteName string `json:"siteName"`
		// URL echoes the URL that was requested.
		URL string `json:"url"`
	}
)
|
||||
|
||||
func (s *APIV1Service) handleGetLinkPreview(c echo.Context) error {
|
||||
_ = s
|
||||
rawURL := c.QueryParam("url")
|
||||
if rawURL == "" {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, "url is required")
|
||||
}
|
||||
|
||||
meta, err := httpgetter.GetHTMLMeta(rawURL)
|
||||
if err != nil {
|
||||
return echo.NewHTTPError(http.StatusBadRequest, err.Error())
|
||||
}
|
||||
|
||||
parsedURL, _ := url.Parse(rawURL)
|
||||
siteName := ""
|
||||
if parsedURL != nil {
|
||||
siteName = parsedURL.Hostname()
|
||||
}
|
||||
|
||||
imageURL := meta.Image
|
||||
if parsedURL != nil && imageURL != "" {
|
||||
if u, err := url.Parse(imageURL); err == nil {
|
||||
if !u.IsAbs() {
|
||||
// handle protocol-relative
|
||||
if u.Host != "" {
|
||||
u.Scheme = parsedURL.Scheme
|
||||
imageURL = u.String()
|
||||
} else {
|
||||
// relative path -> join with base
|
||||
u.Scheme = parsedURL.Scheme
|
||||
u.Host = parsedURL.Host
|
||||
if !path.IsAbs(u.Path) {
|
||||
u.Path = path.Join(parsedURL.Path, "..", u.Path)
|
||||
}
|
||||
imageURL = u.String()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If meta image missing, try first <img> on page.
|
||||
if imageURL == "" {
|
||||
if firstImg, err := httpgetter.GetFirstImageURL(rawURL); err == nil && firstImg != "" {
|
||||
if parsedURL != nil {
|
||||
imageURL = toAbsoluteFromBase(parsedURL, firstImg)
|
||||
} else {
|
||||
imageURL = firstImg
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resp := linkPreviewResponse{
|
||||
Preview: linkPreview{
|
||||
Title: meta.Title,
|
||||
Description: meta.Description,
|
||||
ImageURL: imageURL,
|
||||
SiteName: siteName,
|
||||
URL: rawURL,
|
||||
},
|
||||
}
|
||||
return c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// toAbsoluteFromBase turns the URL reference raw, as found in the document
// located at base, into an absolute URL string.
//
//   - raw is returned unchanged when it is empty, when base is nil, or when it
//     cannot be parsed.
//   - An absolute reference (one with a scheme) is returned as parsed.
//   - A protocol-relative reference (//host/path) takes base's scheme.
//   - Any other reference takes base's scheme and host. A rooted path is kept
//     as is, a relative path is joined onto the directory of base's path, and
//     a reference with no path at all ("?q=1", "#frag") keeps base's full path.
func toAbsoluteFromBase(base *url.URL, raw string) string {
	if raw == "" || base == nil {
		return raw
	}
	u, err := url.Parse(raw)
	if err != nil {
		return raw
	}
	if u.IsAbs() {
		return u.String()
	}

	// Protocol-relative //host/path: only the scheme is missing.
	if u.Host != "" {
		u.Scheme = base.Scheme
		return u.String()
	}

	u.Scheme = base.Scheme
	u.Host = base.Host
	switch {
	case u.Path == "":
		// Query- or fragment-only reference: it points at the base document
		// itself, so keep the whole base path rather than its directory, and
		// keep the base query unless the reference brings its own.
		u.Path = base.Path
		u.RawPath = base.RawPath
		if u.RawQuery == "" && !u.ForceQuery {
			u.RawQuery = base.RawQuery
		}
	case !path.IsAbs(u.Path):
		// Relative path: resolve against the directory of the base path.
		// path.Join also collapses "." and ".." segments.
		u.Path = path.Join(path.Dir(base.Path), u.Path)
	}
	return u.String()
}
|
||||
|
|
@ -95,6 +95,8 @@ func NewServer(ctx context.Context, profile *profile.Profile, store *store.Store
|
|||
|
||||
// Create and register RSS routes (needs markdown service from apiV1Service).
|
||||
rss.NewRSSService(s.Profile, s.Store, apiV1Service.MarkdownService).RegisterRoutes(rootGroup)
|
||||
// Link preview helper route (REST).
|
||||
apiV1Service.RegisterLinkRoutes(rootGroup)
|
||||
// Register gRPC gateway as api v1.
|
||||
if err := apiV1Service.RegisterGateway(ctx, echoServer); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to register gRPC gateway")
|
||||
|
|
|
|||
Loading…
Reference in New Issue