feat: implement EXIF metadata stripping for image uploads

This commit is contained in:
Johnny 2026-01-26 22:18:44 +08:00
parent 2f7c8dcea7
commit a7b0d71f6e
2 changed files with 271 additions and 0 deletions

View File

@ -0,0 +1,191 @@
package v1
import (
"bytes"
"image"
"image/color"
"image/jpeg"
"testing"
"github.com/disintegration/imaging"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestShouldStripExif(t *testing.T) {
t.Parallel()
tests := []struct {
name string
mimeType string
expected bool
}{
{
name: "JPEG should strip EXIF",
mimeType: "image/jpeg",
expected: true,
},
{
name: "JPG should strip EXIF",
mimeType: "image/jpg",
expected: true,
},
{
name: "TIFF should strip EXIF",
mimeType: "image/tiff",
expected: true,
},
{
name: "WebP should strip EXIF",
mimeType: "image/webp",
expected: true,
},
{
name: "HEIC should strip EXIF",
mimeType: "image/heic",
expected: true,
},
{
name: "HEIF should strip EXIF",
mimeType: "image/heif",
expected: true,
},
{
name: "PNG should not strip EXIF",
mimeType: "image/png",
expected: false,
},
{
name: "GIF should not strip EXIF",
mimeType: "image/gif",
expected: false,
},
{
name: "text file should not strip EXIF",
mimeType: "text/plain",
expected: false,
},
{
name: "PDF should not strip EXIF",
mimeType: "application/pdf",
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
result := shouldStripExif(tt.mimeType)
assert.Equal(t, tt.expected, result)
})
}
}
// TestStripImageExif exercises stripImageExif with a generated 100x100 solid
// red image. Each subtest passes image bytes together with a declared MIME type
// and checks that the result is still a decodable image (or that an error is
// returned for undecodable input).
func TestStripImageExif(t *testing.T) {
	t.Parallel()

	// Build the source image shared (read-only) by all parallel subtests.
	img := image.NewRGBA(image.Rect(0, 0, 100, 100))
	for y := 0; y < 100; y++ {
		for x := 0; x < 100; x++ {
			img.Set(x, y, color.RGBA{R: 255, G: 0, B: 0, A: 255})
		}
	}

	// Encode as JPEG; these bytes are the input for most subtests.
	var buf bytes.Buffer
	err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 90})
	require.NoError(t, err)
	originalData := buf.Bytes()

	// requireDecodable fails the test immediately unless data decodes as an image.
	requireDecodable := func(t *testing.T, data []byte) image.Image {
		t.Helper()
		decoded, err := imaging.Decode(bytes.NewReader(data))
		require.NoError(t, err)
		require.NotNil(t, decoded)
		return decoded
	}

	// requireJPEG fails the test unless data is detected as JPEG-encoded.
	requireJPEG := func(t *testing.T, data []byte) {
		t.Helper()
		_, format, err := image.DecodeConfig(bytes.NewReader(data))
		require.NoError(t, err)
		assert.Equal(t, "jpeg", format)
	}

	t.Run("strip JPEG metadata", func(t *testing.T) {
		t.Parallel()
		strippedData, err := stripImageExif(originalData, "image/jpeg")
		require.NoError(t, err)
		assert.NotEmpty(t, strippedData)

		// Verify it's still a valid image with the original dimensions.
		decodedImg := requireDecodable(t, strippedData)
		assert.Equal(t, 100, decodedImg.Bounds().Dx())
		assert.Equal(t, 100, decodedImg.Bounds().Dy())
	})

	t.Run("strip JPG metadata (alternate extension)", func(t *testing.T) {
		t.Parallel()
		strippedData, err := stripImageExif(originalData, "image/jpg")
		require.NoError(t, err)
		assert.NotEmpty(t, strippedData)

		// Verify it's still a valid image.
		requireDecodable(t, strippedData)
	})

	t.Run("strip PNG metadata", func(t *testing.T) {
		t.Parallel()
		// Encode as PNG first.
		var pngBuf bytes.Buffer
		err := imaging.Encode(&pngBuf, img, imaging.PNG)
		require.NoError(t, err)

		strippedData, err := stripImageExif(pngBuf.Bytes(), "image/png")
		require.NoError(t, err)
		assert.NotEmpty(t, strippedData)

		// Verify it's still a valid image with the original dimensions.
		decodedImg := requireDecodable(t, strippedData)
		assert.Equal(t, 100, decodedImg.Bounds().Dx())
		assert.Equal(t, 100, decodedImg.Bounds().Dy())
	})

	t.Run("handle WebP format by converting to JPEG", func(t *testing.T) {
		t.Parallel()
		// NOTE: the payload is JPEG bytes labelled as WebP, so this only covers
		// the MIME-type branch (non-PNG types are re-encoded as JPEG), not
		// decoding of real WebP data.
		strippedData, err := stripImageExif(originalData, "image/webp")
		require.NoError(t, err)
		assert.NotEmpty(t, strippedData)

		requireDecodable(t, strippedData)
		requireJPEG(t, strippedData)
	})

	t.Run("handle HEIC format by converting to JPEG", func(t *testing.T) {
		t.Parallel()
		// NOTE: as above, the payload is JPEG bytes labelled as HEIC.
		strippedData, err := stripImageExif(originalData, "image/heic")
		require.NoError(t, err)
		assert.NotEmpty(t, strippedData)

		requireDecodable(t, strippedData)
		requireJPEG(t, strippedData)
	})

	t.Run("return error for invalid image data", func(t *testing.T) {
		t.Parallel()
		invalidData := []byte("not an image")
		_, err := stripImageExif(invalidData, "image/jpeg")
		// require (not assert) so a nil error stops here instead of panicking
		// on err.Error() below.
		require.Error(t, err)
		assert.Contains(t, err.Error(), "failed to decode image")
	})

	t.Run("return error for empty image data", func(t *testing.T) {
		t.Parallel()
		emptyData := []byte{}
		_, err := stripImageExif(emptyData, "image/jpeg")
		require.Error(t, err)
	})
}

View File

@ -6,6 +6,7 @@ import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"mime"
"net/http"
"os"
@ -14,6 +15,7 @@ import (
"strings"
"time"
"github.com/disintegration/imaging"
"github.com/lithammer/shortuuid/v4"
"github.com/pkg/errors"
"google.golang.org/grpc/codes"
@ -38,6 +40,10 @@ const (
MebiByte = 1024 * 1024
// ThumbnailCacheFolder is the folder name where the thumbnail images are stored.
ThumbnailCacheFolder = ".thumbnail_cache"
// defaultJPEGQuality is the JPEG quality used when re-encoding images for EXIF stripping.
// Quality 95 maintains visual quality while ensuring metadata is removed.
defaultJPEGQuality = 95
)
var SupportedThumbnailMimeTypes = []string{
@ -45,6 +51,17 @@ var SupportedThumbnailMimeTypes = []string{
"image/jpeg",
}
// exifCapableImageTypes is the set of image MIME types that may contain EXIF metadata.
// Uploads whose type is listed here have their metadata stripped for privacy;
// shouldStripExif consults this map to make that decision.
//
// Keys are lowercase media types without parameters. A type that is absent
// (for example "image/png" or "image/gif") reads as false from the map.
var exifCapableImageTypes = map[string]bool{
"image/jpeg": true,
// "image/jpg" is a non-standard spelling sometimes used in place of "image/jpeg".
"image/jpg": true,
"image/tiff": true,
"image/webp": true,
"image/heic": true,
"image/heif": true,
}
func (s *APIV1Service) CreateAttachment(ctx context.Context, request *v1pb.CreateAttachmentRequest) (*v1pb.Attachment, error) {
user, err := s.fetchCurrentUser(ctx)
if err != nil {
@ -111,6 +128,21 @@ func (s *APIV1Service) CreateAttachment(ctx context.Context, request *v1pb.Creat
create.Size = int64(size)
create.Blob = request.Attachment.Content
// Strip EXIF metadata from images for privacy protection.
// This removes sensitive information like GPS location, device details, etc.
if shouldStripExif(create.Type) {
if strippedBlob, err := stripImageExif(create.Blob, create.Type); err != nil {
// Log warning but continue with original image to ensure uploads don't fail.
slog.Warn("failed to strip EXIF metadata from image",
slog.String("type", create.Type),
slog.String("filename", create.Filename),
slog.String("error", err.Error()))
} else {
create.Blob = strippedBlob
create.Size = int64(len(strippedBlob))
}
}
if err := SaveAttachmentBlob(ctx, s.Profile, s.Store, create); err != nil {
return nil, status.Errorf(codes.Internal, "failed to save attachment blob: %v", err)
}
@ -516,3 +548,51 @@ func (s *APIV1Service) validateAttachmentFilter(ctx context.Context, filterStr s
}
return nil
}
// shouldStripExif reports whether mimeType names an image format listed in
// exifCapableImageTypes (JPEG, TIFF, WebP, HEIC, HEIF), i.e. a format that may
// carry privacy-sensitive metadata such as GPS coordinates or device details.
//
// Media types are case-insensitive and may carry parameters, so the input is
// normalized before the lookup: "IMAGE/JPEG" and "image/jpeg; charset=binary"
// are both treated as "image/jpeg". Without this, a differently-cased or
// parameterized type would silently skip metadata stripping.
func shouldStripExif(mimeType string) bool {
	mediaType, _, err := mime.ParseMediaType(mimeType)
	if err != nil && mediaType == "" {
		// Not parseable as a media type; fall back to plain normalization so
		// the lookup below still behaves sensibly (and returns false for junk).
		mediaType = strings.ToLower(strings.TrimSpace(mimeType))
	}
	return exifCapableImageTypes[mediaType]
}
// stripImageExif drops metadata from an image by decoding it to pixels and
// encoding those pixels again; nothing from the original container is copied
// into the output.
//
// Decoding is done with imaging.AutoOrientation(true) so that the orientation
// recorded in the metadata is applied to the pixels before the metadata is lost.
//
// Output format is chosen from mimeType:
//   - "image/png": encoded as PNG
//   - anything else: encoded as JPEG at defaultJPEGQuality
//
// NOTE(review): input must be in a format imaging.Decode can read; whether that
// includes WebP/HEIC/HEIF depends on which image decoders are registered in the
// binary — confirm. Undecodable input yields a "failed to decode image" error.
//
// Returns the re-encoded bytes, or a wrapped error if decoding or encoding fails.
func stripImageExif(imageData []byte, mimeType string) ([]byte, error) {
	decoded, err := imaging.Decode(bytes.NewReader(imageData), imaging.AutoOrientation(true))
	if err != nil {
		return nil, errors.Wrap(err, "failed to decode image")
	}

	var out bytes.Buffer
	switch mimeType {
	case "image/png":
		// Keep PNG as PNG so the round trip stays lossless.
		err = imaging.Encode(&out, decoded, imaging.PNG)
	default:
		// Every other type is written out as JPEG.
		err = imaging.Encode(&out, decoded, imaging.JPEG, imaging.JPEGQuality(defaultJPEGQuality))
	}
	if err != nil {
		return nil, errors.Wrap(err, "failed to encode image")
	}

	return out.Bytes(), nil
}