Merge 573f2cf58e into b91d7dfe5b
This commit is contained in:
commit
bb941c5269
|
|
@ -3615,9 +3615,17 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
|
||||
// set input pixel values
|
||||
if (!imgs.is_audio) {
|
||||
// detect number of channels from the buffer size
|
||||
const int nx = imgs.entries[0]->nx;
|
||||
const int ny = imgs.entries[0]->ny;
|
||||
const int n = nx * ny;
|
||||
const size_t buf_size = imgs.entries[0]->buf.size();
|
||||
const int n_channels = (int)(buf_size / n);
|
||||
GGML_ASSERT(n_channels == 3 || n_channels == 6);
|
||||
|
||||
size_t nelem = 0;
|
||||
for (const auto & img : imgs.entries) {
|
||||
nelem += img->nx * img->ny * 3;
|
||||
nelem += img->nx * img->ny * n_channels;
|
||||
}
|
||||
std::vector<float> inp_raw(nelem);
|
||||
|
||||
|
|
@ -3631,21 +3639,21 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|||
// │ H │ channel = B
|
||||
// └─────┘ │
|
||||
// ──────┘ x B
|
||||
//
|
||||
// for 6-channel video input, same layout but with 6 planar channels
|
||||
|
||||
for (size_t i = 0; i < imgs.entries.size(); i++) {
|
||||
const int nx = imgs.entries[i]->nx;
|
||||
const int ny = imgs.entries[i]->ny;
|
||||
const int n = nx * ny;
|
||||
for (int b = 0; b < batch_size; b++) {
|
||||
const int cur_nx = imgs.entries[b]->nx;
|
||||
const int cur_ny = imgs.entries[b]->ny;
|
||||
const int cur_n = cur_nx * cur_ny;
|
||||
|
||||
for (int b = 0; b < batch_size; b++) {
|
||||
float * batch_entry = inp_raw.data() + b * (3*n);
|
||||
for (int y = 0; y < ny; y++) {
|
||||
for (int x = 0; x < nx; x++) {
|
||||
size_t base_src = 3*(y * nx + x); // idx of the first channel
|
||||
size_t base_dst = y * nx + x; // idx of the first channel
|
||||
batch_entry[ base_dst] = imgs.entries[b]->buf[base_src ];
|
||||
batch_entry[1*n + base_dst] = imgs.entries[b]->buf[base_src + 1];
|
||||
batch_entry[2*n + base_dst] = imgs.entries[b]->buf[base_src + 2];
|
||||
float * batch_entry = inp_raw.data() + b * (n_channels * cur_n);
|
||||
for (int y = 0; y < cur_ny; y++) {
|
||||
for (int x = 0; x < cur_nx; x++) {
|
||||
size_t base_src = n_channels * (y * cur_nx + x);
|
||||
size_t base_dst = y * cur_nx + x;
|
||||
for (int c = 0; c < n_channels; c++) {
|
||||
batch_entry[c * cur_n + base_dst] = imgs.entries[b]->buf[base_src + c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,16 +13,34 @@ ggml_cgraph * clip_graph_qwen3vl::build() {
|
|||
|
||||
int mrope_sections[4] = {d_head/4, d_head/4, d_head/4, d_head/4};
|
||||
|
||||
ggml_tensor * inp_raw = build_inp_raw();
|
||||
ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
|
||||
// detect video: 6-channel input means interleaved frame pairs (even_rgb + odd_rgb)
|
||||
// for images (3ch), both Conv2Ds receive the same input (original behavior)
|
||||
// for video (6ch), Conv2D_0 gets even frames (ch 0-2), Conv2D_1 gets odd frames (ch 3-5)
|
||||
const bool is_video = (img.buf.size() == (size_t)img.nx * img.ny * 6);
|
||||
const int n_channels = is_video ? 6 : 3;
|
||||
|
||||
ggml_tensor * inp_raw = build_inp_raw(n_channels);
|
||||
|
||||
ggml_tensor * inp;
|
||||
if (is_video) {
|
||||
const size_t nb1 = ggml_row_size(inp_raw->type, img.nx);
|
||||
const size_t nb2 = nb1 * img.ny;
|
||||
ggml_tensor * inp_even = ggml_view_3d(ctx0, inp_raw, img.nx, img.ny, 3, nb1, nb2, 0);
|
||||
ggml_tensor * inp_odd = ggml_view_3d(ctx0, inp_raw, img.nx, img.ny, 3, nb1, nb2, nb2 * 3);
|
||||
inp = ggml_add(ctx0,
|
||||
ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_even, patch_size, patch_size, 0, 0, 1, 1),
|
||||
ggml_conv_2d(ctx0, model.patch_embeddings_1, inp_odd, patch_size, patch_size, 0, 0, 1, 1));
|
||||
} else {
|
||||
inp = ggml_add(ctx0,
|
||||
ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1),
|
||||
ggml_conv_2d(ctx0, model.patch_embeddings_1, inp_raw, patch_size, patch_size, 0, 0, 1, 1));
|
||||
}
|
||||
|
||||
GGML_ASSERT(img.nx % (patch_size * 2) == 0);
|
||||
GGML_ASSERT(img.ny % (patch_size * 2) == 0);
|
||||
|
||||
// second conv dimension
|
||||
// spatial merge
|
||||
{
|
||||
auto inp_1 = ggml_conv_2d(ctx0, model.patch_embeddings_1, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
|
||||
inp = ggml_add(ctx0, inp, inp_1);
|
||||
|
||||
inp = ggml_permute(ctx0, inp, 1, 2, 0, 3); // [w, h, c, b] -> [c, w, h, b]
|
||||
inp = ggml_cont_4d(
|
||||
|
|
|
|||
|
|
@ -174,6 +174,28 @@ struct decode_embd_batch {
|
|||
}
|
||||
}
|
||||
|
||||
// M-RoPE for video: 3D positions [temporal, height, width]
|
||||
void set_position_mrope_3d(llama_pos pos_0, int nx, int ny, int nt, llama_seq_id seq_id) {
|
||||
GGML_ASSERT(n_pos_per_embd == 4);
|
||||
seq_id_0[0] = seq_id;
|
||||
for (int t = 0; t < nt; t++) {
|
||||
for (int y = 0; y < ny; y++) {
|
||||
for (int x = 0; x < nx; x++) {
|
||||
int i = t * ny * nx + y * nx + x;
|
||||
pos[i ] = pos_0 + t;
|
||||
pos[i + batch.n_tokens ] = pos_0 + y;
|
||||
pos[i + batch.n_tokens * 2] = pos_0 + x;
|
||||
pos[i + batch.n_tokens * 3] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < batch.n_tokens; i++) {
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
// M-RoPE for audio
|
||||
void set_position_mrope_1d(llama_pos pos_0, llama_seq_id seq_id) {
|
||||
GGML_ASSERT(n_pos_per_embd == 4);
|
||||
|
|
@ -260,7 +282,12 @@ int32_t mtmd_helper_decode_image_chunk(
|
|||
}
|
||||
const int nx = mtmd_image_tokens_get_nx(image_tokens);
|
||||
const int ny = mtmd_image_tokens_get_ny(image_tokens);
|
||||
batch_embd.set_position_mrope_2d(n_past, nx, ny, seq_id);
|
||||
const int nt = mtmd_image_tokens_get_nt(image_tokens);
|
||||
if (nt > 1) {
|
||||
batch_embd.set_position_mrope_3d(n_past, nx, ny, nt, seq_id);
|
||||
} else {
|
||||
batch_embd.set_position_mrope_2d(n_past, nx, ny, seq_id);
|
||||
}
|
||||
} else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
|
||||
batch_embd.set_position_mrope_1d(n_past, seq_id);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -24,9 +24,11 @@
|
|||
|
||||
// represents raw image data, layout is RGBRGBRGB...
|
||||
// length of data must be nx * ny * 3
|
||||
// for video: data is n_frames sequential RGB frames, each nx * ny * 3 bytes
|
||||
struct mtmd_bitmap {
|
||||
uint32_t nx;
|
||||
uint32_t ny;
|
||||
uint32_t n_frames = 0; // 0 for single images, >= 2 (even) for video
|
||||
std::vector<unsigned char> data;
|
||||
std::string id; // optional user-defined id, for ex: can be set to image hash, useful for KV cache tracking
|
||||
bool is_audio = false; // true if the bitmap is audio
|
||||
|
|
@ -35,8 +37,9 @@ struct mtmd_bitmap {
|
|||
struct mtmd_image_tokens {
|
||||
uint32_t nx; // number of tokens in x direction
|
||||
uint32_t ny; // number of tokens in y direction
|
||||
uint32_t nt = 1; // number of temporal positions (1 for images, > 1 for video)
|
||||
bool use_mrope_pos = false; // use M-RoPE position counting (the whole image is 1 temporal position)
|
||||
uint32_t n_tokens() const { return nx * ny; }
|
||||
uint32_t n_tokens() const { return nt * nx * ny; }
|
||||
clip_image_f32_batch batch_f32; // preprocessed image patches
|
||||
std::string id; // optional user-defined ID, useful for KV cache tracking
|
||||
|
||||
|
|
@ -44,6 +47,7 @@ struct mtmd_image_tokens {
|
|||
return mtmd_image_tokens{
|
||||
nx,
|
||||
ny,
|
||||
nt,
|
||||
use_mrope_pos,
|
||||
batch_f32.clone(),
|
||||
id
|
||||
|
|
@ -553,6 +557,10 @@ struct mtmd_tokenizer {
|
|||
}
|
||||
|
||||
int32_t add_media(const mtmd_bitmap * bitmap) {
|
||||
if (bitmap->n_frames >= 2) {
|
||||
return add_video(bitmap);
|
||||
}
|
||||
|
||||
if (!bitmap->is_audio) {
|
||||
// handle image
|
||||
|
||||
|
|
@ -743,6 +751,102 @@ struct mtmd_tokenizer {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// preprocess video frames and create an image chunk with temporal dimension
|
||||
// frames are paired (even+odd), each pair becomes one 6-channel image
|
||||
// each pair is encoded independently through the ViT (per-frame attention)
|
||||
int32_t add_video(const mtmd_bitmap * bitmap) {
|
||||
if (!ctx->ctx_v) {
|
||||
LOG_ERR("%s: error: model does not support vision input\n", __func__);
|
||||
return 2;
|
||||
}
|
||||
|
||||
const uint32_t n_frames = bitmap->n_frames;
|
||||
const uint32_t n_pairs = n_frames / 2;
|
||||
const size_t frame_bytes = (size_t)bitmap->nx * bitmap->ny * 3;
|
||||
|
||||
if (!ctx->img_beg.empty()) {
|
||||
add_text(ctx->img_beg, true);
|
||||
}
|
||||
|
||||
// preprocess each frame individually
|
||||
clip_image_f32_batch all_frames;
|
||||
for (uint32_t f = 0; f < n_frames; f++) {
|
||||
clip_image_u8_ptr img_u8(clip_image_u8_init());
|
||||
img_u8->nx = bitmap->nx;
|
||||
img_u8->ny = bitmap->ny;
|
||||
img_u8->buf.resize(frame_bytes);
|
||||
std::memcpy(img_u8->buf.data(), bitmap->data.data() + f * frame_bytes, frame_bytes);
|
||||
|
||||
clip_image_f32_batch frame_batch;
|
||||
bool ok = clip_image_preprocess(ctx->ctx_v, img_u8.get(), &frame_batch);
|
||||
if (!ok) {
|
||||
LOG_ERR("Unable to preprocess video frame %u\n", f);
|
||||
return 2;
|
||||
}
|
||||
GGML_ASSERT(frame_batch.entries.size() == 1);
|
||||
all_frames.entries.push_back(std::move(frame_batch.entries[0]));
|
||||
}
|
||||
|
||||
const int frame_nx = all_frames.entries[0]->nx;
|
||||
const int frame_ny = all_frames.entries[0]->ny;
|
||||
const int n_pixels = frame_nx * frame_ny;
|
||||
|
||||
// interleave frame pairs into 6-channel images (even_rgb + odd_rgb)
|
||||
// each pair is a separate batch entry, encoded independently
|
||||
clip_image_f32_batch pair_batch;
|
||||
for (uint32_t p = 0; p < n_pairs; p++) {
|
||||
const auto & even = all_frames.entries[p * 2];
|
||||
const auto & odd = all_frames.entries[p * 2 + 1];
|
||||
GGML_ASSERT(even->nx == frame_nx && even->ny == frame_ny);
|
||||
GGML_ASSERT(odd->nx == frame_nx && odd->ny == frame_ny);
|
||||
|
||||
clip_image_f32_ptr pair(clip_image_f32_init());
|
||||
pair->nx = frame_nx;
|
||||
pair->ny = frame_ny;
|
||||
pair->buf.resize((size_t)n_pixels * 6);
|
||||
|
||||
for (int i = 0; i < n_pixels; i++) {
|
||||
const int dst = i * 6;
|
||||
const int src = i * 3;
|
||||
pair->buf[dst + 0] = even->buf[src + 0];
|
||||
pair->buf[dst + 1] = even->buf[src + 1];
|
||||
pair->buf[dst + 2] = even->buf[src + 2];
|
||||
pair->buf[dst + 3] = odd->buf[src + 0];
|
||||
pair->buf[dst + 4] = odd->buf[src + 1];
|
||||
pair->buf[dst + 5] = odd->buf[src + 2];
|
||||
}
|
||||
pair_batch.entries.push_back(std::move(pair));
|
||||
}
|
||||
|
||||
const uint32_t tokens_x = clip_n_output_tokens_x(ctx->ctx_v, pair_batch.entries[0].get());
|
||||
const uint32_t tokens_y = clip_n_output_tokens_y(ctx->ctx_v, pair_batch.entries[0].get());
|
||||
|
||||
mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens);
|
||||
image_tokens->nx = tokens_x;
|
||||
image_tokens->ny = tokens_y;
|
||||
image_tokens->nt = n_pairs;
|
||||
image_tokens->use_mrope_pos = true;
|
||||
image_tokens->batch_f32 = std::move(pair_batch);
|
||||
image_tokens->id = bitmap->id;
|
||||
|
||||
LOG_DBG("video: nt=%u, nx=%u, ny=%u, n_tokens=%u\n",
|
||||
image_tokens->nt, image_tokens->nx, image_tokens->ny, image_tokens->n_tokens());
|
||||
|
||||
mtmd_input_chunk chunk{
|
||||
MTMD_INPUT_CHUNK_TYPE_IMAGE,
|
||||
{}, // text tokens
|
||||
std::move(image_tokens),
|
||||
nullptr, // audio tokens
|
||||
};
|
||||
cur.entries.emplace_back(std::move(chunk));
|
||||
|
||||
if (!ctx->img_end.empty()) {
|
||||
add_text(ctx->img_end, true);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::vector<mtmd_input_chunk> split_batch_to_chunk(clip_image_f32_batch && batch_f32, const std::string & id) {
|
||||
std::vector<mtmd_input_chunk> chunks;
|
||||
|
||||
|
|
@ -855,10 +959,13 @@ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens)
|
|||
ctx->image_embd_v.resize(image_tokens->n_tokens() * n_mmproj_embd);
|
||||
bool ok = false;
|
||||
|
||||
if (clip_is_llava(ctx_clip)
|
||||
if (image_tokens->nt > 1
|
||||
|| clip_is_llava(ctx_clip)
|
||||
|| clip_is_minicpmv(ctx_clip)
|
||||
|| clip_is_glm(ctx_clip)) {
|
||||
// TODO @ngxson : llava does not support batched encoding ; this should be fixed inside clip_image_batch_encode()
|
||||
// encode each batch entry independently
|
||||
// video: each entry is one frame pair, encoded with per-frame attention
|
||||
// llava/minicpmv/glm: does not support batched encoding
|
||||
const auto & entries = image_tokens->batch_f32.entries;
|
||||
for (size_t i = 0; i < entries.size(); i++) {
|
||||
int n_tokens_per_image = clip_n_output_tokens(ctx_clip, entries[i].get());
|
||||
|
|
@ -938,6 +1045,21 @@ mtmd_bitmap * mtmd_bitmap_init(uint32_t nx,
|
|||
return bitmap;
|
||||
}
|
||||
|
||||
// create a bitmap holding n_frames sequential RGB frames (nx * ny * 3 bytes each);
// n_frames must be >= 2 and even (frames are later consumed as even/odd pairs)
mtmd_bitmap * mtmd_bitmap_init_from_video(uint32_t nx,
                                          uint32_t ny,
                                          uint32_t n_frames,
                                          const unsigned char * data) {
    GGML_ASSERT(n_frames >= 2 && n_frames % 2 == 0);
    mtmd_bitmap * bitmap = new mtmd_bitmap;
    bitmap->nx       = nx;
    bitmap->ny       = ny;
    bitmap->n_frames = n_frames;
    const size_t data_size = (size_t)nx * ny * 3 * n_frames;
    // copy the caller's pixel data into the owned buffer
    bitmap->data.assign(data, data + data_size);
    return bitmap;
}
|
||||
|
||||
mtmd_bitmap * mtmd_bitmap_init_from_audio(size_t n_samples,
|
||||
const float * data) {
|
||||
mtmd_bitmap * bitmap = new mtmd_bitmap;
|
||||
|
|
@ -970,6 +1092,14 @@ bool mtmd_bitmap_is_audio(const mtmd_bitmap * bitmap) {
|
|||
return bitmap->is_audio;
|
||||
}
|
||||
|
||||
bool mtmd_bitmap_is_video(const mtmd_bitmap * bitmap) {
|
||||
return bitmap->n_frames >= 2;
|
||||
}
|
||||
|
||||
uint32_t mtmd_bitmap_get_n_frames(const mtmd_bitmap * bitmap) {
|
||||
return bitmap->n_frames;
|
||||
}
|
||||
|
||||
const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap) {
|
||||
return bitmap->id.c_str();
|
||||
}
|
||||
|
|
@ -1106,15 +1236,18 @@ size_t mtmd_image_tokens_get_ny(const mtmd_image_tokens * image_tokens) {
|
|||
return image_tokens->ny;
|
||||
}
|
||||
|
||||
size_t mtmd_image_tokens_get_nt(const mtmd_image_tokens * image_tokens) {
|
||||
return image_tokens->nt;
|
||||
}
|
||||
|
||||
const char * mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens) {
|
||||
return image_tokens->id.c_str();
|
||||
}
|
||||
|
||||
// number of temporal positions consumed by this image/video in the KV cache
// defect fixed: the span contained both the pre-video return (max(nx, ny),
// which ignored nt) and the new return — duplicated/unreachable statements;
// only the nt-aware version is kept
llama_pos mtmd_image_tokens_get_n_pos(const mtmd_image_tokens * image_tokens) {
    if (image_tokens->use_mrope_pos) {
        // for M-RoPE, n_pos = max(t, h, w)
        return (llama_pos)std::max({image_tokens->nt, image_tokens->nx, image_tokens->ny});
    }
    // non-M-RoPE models consume one position per token
    return image_tokens->n_tokens();
}
|
||||
|
|
|
|||
|
|
@ -134,17 +134,24 @@ MTMD_API int mtmd_get_audio_sample_rate(mtmd_context * ctx);
|
|||
// if bitmap is image:
|
||||
// length of data must be nx * ny * 3
|
||||
// the data is in RGBRGBRGB... format
|
||||
// if bitmap is video:
|
||||
// length of data must be nx * ny * 3 * n_frames
|
||||
// n_frames must be >= 2 and even
|
||||
// frames are sequential RGB, each nx * ny * 3 bytes
|
||||
// if bitmap is audio:
|
||||
// length of data must be n_samples * sizeof(float)
|
||||
// the data is in float format (PCM F32)
|
||||
MTMD_API mtmd_bitmap * mtmd_bitmap_init (uint32_t nx, uint32_t ny, const unsigned char * data);
|
||||
MTMD_API mtmd_bitmap * mtmd_bitmap_init_from_video(uint32_t nx, uint32_t ny, uint32_t n_frames, const unsigned char * data);
|
||||
MTMD_API mtmd_bitmap * mtmd_bitmap_init_from_audio(size_t n_samples, const float * data);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_nx (const mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_ny (const mtmd_bitmap * bitmap);
|
||||
MTMD_API const unsigned char * mtmd_bitmap_get_data (const mtmd_bitmap * bitmap);
|
||||
MTMD_API size_t mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap);
|
||||
MTMD_API bool mtmd_bitmap_is_audio (const mtmd_bitmap * bitmap);
|
||||
MTMD_API void mtmd_bitmap_free (mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_nx (const mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_ny (const mtmd_bitmap * bitmap);
|
||||
MTMD_API const unsigned char * mtmd_bitmap_get_data (const mtmd_bitmap * bitmap);
|
||||
MTMD_API size_t mtmd_bitmap_get_n_bytes (const mtmd_bitmap * bitmap);
|
||||
MTMD_API bool mtmd_bitmap_is_audio (const mtmd_bitmap * bitmap);
|
||||
MTMD_API bool mtmd_bitmap_is_video (const mtmd_bitmap * bitmap);
|
||||
MTMD_API uint32_t mtmd_bitmap_get_n_frames(const mtmd_bitmap * bitmap);
|
||||
MTMD_API void mtmd_bitmap_free (mtmd_bitmap * bitmap);
|
||||
// bitmap ID is optional, but useful for KV cache tracking
|
||||
// these getters/setters are dedicated functions, so you can for example calculate the hash of the image based on mtmd_bitmap_get_data()
|
||||
MTMD_API const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap);
|
||||
|
|
@ -187,6 +194,7 @@ MTMD_API void mtmd_input_chunk_free(mtmd_input_chunk * chunk);
|
|||
MTMD_API size_t mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens); // TODO: deprecate
|
||||
MTMD_API size_t mtmd_image_tokens_get_nx (const mtmd_image_tokens * image_tokens);
|
||||
MTMD_API size_t mtmd_image_tokens_get_ny (const mtmd_image_tokens * image_tokens);
|
||||
MTMD_API size_t mtmd_image_tokens_get_nt (const mtmd_image_tokens * image_tokens);
|
||||
MTMD_API const char * mtmd_image_tokens_get_id (const mtmd_image_tokens * image_tokens); // TODO: deprecate
|
||||
// number of temporal positions (equals to max(t,h,w) for M-RoPE; equals to n_tokens otherwise)
|
||||
MTMD_API llama_pos mtmd_image_tokens_get_n_pos (const mtmd_image_tokens * image_tokens); // TODO: deprecate
|
||||
|
|
@ -276,9 +284,14 @@ struct bitmap {
|
|||
bitmap(uint32_t nx, uint32_t ny, const unsigned char * data) {
|
||||
ptr.reset(mtmd_bitmap_init(nx, ny, data));
|
||||
}
|
||||
bitmap(uint32_t nx, uint32_t ny, uint32_t n_frames, const unsigned char * data) {
|
||||
ptr.reset(mtmd_bitmap_init_from_video(nx, ny, n_frames, data));
|
||||
}
|
||||
~bitmap() = default;
|
||||
uint32_t nx() const { return mtmd_bitmap_get_nx(ptr.get()); }
|
||||
uint32_t ny() const { return mtmd_bitmap_get_ny(ptr.get()); }
|
||||
uint32_t nx() const { return mtmd_bitmap_get_nx(ptr.get()); }
|
||||
uint32_t ny() const { return mtmd_bitmap_get_ny(ptr.get()); }
|
||||
uint32_t n_frames() const { return mtmd_bitmap_get_n_frames(ptr.get()); }
|
||||
bool is_video() const { return mtmd_bitmap_is_video(ptr.get()); }
|
||||
const unsigned char * data() const { return mtmd_bitmap_get_data(ptr.get()); }
|
||||
size_t n_bytes() const { return mtmd_bitmap_get_n_bytes(ptr.get()); }
|
||||
std::string id() const { return mtmd_bitmap_get_id(ptr.get()); }
|
||||
|
|
|
|||
Loading…
Reference in New Issue