1 #ifndef CAFFE2_VIDEO_VIDEO_DECODER_H_ 2 #define CAFFE2_VIDEO_VIDEO_DECODER_H_ 4 #include <caffe2/core/logging.h> 11 #include <libavformat/avformat.h> 12 #include <libavformat/avio.h> 17 #define VIO_BUFFER_SZ 32768 18 #define MAX_DECODING_FRAMES 10000 26 SAMPLE_ALL_FRAMES = -1,
27 SAMPLE_TIMESTAMP_ONLY = -2,
39 USE_MINIMAL_WIDTH_HEIGHT = 1,
63 SampleInterval() : timestamp(-1), fps(SpecialFps::SAMPLE_ALL_FRAMES) {}
66 return (timestamp < itvl.timestamp);
73 bool keyFrames_ =
false;
76 AVPixelFormat pixelFormat_ = AVPixelFormat::AV_PIX_FMT_RGB24;
80 int streamIndex_ = -1;
84 int maximumOutputFrames_ = -1;
87 int video_res_type_ = VideoResType::USE_WIDTH_HEIGHT;
90 int crop_height_ = -1;
102 int decode_type_ = DecodeType::DO_TMP_JITTER;
103 int num_of_required_frame_ = -1;
109 std::vector<SampleInterval> intervals_ = {{0, SpecialFps::SAMPLE_ALL_FRAMES}};
120 intervals_.emplace_back(0, v);
133 for (
auto& timestamp : timestamps) {
134 intervals_.emplace_back(timestamp, SpecialFps::SAMPLE_TIMESTAMP_ONLY);
143 pixelFormat_ = pixelFormat;
151 keyFrames_ = keyFrames;
159 streamIndex_ = index;
167 maximumOutputFrames_ = count;
192 void operator()(
unsigned char* p)
const {
196 using AvDataPtr = std::unique_ptr<uint8_t, avDeleter>;
209 double timestamp_ = 0;
212 bool keyFrame_ =
false;
218 int outputFrameIndex_ = -1;
224 : workBuffersize_(VIO_BUFFER_SZ),
225 workBuffer_((uint8_t*)av_malloc(workBuffersize_)),
227 inputBuffer_(
nullptr),
228 inputBufferSize_(0) {
229 inputFile_ = fopen(fname.c_str(),
"rb");
230 if (inputFile_ ==
nullptr) {
231 LOG(ERROR) <<
"Error opening video file " << fname;
233 ctx_ = avio_alloc_context(
234 static_cast<unsigned char*>(workBuffer_.get()),
238 &VideoIOContext::readFile,
240 &VideoIOContext::seekFile);
244 : workBuffersize_(VIO_BUFFER_SZ),
245 workBuffer_((uint8_t*)av_malloc(workBuffersize_)),
247 inputBuffer_(buffer),
248 inputBufferSize_(size) {
249 ctx_ = avio_alloc_context(
250 static_cast<unsigned char*>(workBuffer_.get()),
254 &VideoIOContext::readMemory,
256 &VideoIOContext::seekMemory);
266 int read(
unsigned char* buf,
int buf_size) {
268 return readMemory(
this, buf, buf_size);
269 }
else if (inputFile_) {
270 return readFile(
this, buf, buf_size);
276 int64_t seek(int64_t offset,
int whence) {
278 return seekMemory(
this, offset, whence);
279 }
else if (inputFile_) {
280 return seekFile(
this, offset, whence);
286 static int readFile(
void* opaque,
unsigned char* buf,
int buf_size) {
288 if (feof(h->inputFile_)) {
291 size_t ret = fread(buf, 1, buf_size, h->inputFile_);
292 if (ret < buf_size) {
293 if (ferror(h->inputFile_)) {
300 static int64_t seekFile(
void* opaque, int64_t offset,
int whence) {
306 return fseek(h->inputFile_, static_cast<long>(offset), whence);
309 int64_t cur = ftell(h->inputFile_);
310 fseek(h->inputFile_, 0L, SEEK_END);
311 int64_t size = ftell(h->inputFile_);
312 fseek(h->inputFile_, cur, SEEK_SET);
319 static int readMemory(
void* opaque,
unsigned char* buf,
int buf_size) {
325 int reminder = h->inputBufferSize_ - h->offset_;
326 int r = buf_size < reminder ? buf_size : reminder;
331 memcpy(buf, h->inputBuffer_ + h->offset_, r);
336 static int64_t seekMemory(
void* opaque, int64_t offset,
int whence) {
340 h->offset_ += offset;
343 h->offset_ = h->inputBufferSize_ + offset;
349 return h->inputBufferSize_;
354 AVIOContext* get_avio() {
360 DecodedFrame::AvDataPtr workBuffer_;
365 const char* inputBuffer_;
366 int inputBufferSize_;
376 enum AVMediaType codec_type;
377 AVPixelFormat pixFormat;
382 codec_type(AVMEDIA_TYPE_VIDEO),
383 pixFormat(AVPixelFormat::AV_PIX_FMT_RGB24) {}
391 const std::string& filename,
394 std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames);
401 std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames);
404 std::string ffmpegErrorStr(
int result);
406 void ResizeAndKeepAspectRatio(
407 const int origHeight,
415 const std::string& videoName,
419 std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames);
423 #endif // CAFFE2_VIDEO_VIDEO_DECODER_H_
Params & outputHeight(int height)
Output frame height, default to video height.
Params & keyFrames(bool keyFrames)
Return all key-frames.
Params & outputWidth(int width)
Output frame width, default to video width.
Params & streamIndex(int index)
Index of video stream to process, defaults to the first video stream.
Params & setSampleTimestamps(const std::vector< double > &timestamps)
Sample output frames at a specified list of timestamps. Timestamps must be in increasing order...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Params & fps(float v)
FPS of output frames. Setting it here will reset intervals_ and force decoding at the target FPS. This can be ...
Params & pixelFormat(AVPixelFormat pixelFormat)
Pixel format of output buffer, defaults to AV_PIX_FMT_RGB24.
Params & maxOutputFrames(int count)
Only output this many frames, default to no limit.