#define ENABLE_VAAPI 0
#define MAX_SUPPORTED_WIDTH 1950
#define MAX_SUPPORTED_HEIGHT 1100

#include "libavutil/hwcontext_vaapi.h"

typedef struct VAAPIDecodeContext {
	VAEntrypoint va_entrypoint;
	VAContextID va_context;

#if FF_API_STRUCT_VAAPI_CONTEXT
	struct vaapi_context *old_context;
	AVBufferRef *device_ref;
#endif

	AVHWDeviceContext *device;
	AVVAAPIDeviceContext *hwctx;

	AVHWFramesContext *frames;
	AVVAAPIFramesContext *hwfc;

	enum AVPixelFormat surface_format;
	// ...
} VAAPIDecodeContext;
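// Note: this struct mirrors the layout of FFmpeg's *internal* VAAPIDecodeContext
// (it is not part of the public API). It is redeclared here so that Open() can
// reach pCodecCtx->priv_data->va_config when querying hardware frame
// constraints below; if FFmpeg ever reorders these fields, that cast becomes
// undefined behavior.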
#endif // ENABLE_VAAPI
#endif // USE_HW_ACCEL

FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
	: last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
	  path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
	  seek_audio_frame_found(0), seek_video_frame_found(0), is_duration_known(false), largest_frame_processed(0),
	  video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
	  pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1, 0}
	  // ... (remaining initializers elided)
{
	pts_offset_seconds = NO_PTS_OFFSET;
	video_pts_seconds = NO_PTS_OFFSET;
	audio_pts_seconds = NO_PTS_OFFSET;
	// ...
}

// Decide whether this audio location is close enough to another location
// to be treated as contiguous audio
bool AudioLocation::is_near(AudioLocation location, int samples_per_frame, int64_t amount)
{
	// If the locations are 2+ frames apart, they are never "near"
	if (abs(location.frame - frame) >= 2)
		return false;

	// Distance between the two locations, measured in samples
	int64_t diff = samples_per_frame * (location.frame - frame) + location.sample_start - sample_start;
	if (abs(diff) <= amount)
		// Close enough
		return true;

	return false;
}
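// Worked example (hypothetical numbers): with samples_per_frame = 1470,
// comparing location {frame 10, sample_start 0} against this = {frame 9,
// sample_start 1000} gives diff = 1470 * (10 - 9) + 0 - 1000 = 470, so the
// two locations are "near" whenever amount >= 470.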

static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
{
	const enum AVPixelFormat *p;

	for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
		switch (*p) {
#if defined(__linux__)
			// Linux-only hardware pixel formats
			case AV_PIX_FMT_VAAPI:
				// ... (record VA-API as the chosen format and return *p)
			case AV_PIX_FMT_VDPAU:
				// ...
#endif
#if defined(_WIN32)
			// Windows-only hardware pixel formats
			case AV_PIX_FMT_DXVA2_VLD:
				// ...
			case AV_PIX_FMT_D3D11:
				// ...
#endif
#if defined(__APPLE__)
			// macOS-only hardware pixel formats
			case AV_PIX_FMT_VIDEOTOOLBOX:
				// ...
#endif
			// Cross-platform hardware pixel formats
			case AV_PIX_FMT_CUDA:
				// ...
		}
	}

	// No usable hardware format: fall back to software decoding
	return AV_PIX_FMT_NONE;
}
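// For reference: FFmpeg invokes this callback (installed via
// pCodecCtx->get_format below) with the list of pixel formats the decoder can
// produce, ordered by preference. Returning a hardware format such as
// AV_PIX_FMT_VAAPI opts in to decoding into GPU surfaces; returning
// AV_PIX_FMT_NONE rejects them all and keeps the decode in software.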

int FFmpegReader::IsHardwareDecodeSupported(int codecid)
{
	int ret;
	switch (codecid) {
		// Codecs with reliable hardware decode support
		case AV_CODEC_ID_H264:
		case AV_CODEC_ID_MPEG2VIDEO:
		case AV_CODEC_ID_VC1:
		case AV_CODEC_ID_WMV1:
		case AV_CODEC_ID_WMV2:
		case AV_CODEC_ID_WMV3:
			ret = 1;
			break;
		default:
			ret = 0;
			break;
	}
	return ret;
}
#endif // USE_HW_ACCEL

void FFmpegReader::Open() {
	// Open reader if not already open
	// ...
	const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

	// Open the media file (and throw on failure)
	if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)
		throw InvalidFile("File could not be opened.", path);

	// Retrieve the stream information (and throw if none is found)
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
		throw NoStreamsFound("No streams found in file.", path);

	// Reset the packet status counters
	packet_status.reset(true);

	// Loop through each stream, finding the first video and audio streams
	for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
		// Is this the first video stream?
		if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
			videoStream = i;
		}
		// Is this the first audio stream?
		if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
			audioStream = i;
		}
	}

	// Fail if no usable streams were found
	if (videoStream == -1 && audioStream == -1)
		throw NoStreamsFound("No video or audio streams found in this file.", path);

	// Is there a video stream?
	if (videoStream != -1) {
		// Set the stream index and stream pointer
		// ...
		pStream = pFormatCtx->streams[videoStream];

		// Find the codec ID from the stream, and look up a decoder for it
		AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(pStream);
		// ...
		const AVCodec *pCodec = avcodec_find_decoder(codecId);
		AVDictionary *opts = NULL;
		int retry_decode_open = 2;
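		// retry_decode_open drives the do/while loop below: it starts at 2
		// (first attempt, hardware acceleration allowed), is set to 1 when the
		// hardware path fails its constraint checks (forcing one software
		// retry), and drops to 0 once the codec opens successfully.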
		do {
			// First pass: decide between hardware and software decode
			if (hw_de_on && (retry_decode_open == 2)) {
				// No decision has been made yet: probe for hardware support
				hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
			} else {
				// Force software decode
				retry_decode_open = 0;
			}

			if (pCodec == NULL) {
				throw InvalidCodec("A valid video codec could not be found for this file.", path);
			}

			// Allow experimental codecs
			av_dict_set(&opts, "strict", "experimental", 0);

#if USE_HW_ACCEL
			if (hw_de_on && hw_de_supported) {
				// Open hardware acceleration
				int i_decoder_hw = 0;
				char adapter[256];
				char *adapter_ptr = NULL;
				int adapter_num = openshot::Settings::Instance()->HW_DE_DEVICE_SET;
				fprintf(stderr, "Hardware decoding device number: %d\n", adapter_num);

				// Install the callback that selects a hardware pixel format
				pCodecCtx->get_format = get_hw_dec_format;

				if (adapter_num < 3 && adapter_num >= 0) {
#if defined(__linux__)
					// Build the DRM render-node path for the chosen GPU
					snprintf(adapter, sizeof(adapter), "/dev/dri/renderD%d", adapter_num + 128);
					adapter_ptr = adapter;
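					// Note: Linux DRM render nodes are numbered starting at 128,
					// so GPU 0 maps to /dev/dri/renderD128, GPU 1 to renderD129, etc.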
					// Map the configured decoder index (Settings::HARDWARE_DECODER)
					// onto an FFmpeg device type
					// (case labels and break statements elided from this listing)
					switch (i_decoder_hw) {
							hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;	// default
					}
#elif defined(_WIN32)
					// ...
					switch (i_decoder_hw) {
							hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;	// default
					}
#elif defined(__APPLE__)
					// ...
					switch (i_decoder_hw) {
							hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
							hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;	// default
					}
#endif
				}

				// Verify the chosen device is usable before creating a device context
#if defined(__linux__)
				if (adapter_ptr != NULL && access(adapter_ptr, W_OK) == 0) {
#elif defined(_WIN32)
				if (adapter_ptr != NULL) {
#elif defined(__APPLE__)
				if (adapter_ptr != NULL) {
#endif
					// ... (log that the device is present)
				}
				// ...

				hw_device_ctx = NULL;
				// Create the hardware device context and attach it to the codec
				if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
					if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
						// ... (reference creation failed)
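						// Note: av_buffer_ref() returns NULL only on allocation
						// failure. On success the codec context holds its own
						// reference to the device context, so hw_device_ctx and
						// pCodecCtx->hw_device_ctx must each be released via
						// av_buffer_unref() (see Close() below).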
					}
				}
			}
#endif // USE_HW_ACCEL

			// Disable frame-based multithreading before opening the codec
			pCodecCtx->thread_type &= ~FF_THREAD_FRAME;

			// Open the video codec
			int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
			if (avcodec_return < 0) {
				std::stringstream avcodec_error_msg;
				avcodec_error_msg << "A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
				throw InvalidCodec(avcodec_error_msg.str(), path);
			}

#if USE_HW_ACCEL
			// Verify that the hardware device can actually handle this video
			if (hw_de_on && hw_de_supported) {
				AVHWFramesConstraints *constraints = NULL;
				void *hwconfig = NULL;
				hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);

#if ENABLE_VAAPI
				// Reach into FFmpeg's private VAAPI context for the VAConfigID,
				// which makes the constraint query specific to this decode config
				((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
				constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx, hwconfig);
#endif // ENABLE_VAAPI

				if (constraints) {
					// Check whether the video fits the device's size limits
					if (pCodecCtx->coded_width < constraints->min_width ||
						pCodecCtx->coded_height < constraints->min_height ||
						pCodecCtx->coded_width > constraints->max_width ||
						pCodecCtx->coded_height > constraints->max_height) {
						// Out of range: drop the hardware device and retry in software
						// ...
						retry_decode_open = 1;
						// ...
						av_buffer_unref(&hw_device_ctx);
						hw_device_ctx = NULL;
					} else {
						ZmqLogger::Instance()->AppendDebugMethod(
							"\nDecode hardware acceleration is used\n",
							"Min width :", constraints->min_width,
							"Min Height :", constraints->min_height,
							"MaxWidth :", constraints->max_width,
							"MaxHeight :", constraints->max_height,
							"Frame width :", pCodecCtx->coded_width,
							"Frame height :", pCodecCtx->coded_height);
						retry_decode_open = 0;
					}
					av_hwframe_constraints_free(&constraints);
					// ...
				}
				else {
					// No constraints available: fall back to configured limits
					int max_w = openshot::Settings::Instance()->DE_LIMIT_WIDTH_MAX;
					int max_h = openshot::Settings::Instance()->DE_LIMIT_HEIGHT_MAX;
					// ...
					if (pCodecCtx->coded_width < 0 ||
						pCodecCtx->coded_height < 0 ||
						pCodecCtx->coded_width > max_w ||
						pCodecCtx->coded_height > max_h) {
						ZmqLogger::Instance()->AppendDebugMethod(
							"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
							"Max Width :", max_w,
							"Max Height :", max_h,
							"Frame width :", pCodecCtx->coded_width,
							"Frame height :", pCodecCtx->coded_height);
						// Too large: drop the hardware device and retry in software
						retry_decode_open = 1;
						// ...
						av_buffer_unref(&hw_device_ctx);
						hw_device_ctx = NULL;
					} else {
						ZmqLogger::Instance()->AppendDebugMethod(
							"\nDecode hardware acceleration is used\n",
							"Max Width :", max_w,
							"Max Height :", max_h,
							"Frame width :", pCodecCtx->coded_width,
							"Frame height :", pCodecCtx->coded_height);
						retry_decode_open = 0;
					}
				}
			} else {
				// Hardware decode not in use: nothing to verify
				retry_decode_open = 0;
			}
#endif // USE_HW_ACCEL
		} while (retry_decode_open); // retry in software if the hardware attempt failed
		// ...
	}

	// Is there an audio stream?
	if (audioStream != -1) {
		// Set the stream index and stream pointer
		// ...
		aStream = pFormatCtx->streams[audioStream];

		// Find the codec ID from the stream, and look up a decoder for it
		AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(aStream);
		// ...
		const AVCodec *aCodec = avcodec_find_decoder(codecId);

		if (aCodec == NULL) {
			throw InvalidCodec("A valid audio codec could not be found for this file.", path);
		}

		// Allow experimental codecs, then open the audio codec
		AVDictionary *opts = NULL;
		av_dict_set(&opts, "strict", "experimental", 0);

		if (avcodec_open2(aCodecCtx, aCodec, &opts) < 0)
			throw InvalidCodec("An audio codec was found, but could not be opened.", path);
		// ...
	}

	// Add the container's format-level metadata to the info struct
	AVDictionaryEntry *tag = NULL;
	while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
		QString str_key = tag->key;
		QString str_value = tag->value;
		info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
	}

	// Reset the "previous" audio packet location
	previous_packet_location.frame = -1;
	// ...
}

void FFmpegReader::Close() {
	// ...
	const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

	// Free any pending packet
	AVPacket *recent_packet = packet;
	// ...

	// Wait until all packets have been decoded (bounded number of attempts)
	int max_attempts = 128;
	// ... (wait loop elided; each pass logs
	//      "attempts", attempts);
	RemoveAVPacket(recent_packet);

	// Close the video codec
	if (avcodec_is_open(pCodecCtx)) {
		avcodec_flush_buffers(pCodecCtx);
	}
	// ...
#if USE_HW_ACCEL
	// Release the hardware device context (matches the refs taken in Open())
	av_buffer_unref(&hw_device_ctx);
	hw_device_ctx = NULL;
	// ...
#endif // USE_HW_ACCEL

	// Close the audio codec
	if (avcodec_is_open(aCodecCtx)) {
		avcodec_flush_buffers(aCodecCtx);
	}
	// ...

	// Clear any frames left in the working cache
	working_cache.Clear();

	// Close the format context (note: avformat_close_input() already frees the
	// context and NULLs the pointer, making the av_freep() below a no-op)
	avformat_close_input(&pFormatCtx);
	av_freep(&pFormatCtx);

	// Reset bookkeeping
	largest_frame_processed = 0;
	seek_audio_frame_found = 0;
	seek_video_frame_found = 0;
	current_video_frame = 0;
	last_video_frame.reset();
}

bool FFmpegReader::HasAlbumArt() {
	return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
		&& (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
}
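// AV_DISPOSITION_ATTACHED_PIC marks a "video" stream that is really a single
// embedded image (e.g. MP3/M4A cover art), which is why its presence is used
// here to distinguish album art from genuine video.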

void FFmpegReader::UpdateAudioInfo() {
	// ...
	info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
	// ...

	// Prefer the stream duration if it is longer than what we have so far
	if (aStream->duration > 0 && aStream->duration > info.duration) {
		// ...
	} else if (pFormatCtx->duration > 0 && info.duration <= 0.0f) {
		// Use the container duration (stored in AV_TIME_BASE units)
		info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;
	}
	// ...

	// Add the audio stream's metadata to the info struct
	AVDictionaryEntry *tag = NULL;
	while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
		QString str_key = tag->key;
		QString str_value = tag->value;
		info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
	}
}

void FFmpegReader::UpdateVideoInfo() {
	// ...
	info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
	// ...

	// Guess the frame rate from the stream (container and codec hints)
	AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
	// ...

	// Use the stream's pixel aspect ratio, if one is set
	if (pStream->sample_aspect_ratio.num != 0) {
		// ...
	}
	// ...

	// Detect interlacing via the codec's reported field order (only check once)
	if (!check_interlace) {
		check_interlace = true;
		// (field_order is read from the codec attributes; retrieval elided)
		switch (field_order) {
			case AV_FIELD_PROGRESSIVE:
				// Progressive content: not interlaced
				// ...
			case AV_FIELD_UNKNOWN:
				// Unknown: re-check interlacing against the first frames later
				check_interlace = false;
				// ...
		}
	}
	// ...

	// Fall back to the container duration when the stream has none
	if (info.duration <= 0.0f && pFormatCtx->duration >= 0) {
		// Use the container duration (stored in AV_TIME_BASE units)
		info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;
	}
	// ...

	// If no duration is available at all, mark it as unknown
	if (info.duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
		// ...
		is_duration_known = false;
	} else {
		// ...
		is_duration_known = true;
	}
	// ...

	// Add the video stream's metadata to the info struct
	AVDictionaryEntry *tag = NULL;
	while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
		QString str_key = tag->key;
		QString str_value = tag->value;
		info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
	}
}

bool FFmpegReader::GetIsDurationKnown() {
	return this->is_duration_known;
}

std::shared_ptr<Frame> FFmpegReader::GetFrame(int64_t requested_frame) {
	// Check for an open reader (and throw an exception if not)
	if (!is_open)
		throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);

	// Adjust for the very first frame
	if (requested_frame < 1)
		requested_frame = 1;

	// Check for a valid duration
	if (/* ... duration could not be determined ... */)
		throw InvalidFile("Could not detect the duration of the video or audio stream.", path);

	// Check the final cache first (and return the frame if found)
	std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
	// ...
	const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
	// ...

	// Determine if we should read ahead or seek
	int64_t diff = requested_frame - last_frame;
	if (diff >= 1 && diff <= 20) {
		// Close enough: just keep decoding forward
		frame = ReadStream(requested_frame);
	} else {
		// More than 20 frames away (or backwards): seek near the target first
		Seek(requested_frame);
		// ...
		frame = ReadStream(requested_frame);
	}
	// ...
	return frame;
}
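// Design note: for small forward jumps (1-20 frames) sequential decoding is
// cheaper than a container seek, since a seek lands on a keyframe and forces
// re-decoding from there anyway; larger jumps pay the seek cost once instead.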

std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
	bool check_seek = false;
	int packet_error = -1;
	bool is_cache_found = false;

	// Loop through the stream until the requested frame is found
	while (true) {
		// Check if any working frames are finished (and move them to the final cache)
		CheckWorkingFrames(requested_frame);

		// Check if the requested frame has reached the final cache
		// ...
		if (is_cache_found) {
			break;
		}

		if (!hold_packet || !packet) {
			// Get the next packet
			packet_error = GetNextPacket();
			if (packet_error < 0 && !packet) {
				// No more packets to be found
				// ...
			}
		}

		// Check the status of a seek (if any)
		check_seek = CheckSeek(false);
		// ...

		// Video packet (or pending video decode work)?
		if ((info.has_video && packet && packet->stream_index == videoStream) ||
			/* ... */) {
			// ...
			ProcessVideoPacket(requested_frame);
		}

		// Audio packet (or pending audio decode work)?
		if ((info.has_audio && packet && packet->stream_index == audioStream) ||
			/* ... */) {
			// ...
			ProcessAudioPacket(requested_frame);
		}

		// Packet for a stream we are not using? Simply discard it.
		if ((!info.has_video && packet && packet->stream_index == videoStream) ||
			(!info.has_audio && packet && packet->stream_index == audioStream)) {
			if (packet->stream_index == videoStream) {
				// ...
			} else if (packet->stream_index == audioStream) {
				// ...
			}
			RemoveAVPacket(packet);
			// ...
		}

		// Force end-of-file once every packet has been read and decoded
		// ...
		ZmqLogger::Instance()->AppendDebugMethod(
			"FFmpegReader::ReadStream (force EOF)",
			"packets_read", packet_status.packets_read(),
			"packets_decoded", packet_status.packets_decoded(),
			"packets_eof", packet_status.packets_eof,
			"video_eof", packet_status.video_eof,
			"audio_eof", packet_status.audio_eof,
			"end_of_file", packet_status.end_of_file);
		// ...
		ZmqLogger::Instance()->AppendDebugMethod(/* ... */
			"largest_frame_processed", largest_frame_processed,
			"Working Cache Count", working_cache.Count());
		// ...
	}

	// Check the working cache one last time
	CheckWorkingFrames(requested_frame);
	// ...

	// The requested frame was never fully decoded: pad and return the last frame
	if (frame) {
		std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
		// ...
		if (!frame->has_image_data) {
			// ...
		}
		// Silence the audio, since we are repeating the last frame
		frame->AddAudioSilence(samples_in_frame);
		// ...
	} else {
		// The last frame is gone from the cache: return a blank, silent frame
		std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
		// ...
		f->AddAudioSilence(samples_in_frame);
		// ...
	}
}

int FFmpegReader::GetNextPacket() {
	int found_packet = 0;
	AVPacket *next_packet = new AVPacket();
	found_packet = av_read_frame(pFormatCtx, next_packet);

	// Free the previous packet (if any) before replacing it
	if (packet) {
		RemoveAVPacket(packet);
		// ...
	}
	if (found_packet >= 0) {
		// Update the current packet pointer
		packet = next_packet;

		// Track per-stream read counts
		if (packet->stream_index == videoStream) {
			// ...
		} else if (packet->stream_index == audioStream) {
			// ...
		}
	}
	// ...
	return found_packet;
}
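// Note: av_read_frame() returns 0 on success and a negative AVERROR on
// failure or end-of-file; on success the packet holds reference-counted
// buffers that must be released with av_packet_unref() (done here through
// RemoveAVPacket).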

bool FFmpegReader::GetAVFrame() {
	int frameFinished = 0;

	// Decode the video frame
#if IS_FFMPEG_3_2
	// FFmpeg 3.2+: decoupled send/receive decode API
	int send_packet_err = 0;
	int64_t send_packet_pts = 0;
	if ((packet && packet->stream_index == videoStream && !hold_packet) || !packet) {
		send_packet_err = avcodec_send_packet(pCodecCtx, packet);

		if (packet && send_packet_err >= 0) {
			send_packet_pts = GetPacketPTS();
			hold_packet = false;
			// ...
		}
	}

#if USE_HW_ACCEL
	// ... (hardware-decode bookkeeping elided)
#endif // USE_HW_ACCEL

	if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
		ZmqLogger::Instance()->AppendDebugMethod(
			"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])",
			"send_packet_err", send_packet_err,
			"send_packet_pts", send_packet_pts);
		if (send_packet_err == AVERROR(EAGAIN)) {
			// Keep this packet and re-send it after draining decoded frames
			hold_packet = true;
			ZmqLogger::Instance()->AppendDebugMethod(
				"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame())",
				"send_packet_pts", send_packet_pts);
		}
		if (send_packet_err == AVERROR(EINVAL)) {
			ZmqLogger::Instance()->AppendDebugMethod(
				"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush)",
				"send_packet_pts", send_packet_pts);
		}
		if (send_packet_err == AVERROR(ENOMEM)) {
			ZmqLogger::Instance()->AppendDebugMethod(
				"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors)",
				"send_packet_pts", send_packet_pts);
		}
	}

	// Receive all decoded frames that are ready
	int receive_frame_err = 0;
	AVFrame *next_frame2;
#if USE_HW_ACCEL
	if (hw_de_on && hw_de_supported) {
		// Decode into a separate frame, so the hw surface can be copied out
		next_frame2 = AV_ALLOCATE_FRAME();
	} else
#endif // USE_HW_ACCEL
	{
		next_frame2 = next_frame;
	}
	// ...
	while (receive_frame_err >= 0) {
		receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);

		if (receive_frame_err != 0) {
			ZmqLogger::Instance()->AppendDebugMethod(
				"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [" + av_err2string(receive_frame_err) + "])",
				"receive_frame_err", receive_frame_err,
				"send_packet_pts", send_packet_pts);

			if (receive_frame_err == AVERROR_EOF) {
				ZmqLogger::Instance()->AppendDebugMethod(
					"FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
					"send_packet_pts", send_packet_pts);
				avcodec_flush_buffers(pCodecCtx);
				// ...
			}
			if (receive_frame_err == AVERROR(EINVAL)) {
				ZmqLogger::Instance()->AppendDebugMethod(
					"FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
					"send_packet_pts", send_packet_pts);
				avcodec_flush_buffers(pCodecCtx);
			}
			if (receive_frame_err == AVERROR(EAGAIN)) {
				ZmqLogger::Instance()->AppendDebugMethod(
					"FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
					"send_packet_pts", send_packet_pts);
			}
			if (receive_frame_err == AVERROR_INPUT_CHANGED) {
				ZmqLogger::Instance()->AppendDebugMethod(
					"FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
					"send_packet_pts", send_packet_pts);
			}
			// No frame is ready: stop draining for now
			break;
		}

#if USE_HW_ACCEL
		if (hw_de_on && hw_de_supported) {
			int err = 0;
			// Copy the decoded hardware surface into a software frame
			if (next_frame2->format == hw_de_av_pix_fmt) {
				next_frame->format = AV_PIX_FMT_YUV420P;
				if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {
					// ... (transfer failed)
				}
				if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {
					// ... (copying frame properties failed)
				}
			}
		} else
#endif // USE_HW_ACCEL
		{
			next_frame = next_frame2;
		}

		// Copy the image data into pFrame (at the file's original size)
		// ...
		av_image_alloc(pFrame->data, pFrame->linesize, info.width, info.height, (AVPixelFormat)(pStream->codecpar->format), 1);
		av_image_copy(pFrame->data, pFrame->linesize, (const uint8_t **)next_frame->data, next_frame->linesize,
			/* ... */);

		// Prefer the PTS; fall back to the packet DTS when the PTS is missing
		if (next_frame->pts != AV_NOPTS_VALUE) {
			// ...
			video_pts = next_frame->pts;
		} else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
			// ...
			video_pts = next_frame->pkt_dts;
		}

		ZmqLogger::Instance()->AppendDebugMethod(
			"FFmpegReader::GetAVFrame (Successful frame received)",
			"video_pts", video_pts,
			"send_packet_pts", send_packet_pts);
		// ...
	}
	// ...
#if USE_HW_ACCEL
	// ... (free the temporary hardware frame)
#endif // USE_HW_ACCEL
#else
	// Legacy decode path (FFmpeg earlier than 3.2)
	avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
	// ...
	if (frameFinished) {
		// ...
		av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
			info.width, info.height);
	}
#endif // IS_FFMPEG_3_2

	// Return true if a frame was decoded on this call
	return frameFinished;
}
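// The legacy path above is kept only for very old FFmpeg builds:
// avcodec_decode_video2() was deprecated in FFmpeg 3.1 and removed entirely
// in FFmpeg 5.0 in favor of the send/receive API used in the primary path.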

bool FFmpegReader::CheckSeek(bool is_video) {
	// Are we seeking for a specific frame?
	if (is_seeking) {
		// Check if both the video and audio seek targets have been found
		if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
			// Not ready to evaluate the seek yet
			return false;

		// Determine the farthest frame found by the seek
		int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);

		// If the seek landed at (or past) the target, seek again, further back
		if (max_seeked_frame >= seeking_frame) {
			ZmqLogger::Instance()->AppendDebugMethod(/* ... */
				"is_video_seek", is_video_seek,
				"max_seeked_frame", max_seeked_frame,
				"seeking_frame", seeking_frame,
				"seeking_pts", seeking_pts,
				"seek_video_frame_found", seek_video_frame_found,
				"seek_audio_frame_found", seek_audio_frame_found);

			// Seek again, with a growing margin before the target
			Seek(seeking_frame - (10 * seek_count * seek_count));
		} else {
			// The seek landed before the target: success
			ZmqLogger::Instance()->AppendDebugMethod(/* ... */
				"is_video_seek", is_video_seek,
				"packet->pts", GetPacketPTS(),
				"seeking_pts", seeking_pts,
				"seeking_frame", seeking_frame,
				"seek_video_frame_found", seek_video_frame_found,
				"seek_audio_frame_found", seek_audio_frame_found);
			// ...
		}
	}
	// ...
}

void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
	// Get the AVFrame from the current packet
	int frame_finished = GetAVFrame();

	// Was a frame decoded?
	if (!frame_finished) {
		// No frame was ready; nothing else to do
		// ...
		return;
	}

	// Calculate the frame # from the video PTS
	int64_t current_frame = ConvertVideoPTStoFrame(video_pts);

	// Remember the first video frame found after a seek
	if (!seek_video_frame_found && is_seeking)
		seek_video_frame_found = current_frame;

	// Create or get the frame object for the requested frame
	working_cache.Add(CreateFrame(requested_frame));
	// ...

	AVFrame *my_frame = pFrame;
	// ...

	// Allocate the RGB(A) frame and output buffer
	AVFrame *pFrameRGB = nullptr;
	uint8_t *buffer = nullptr;
	// ...
	if (pFrameRGB == nullptr)
		throw OutOfMemory("Failed to convert image to RGB size", path);

	// ... (max_width/max_height and the parent clip's max_scale_x/max_scale_y
	//      are determined here)

	// Never scale below the clip's maximum scale factors
	max_width = std::max(float(max_width), max_width * max_scale_x);
	max_height = std::max(float(max_height), max_height * max_scale_y);

	// SCALE_CROP: pick whichever size covers the whole canvas
	// ...
	QSize width_size(max_width * max_scale_x,
		/* ... */);
	QSize height_size(/* ... */,
		max_height * max_scale_y);
	// ...
	if (width_size.width() >= max_width && width_size.height() >= max_height) {
		max_width = std::max(max_width, width_size.width());
		max_height = std::max(max_height, width_size.height());
	} else {
		max_width = std::max(max_width, height_size.width());
		max_height = std::max(max_height, height_size.height());
	}
	// ...

	// Override the max size using the preview window size (if any)
	float preview_ratio = 1.0;
	// ... (preview_ratio derived from the parent timeline's preview size)
	max_width = info.width * max_scale_x * preview_ratio;
	max_height = info.height * max_scale_y * preview_ratio;
	// ...

	// Determine the target size, preserving the aspect ratio
	int width = info.width;
	int height = info.height;
	int original_height = height;
	if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
		// Scale down to fit inside max_width x max_height
		float ratio = float(width) / float(height);
		int possible_width = round(max_height * ratio);
		int possible_height = round(max_width / ratio);

		if (possible_width <= max_width) {
			// Use the height as the constraint
			width = possible_width;
			height = max_height;
		} else {
			// Use the width as the constraint
			// ...
			height = possible_height;
		}
	}

	// Allocate the RGBA conversion buffer (4 bytes per pixel, plus padding)
	const int bytes_per_pixel = 4;
	int buffer_size = (width * height * bytes_per_pixel) + 128;
	buffer = new unsigned char[buffer_size]();
	// ...

	// Choose the scaling algorithm
	int scale_mode = SWS_FAST_BILINEAR;
	if (/* ... higher-quality scaling requested ... */) {
		scale_mode = SWS_BICUBIC;
	}
	// ... (img_convert_ctx created with scale_mode)
	sws_scale(img_convert_ctx, my_frame->data, my_frame->linesize, 0,
		original_height, pFrameRGB->data, pFrameRGB->linesize);

	// Create or get the frame object for this frame number
	std::shared_ptr<Frame> f = CreateFrame(current_frame);

	// Add the image: opaque sources can use the faster premultiplied path,
	// while sources with an alpha channel must keep it
	if (/* ... no alpha channel ... */) {
		f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
	} else {
		f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
	}
	// ...

	// Update the working cache and remember the last video frame
	working_cache.Add(f);
	// ...
	last_video_frame = f;
	// ...

	// Free the AVFrame and the scaler context
	RemoveAVFrame(my_frame);
	sws_freeContext(img_convert_ctx);
	// ...

	ZmqLogger::Instance()->AppendDebugMethod(
		"FFmpegReader::ProcessVideoPacket (After)",
		"requested_frame", requested_frame,
		"current_frame", current_frame,
		"f->number", f->number,
		"video_pts_seconds", video_pts_seconds);
}

void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
	AudioLocation location;
	// Calculate the location of the current audio packet
	if (packet && packet->pts != AV_NOPTS_VALUE) {
		// Determine the related video frame and starting sample # from the PTS
		location = GetAudioPTSLocation(packet->pts);

		// Remember the first audio frame found after a seek
		if (!seek_audio_frame_found && is_seeking)
			seek_audio_frame_found = location.frame;
	}
	// ...
	working_cache.Add(CreateFrame(requested_frame));
	// ...
	ZmqLogger::Instance()->AppendDebugMethod(/* ... */
		"requested_frame", requested_frame,
		"target_frame", location.frame,
		/* ... */);

	int frame_finished = 0;
	// ...
	int packet_samples = 0;
	// ... (audio_frame allocated via AV_ALLOCATE_FRAME)

	// Send the packet to the audio decoder
	int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
	if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
		// ... (log the send error)
	}
	// Receive the decoded frame
	int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
	if (receive_frame_err >= 0) {
		frame_finished = 1;
	}
	if (receive_frame_err == AVERROR_EOF) {
		// ...
	}
	if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
		// ...
		avcodec_flush_buffers(aCodecCtx);
	}
	if (receive_frame_err != 0) {
		// ...
	}
	// ...
	// Legacy decode path (FFmpeg earlier than 3.2)
	int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
	// ...

	if (frame_finished) {
		// ...
		audio_pts = audio_frame->pts;

		// Determine the related video frame and starting sample # from the PTS
		location = GetAudioPTSLocation(audio_pts);

		// Find the size of the decoded data
		int plane_size = -1;
		data_size = av_samples_get_buffer_size(&plane_size,
			/* ... */,
			audio_frame->nb_samples,
			/* ... */);

		// Estimate the # of samples left in this packet
		// (to prevent gaps at the next timestamp)
		int pts_remaining_samples = packet_samples / info.channels;

		// Bail out early if no samples remain
		if (pts_remaining_samples == 0) {
			ZmqLogger::Instance()->AppendDebugMethod(/* ... */
				"packet_samples", packet_samples,
				/* ... */
				"pts_remaining_samples", pts_remaining_samples);
			// ...
		}

		// Walk the remaining samples forward, frame by frame
		while (pts_remaining_samples) {
			// ... (samples_per_frame computed via GetSamplesPerFrame)
			int samples = samples_per_frame - previous_packet_location.sample_start;
			if (samples > pts_remaining_samples)
				samples = pts_remaining_samples;

			// Decrement the remaining samples
			pts_remaining_samples -= samples;

			if (pts_remaining_samples > 0) {
				// Move to the next frame
				previous_packet_location.frame++;
				previous_packet_location.sample_start = 0;
			} else {
				// Advance the sample start within the current frame
				previous_packet_location.sample_start += samples;
			}
		}
		// ...
		ZmqLogger::Instance()->AppendDebugMethod(/* ... */
			"packet_samples", packet_samples,
			/* ... */
			"AV_SAMPLE_FMT_S16", AV_SAMPLE_FMT_S16);

		// Allocate the conversion frame, matching the decoded sample count
		// ...
		audio_converted->nb_samples = audio_frame->nb_samples;
		av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels, audio_frame->nb_samples, AV_SAMPLE_FMT_S16, 0);

		// Configure the resampler to output signed 16-bit interleaved samples
		// ...
		av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
		// ...

		// Convert the decoded samples into the S16 buffer
		SWR_CONVERT(avr,
			audio_converted->data,
			audio_converted->linesize[0],
			audio_converted->nb_samples,
			(const uint8_t **) audio_frame->data,
			audio_frame->linesize[0],
			audio_frame->nb_samples);
		// ...

		// Copy the converted interleaved samples out
		memcpy(audio_buf,
			audio_converted->data[0],
			static_cast<size_t>(audio_converted->nb_samples)
				* av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)
				* info.channels);
		// ...

		// Free the conversion buffers
		av_free(audio_converted->data[0]);
		// ...

		// Split the interleaved samples into one float buffer per channel
		int64_t starting_frame_number = -1;
		bool partial_frame = true;
		for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
			// Array of floats (holding the samples for a single channel)
			starting_frame_number = location.frame;
			int channel_buffer_size = packet_samples / info.channels;
			float *channel_buffer = new float[channel_buffer_size];

			// Initialize the channel buffer to zero
			for (int z = 0; z < channel_buffer_size; z++)
				channel_buffer[z] = 0.0f;

			// Loop through all samples, picking out the ones for this channel
			// ...
			for (int sample = 0; sample < packet_samples; sample++) {
				// ...
				if (channel_filter == channel) {
					// Convert the S16 sample to a float in the range -1.0 .. 1.0
					channel_buffer[position] = audio_buf[sample] * (1.0f / (1 << 15));
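					// Worked example: an S16 sample of 16384 becomes
					// 16384 * (1.0f / 32768) = 0.5f, and -32768 maps to -1.0f;
					// this is the standard scaling from 16-bit PCM to
					// normalized floating-point audio.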
					// ...
				}
				// ...
			}

			// Loop through the samples for this channel, adding them to frames
			int remaining_samples = channel_buffer_size;
			float *iterate_channel_buffer = channel_buffer;
			while (remaining_samples > 0) {
				// ... (start = location.sample_start for the first frame, else 0)

				// Number of samples that fit into the current frame
				int samples = samples_per_frame - start;
				if (samples > remaining_samples)
					samples = remaining_samples;

				// Create or get the frame object
				std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);

				// Determine if the frame was completely filled
				if (samples_per_frame == start + samples)
					partial_frame = false;
				else
					partial_frame = true;

				// Add this channel's samples to the frame
				f->AddAudio(true, channel_filter, start, iterate_channel_buffer,
					samples, 1.0f);

				ZmqLogger::Instance()->AppendDebugMethod(/* ... */
					"frame", starting_frame_number,
					/* ... */
					"channel", channel_filter,
					"partial_frame", partial_frame,
					"samples_per_frame", samples_per_frame);

				// Add or update the frame in the working cache
				working_cache.Add(f);

				// Decrement the remaining samples
				remaining_samples -= samples;

				// Advance the buffer to the next frame's samples
				if (remaining_samples > 0)
					iterate_channel_buffer += samples;

				// Move to the next frame
				starting_frame_number++;
				// ...
			}

			// Delete the channel buffer
			delete[] channel_buffer;
			channel_buffer = NULL;
			iterate_channel_buffer = NULL;
		}
		// ...

		ZmqLogger::Instance()->AppendDebugMethod(/* ... */
			"requested_frame", requested_frame,
			"starting_frame", location.frame,
			"end_frame", starting_frame_number - 1,
			"audio_pts_seconds", audio_pts_seconds);
	}
}

void FFmpegReader::Seek(int64_t requested_frame) {
	// Adjust for a requested frame that is too small
	if (requested_frame < 1)
		requested_frame = 1;
	// ...
	if (requested_frame > largest_frame_processed && packet_status.end_of_file) {
		// The file is fully decoded and the request is past the end: nothing to do
		// ...
	}

	ZmqLogger::Instance()->AppendDebugMethod(/* ... */
		"requested_frame", requested_frame,
		"seek_count", seek_count,
		"last_frame", last_frame);

	// Clear the working cache (since we are moving to a new position)
	working_cache.Clear();

	// Reset the decode state
	// ...
	video_pts_seconds = NO_PTS_OFFSET;
	// ...
	audio_pts_seconds = NO_PTS_OFFSET;
	hold_packet = false;
	// ...
	current_video_frame = 0;
	largest_frame_processed = 0;
	// ...

	// Reset the packet status counters
	packet_status.reset(false);

	// Increment the seek count
	seek_count++;

	// Seek a few frames earlier than requested, to give the decoder a run-up
	int buffer_amount = std::max(max_concurrent_frames, 8);
	if (requested_frame - buffer_amount < 20) {
		// Very close to the start: decode from frame 1 instead of seeking
		// ...
		if (seek_count == 1) {
			// ...
			seeking_pts = ConvertFrameToVideoPTS(1);
		}
		seek_audio_frame_found = 0;
		seek_video_frame_found = 0;
	} else {
		// Seek to the nearest keyframe before (requested_frame - buffer_amount)
		bool seek_worked = false;
		int64_t seek_target = 0;

		// Seek using the video stream (if present)
		// ...
		seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
		// ...
			fprintf(stderr, "%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
		// ...
		is_video_seek = true;
		// ...

		// Otherwise, seek using the audio stream (if present)
		seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
		// ...
			fprintf(stderr, "%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
		// ...
		is_video_seek = false;
		// ...

		// Flush the codec buffers after a successful seek
		// ...
		avcodec_flush_buffers(aCodecCtx);
		// ...
		avcodec_flush_buffers(pCodecCtx);

		// Reset the previous audio packet location
		previous_packet_location.frame = -1;
		// ...

		if (seek_count == 1) {
			// Remember what we are seeking for
			seeking_pts = seek_target;
			seeking_frame = requested_frame;
		}
		seek_audio_frame_found = 0;
		seek_video_frame_found = 0;
	}
}

int64_t FFmpegReader::GetPacketPTS() {
	if (packet) {
		int64_t current_pts = packet->pts;
		if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
			// Fall back to the decode timestamp
			current_pts = packet->dts;

		return current_pts;
	}
	// No packet: no PTS
	return AV_NOPTS_VALUE;
}

void FFmpegReader::UpdatePTSOffset() {
	if (pts_offset_seconds != NO_PTS_OFFSET) {
		// Already initialized
		return;
	}

	// Default the offsets to zero
	pts_offset_seconds = 0.0;
	double video_pts_offset_seconds = 0.0;
	double audio_pts_offset_seconds = 0.0;

	bool has_video_pts = false;
	if (!info.has_video) {
		// No video stream: nothing to measure
		has_video_pts = true;
	}
	bool has_audio_pts = false;
	if (!info.has_audio) {
		// No audio stream: nothing to measure
		has_audio_pts = true;
	}

	// Loop through packets until the first PTS of each stream is found
	while (!has_video_pts || !has_audio_pts) {
		// ...
		if (GetNextPacket() < 0)
			// Stop looking on read error or end-of-file
			break;

		// Get the PTS of this packet
		int64_t pts = GetPacketPTS();
		// ...
		if (!has_video_pts && packet->stream_index == videoStream) {
			// Compute the video offset from the first video PTS
			// ...
			if (std::abs(video_pts_offset_seconds) <= 10.0) {
				// Sanity check: ignore absurd offsets
				has_video_pts = true;
			}
		}
		else if (!has_audio_pts && packet->stream_index == audioStream) {
			// Compute the audio offset from the first audio PTS
			// ...
			if (std::abs(audio_pts_offset_seconds) <= 10.0) {
				has_audio_pts = true;
			}
		}
	}

	// Set the final offset to whichever stream starts later
	if (has_video_pts && has_audio_pts) {
		// ...
		pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
	}
}

int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
	int64_t previous_video_frame = current_video_frame;
	// ... (frame computed from pts, the PTS offset, and info.fps)

	// Is this the first video packet since opening or seeking?
	if (current_video_frame == 0)
		current_video_frame = frame;
	// ...
	if (frame == previous_video_frame) {
		// Duplicate timestamp: ignore this frame
		// ...
	} else {
		// Increment the expected frame number
		current_video_frame++;
	}
	// ...
}

int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
	// Compute this frame's position in seconds (frame numbers are 1-based)
	double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
	// ... (the elided remainder converts seconds into a PTS using the
	//      stream's video_timebase)
}

int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
	// Same formula as above, but converted using the audio_timebase
	double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
	// ...
}
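// Worked example (hypothetical numbers): at info.fps = 25/1 with
// pts_offset_seconds = 0, frame 51 sits at (51 - 1) / 25 = 2.0 seconds; with
// a video timebase of 1/12800 that corresponds to a PTS of
// 2.0 * 12800 = 25600.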

AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
	// Convert the PTS into a fractional frame number
	// ...
	int64_t whole_frame = int64_t(frame);

	// Calculate which sample this PTS falls on within the frame
	double sample_start_percentage = frame - double(whole_frame);
	// ...
	int sample_start = round(double(samples_per_frame) * sample_start_percentage);

	// Protect against invalid locations
	if (whole_frame < 1)
		whole_frame = 1;
	if (sample_start < 0)
		sample_start = 0;

	// Prepare the audio location for this packet
	AudioLocation location = {whole_frame, sample_start};

	// Detect (and heal) small gaps between consecutive audio packets
	if (previous_packet_location.frame != -1) {
		if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
			int64_t orig_frame = location.frame;
			int orig_start = location.sample_start;
			// ...
			// Assume the previous location's prediction was correct
			location.frame = previous_packet_location.frame;
			location.sample_start = previous_packet_location.sample_start;

			ZmqLogger::Instance()->AppendDebugMethod(
				"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
				"Source Frame", orig_frame,
				"Source Audio Sample", orig_start,
				"Target Frame", location.frame,
				"Target Audio Sample", location.sample_start,
				"pts", pts);
		}
		// ...
	}

	// Remember this location for the next packet
	previous_packet_location = location;
	return location;
}

std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
	// Check the working cache first
	std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);

	if (!output) {
		// Lock, then re-check the cache (double-checked locking)
		const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
		output = working_cache.GetFrame(requested_frame);
		if (output)
			return output;

		// Create a new frame and add it to the working cache
		// ...
		working_cache.Add(output);

		// Track the largest frame created so far
		if (requested_frame > largest_frame_processed)
			largest_frame_processed = requested_frame;
	}
	return output;
}

bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
	// Determine if this frame was only partially decoded due to a recent seek
	bool seek_trash = false;
	int64_t max_seeked_frame = seek_audio_frame_found;
	if (seek_video_frame_found > max_seeked_frame) {
		max_seeked_frame = seek_video_frame_found;
	}
	if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
		(info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
		seek_trash = true;
	}
	return seek_trash;
}

void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
	// Prevent async calls to the following code
	const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

	// Get a list of all working-cache frames (sorted by frame #)
	std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
	std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;

	// Loop through the working cache, moving finished frames to the final cache
	for (working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr) {
		// Get the current frame
		std::shared_ptr<Frame> f = *working_itr;

		// Stop at the first missing frame, or any frame past the requested one
		if (!f || f->number > requested_frame) {
			break;
		}

		// This frame's position (and the most recent decode position) in seconds
		double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
		double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);

		// Determine if the video and audio for this frame are ready
		bool is_video_ready = false;
		bool is_audio_ready = false;
		double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
		if ((frame_pts_seconds <= video_pts_seconds)
			|| (recent_pts_diff > 1.5)
			/* ... */) {
			// The video decode position has passed this frame
			is_video_ready = true;
			ZmqLogger::Instance()->AppendDebugMethod(/* ... */
				"frame_number", f->number,
				"frame_pts_seconds", frame_pts_seconds,
				"video_pts_seconds", video_pts_seconds,
				"recent_pts_diff", recent_pts_diff);

			// If this frame never received an image, borrow one from a neighbor
			// ...
			for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
				// ...
				if (previous_frame_instance && previous_frame_instance->has_image_data) {
					// Copy the image from a previous frame
					f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
					break;
				}
				// ...
			}

			if (last_video_frame && !f->has_image_data) {
				// Copy the image from the last decoded video frame
				f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
			} else if (!f->has_image_data) {
				// No image available anywhere: fill with black
				f->AddColor("#000000");
			}
		}

		// Determine if the audio for this frame is ready
		double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
		if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
			|| (recent_pts_diff > 1.5)
			/* ... */) {
			// The audio decode position has passed this frame by a safe margin
			is_audio_ready = true;
			ZmqLogger::Instance()->AppendDebugMethod(/* ... */
				"frame_number", f->number,
				"frame_pts_seconds", frame_pts_seconds,
				"audio_pts_seconds", audio_pts_seconds,
				"audio_pts_diff", audio_pts_diff,
				"recent_pts_diff", recent_pts_diff);
		}
		bool is_seek_trash = IsPartialFrame(f->number);
		// ...
		ZmqLogger::Instance()->AppendDebugMethod(/* ... */
			"frame_number", f->number,
			"is_video_ready", is_video_ready,
			"is_audio_ready", is_audio_ready,
			/* ... */);

		// Move the frame to the final cache once complete (or discard seek trash)
		if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) || packet_status.end_of_file || is_seek_trash) {
			ZmqLogger::Instance()->AppendDebugMethod(/* ... */
				"requested_frame", requested_frame,
				"f->number", f->number,
				"is_seek_trash", is_seek_trash,
				"Working Cache Count", working_cache.Count(),
				/* ... */);

			if (!is_seek_trash) {
				// Move the frame to the final cache
				// ...
				working_cache.Remove(f->number);

				// Remember the last finished frame
				last_frame = f->number;
			} else {
				// Seek trash: remove it from the working cache only
				working_cache.Remove(f->number);
			}
		}
	}

	// Clear the working frames list
	working_frames.clear();
	working_frames.shrink_to_fit();
}

void FFmpegReader::CheckFPS() {
	// ...
	// Count the video packets decoded in each of the first few seconds
	int frames_per_second[3] = {0, 0, 0};
	int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);
	int fps_index = 0;

	int all_frames_detected = 0;
	int starting_frames_detected = 0;

	// Loop through the stream
	while (true) {
		// Get the next packet (if any)
		if (GetNextPacket() < 0)
			// Stop once the end of the stream is reached
			break;

		if (packet->stream_index == videoStream) {
			// Determine which second of video this packet belongs to
			// ...
			fps_index = int(video_seconds);

			// Is this packet within the first few seconds of video?
			if (fps_index >= 0 && fps_index < max_fps_index) {
				// Count this packet towards its second
				starting_frames_detected++;
				frames_per_second[fps_index]++;
			}
			// ...
			all_frames_detected++;
		}
	}

	// Compute the average FPS over the sampled seconds
	float avg_fps = 30.0;
	if (starting_frames_detected > 0 && fps_index > 0) {
		avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
	}
	// ...

	// A suspiciously low average suggests bad timestamps: fall back
	if (avg_fps < 8.0) {
		// ...
	}
	// ...
	if (all_frames_detected > 0) {
		// Use the total packet count to estimate the video length
		// ...
	}
	// ...
}

void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
	// Free the image data held by this frame
	if (remove_frame) {
		// ...
		av_freep(&remove_frame->data[0]);
		// ...
	}
}

void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
	// Unref the packet's buffers, then free the packet itself
	// ...
	delete remove_packet;
}

Json::Value FFmpegReader::JsonValue() const {
	// Create the root JSON object (including the parent class properties)
	// ...
	root["type"] = "FFmpegReader";
	root["path"] = path;
	// ...
	return root;
}

void FFmpegReader::SetJson(const std::string value) {
	try {
		// Parse the JSON string into a Json::Value
		const Json::Value root = openshot::stringToJson(value);
		// ...
		SetJsonValue(root);
	}
	catch (const std::exception& e) {
		// Error parsing JSON (or missing keys)
		throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
	}
}

void FFmpegReader::SetJsonValue(const Json::Value root) {
	// ... (parent class properties set first)

	// Set data from JSON (if the key exists)
	if (!root["path"].isNull())
		path = root["path"].asString();
	// ... (re-open the reader if it was open)
}