From 610074e4ac0248cfca7c214dc6a8c14172d10b9a Mon Sep 17 00:00:00 2001 From: HappyTanuki Date: Fri, 5 Sep 2025 01:47:32 +0900 Subject: [PATCH] =?UTF-8?q?=EC=9D=BC=EB=8B=A8=20ffmpeg=ED=8C=8C=EC=9D=BC?= =?UTF-8?q?=20=EC=9D=B8=EC=BD=94=EB=94=A9/=EB=94=94=EC=BD=94=EB=94=A9=20?= =?UTF-8?q?=EC=98=88=EC=A0=9C=20=EC=BD=94=EB=93=9C=20=EC=99=84=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/settings.json | 9 +- include/precomp.h | 1 + tests/basic_yt_dlp_download.cc | 2 +- tests/ffmpeg_any_to_opus.cc | 371 ++++++++++++++++++--------------- tests/ffmpeg_decode_audio.cc | 155 ++++++++++++++ 5 files changed, 370 insertions(+), 168 deletions(-) create mode 100644 tests/ffmpeg_decode_audio.cc diff --git a/.vscode/settings.json b/.vscode/settings.json index c366c4b..21c998d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -103,7 +103,14 @@ "__locale": "cpp", "ios": "cpp", "locale": "cpp", - "print": "cpp" + "print": "cpp", + "__bit_reference": "cpp", + "__hash_table": "cpp", + "__node_handle": "cpp", + "__split_buffer": "cpp", + "__threading_support": "cpp", + "__verbose_abort": "cpp", + "queue": "cpp" }, "files.exclude": { "**/*.rpyc": true, diff --git a/include/precomp.h b/include/precomp.h index e994123..10b5a60 100644 --- a/include/precomp.h +++ b/include/precomp.h @@ -16,6 +16,7 @@ #include "boost/process.hpp" extern "C" { +#include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libavutil/avutil.h" #include "libswresample/swresample.h" diff --git a/tests/basic_yt_dlp_download.cc b/tests/basic_yt_dlp_download.cc index de4f5f9..51e9af4 100644 --- a/tests/basic_yt_dlp_download.cc +++ b/tests/basic_yt_dlp_download.cc @@ -6,7 +6,7 @@ int main() { boost::asio::io_context ctx; boost::system::error_code ec; - utils::CheckUpdate(ctx); + // utils::CheckUpdate(ctx); char buf[8192]; #ifdef WIN32 diff --git a/tests/ffmpeg_any_to_opus.cc b/tests/ffmpeg_any_to_opus.cc index e91ba7b..2b1d628 100644 --- a/tests/ffmpeg_any_to_opus.cc +++ b/tests/ffmpeg_any_to_opus.cc @@ -1,193 +1,232 @@ #include "precomp.h" -/* check that a given sample format is supported by the encoder */ -static int check_sample_fmt(const AVCodec *codec, - enum AVSampleFormat sample_fmt) { - const enum AVSampleFormat *p = codec->sample_fmts; - while (*p != AV_SAMPLE_FMT_NONE) { - if (*p == sample_fmt) return 1; - p++; +#define OPUS_FRAME_SIZE 960 // 20ms @ 48kHz + +int main() { + const char* input_filename = "golden.webm"; + const char* output_filename = "output.opus"; + + AVFormatContext* fmt_ctx = NULL; + AVCodecContext* dec_ctx = NULL; + AVCodecContext* enc_ctx = NULL; + const AVCodec* decoder = NULL; + const AVCodec* encoder = NULL; + AVPacket* packet = NULL; + AVFrame* frame = NULL; + AVFrame* enc_frame = NULL; + SwrContext* swr_ctx = NULL; + FILE* outfile = NULL; + + av_log_set_level(AV_LOG_ERROR); + + if (avformat_open_input(&fmt_ctx, input_filename, NULL, NULL) < 0) { + fprintf(stderr, "Could not open input file\n"); + return -1; } - return 0; -} - -/* just pick the highest supported samplerate */ -static int select_sample_rate(const AVCodec *codec) { - const int *p; - int best_samplerate = 0; - - if (!codec->supported_samplerates) return 44100; - - p = codec->supported_samplerates; - while (*p) { - if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate)) - best_samplerate = *p; - p++; + if (avformat_find_stream_info(fmt_ctx, NULL) < 0) { + fprintf(stderr, "Could not find stream info\n"); + return -1; } - return best_samplerate; -} -/* select layout with the highest channel count */ -static int select_channel_layout(const AVCodec *codec, AVChannelLayout *dst) { - const AVChannelLayout *p, *best_ch_layout; - int best_nb_channels = 0; - - if (!codec->ch_layouts) - return av_channel_layout_copy(dst, - &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO); - - p = codec->ch_layouts; - while (p->nb_channels) { - int nb_channels = p->nb_channels; - - if (nb_channels > best_nb_channels) { - best_ch_layout = p; - best_nb_channels = nb_channels; + int stream_index = -1; + for (unsigned i = 0; i < fmt_ctx->nb_streams; i++) { + if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { + stream_index = i; + break; } - p++; } - return av_channel_layout_copy(dst, best_ch_layout); -} - -static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt, - FILE *output) { - int ret; - - /* send the frame for encoding */ - ret = avcodec_send_frame(ctx, frame); - if (ret < 0) { - fprintf(stderr, "Error sending the frame to the encoder\n"); - exit(1); + if (stream_index == -1) { + fprintf(stderr, "No audio stream found\n"); + return -1; } - /* read all the available output packets (in general there may be any - * number of them */ - while (ret >= 0) { - ret = avcodec_receive_packet(ctx, pkt); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) - return; - else if (ret < 0) { - fprintf(stderr, "Error encoding audio frame\n"); - exit(1); - } - - fwrite(pkt->data, 1, pkt->size, output); - av_packet_unref(pkt); + decoder = + avcodec_find_decoder(fmt_ctx->streams[stream_index]->codecpar->codec_id); + if (!decoder) { + fprintf(stderr, "Decoder not found\n"); + return -1; } -} + dec_ctx = avcodec_alloc_context3(decoder); + avcodec_parameters_to_context(dec_ctx, + fmt_ctx->streams[stream_index]->codecpar); + avcodec_open2(dec_ctx, decoder, NULL); -int main(int argc, char **argv) { - const char *filename; - const AVCodec *codec; - AVCodecContext *c = NULL; - AVFrame *frame; - AVPacket *pkt; - int i, j, k, ret; - FILE *f; - uint16_t *samples; - float t, tincr; - - if (argc <= 1) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 0; + encoder = avcodec_find_encoder(AV_CODEC_ID_OPUS); + if (!encoder) { + fprintf(stderr, "Opus encoder not found\n"); + return -1; } - filename = argv[1]; + enc_ctx = avcodec_alloc_context3(encoder); - /* find the MP2 encoder */ - codec = avcodec_find_encoder(AV_CODEC_ID_MP2); - if (!codec) { - fprintf(stderr, "Codec not found\n"); - exit(1); + AVChannelLayout enc_layout; + av_channel_layout_default(&enc_layout, 2); // 스테레오 + av_channel_layout_copy(&enc_ctx->ch_layout, &enc_layout); + + enc_ctx->sample_rate = 48000; + enc_ctx->sample_fmt = AV_SAMPLE_FMT_FLT; + enc_ctx->bit_rate = 128000; + + avcodec_open2(enc_ctx, encoder, NULL); + + swr_ctx = NULL; + if (swr_alloc_set_opts2(&swr_ctx, &enc_ctx->ch_layout, enc_ctx->sample_fmt, + enc_ctx->sample_rate, &dec_ctx->ch_layout, + dec_ctx->sample_fmt, dec_ctx->sample_rate, 0, + NULL) < 0) { + fprintf(stderr, "Failed to allocate SwrContext\n"); + return -1; } + swr_init(swr_ctx); - c = avcodec_alloc_context3(codec); - if (!c) { - fprintf(stderr, "Could not allocate audio codec context\n"); - exit(1); - } - - /* put sample parameters */ - c->bit_rate = 64000; - - /* check that the encoder supports s16 pcm input */ - c->sample_fmt = AV_SAMPLE_FMT_S16; - if (!check_sample_fmt(codec, c->sample_fmt)) { - fprintf(stderr, "Encoder does not support sample format %s", - av_get_sample_fmt_name(c->sample_fmt)); - exit(1); - } - - /* select other audio parameters supported by the encoder */ - c->sample_rate = select_sample_rate(codec); - ret = select_channel_layout(codec, &c->ch_layout); - if (ret < 0) exit(1); - - /* open it */ - if (avcodec_open2(c, codec, NULL) < 0) { - fprintf(stderr, "Could not open codec\n"); - exit(1); - } - - f = fopen(filename, "wb"); - if (!f) { - fprintf(stderr, "Could not open %s\n", filename); - exit(1); - } - - /* packet for holding encoded output */ - pkt = av_packet_alloc(); - if (!pkt) { - fprintf(stderr, "could not allocate the packet\n"); - exit(1); - } - - /* frame containing input raw audio */ + packet = av_packet_alloc(); frame = av_frame_alloc(); - if (!frame) { - fprintf(stderr, "Could not allocate audio frame\n"); - exit(1); + enc_frame = av_frame_alloc(); + + outfile = fopen(output_filename, "wb"); + if (!outfile) { + fprintf(stderr, "Could not open output file\n"); + return -1; } - frame->nb_samples = c->frame_size; - frame->format = c->sample_fmt; - ret = av_channel_layout_copy(&frame->ch_layout, &c->ch_layout); - if (ret < 0) exit(1); + // 임시 PCM 버퍼 (float, 스테레오) + float* pcm_buffer = (float*)malloc(sizeof(float) * 2 * OPUS_FRAME_SIZE * + 4); // 충분히 큰 버퍼 + int buffered_samples = 0; - /* allocate the data buffers */ - ret = av_frame_get_buffer(frame, 0); - if (ret < 0) { - fprintf(stderr, "Could not allocate audio data buffers\n"); - exit(1); - } - - /* encode a single tone sound */ - t = 0; - tincr = 2 * M_PI * 440.0 / c->sample_rate; - for (i = 0; i < 200; i++) { - /* make sure the frame is writable -- makes a copy if the encoder - * kept a reference internally */ - ret = av_frame_make_writable(frame); - if (ret < 0) exit(1); - samples = (uint16_t *)frame->data[0]; - - for (j = 0; j < c->frame_size; j++) { - samples[2 * j] = (int)(sin(t) * 10000); - - for (k = 1; k < c->ch_layout.nb_channels; k++) - samples[2 * j + k] = samples[2 * j]; - t += tincr; + while (av_read_frame(fmt_ctx, packet) >= 0) { + if (packet->stream_index != stream_index) { + av_packet_unref(packet); + continue; } - encode(c, frame, pkt, f); + + avcodec_send_packet(dec_ctx, packet); + while (avcodec_receive_frame(dec_ctx, frame) == 0) { + int max_out = av_rescale_rnd( + swr_get_delay(swr_ctx, dec_ctx->sample_rate) + frame->nb_samples, + enc_ctx->sample_rate, dec_ctx->sample_rate, AV_ROUND_UP); + + uint8_t** out_data = NULL; + int out_linesize = 0; + av_samples_alloc_array_and_samples(&out_data, &out_linesize, 2, max_out, + enc_ctx->sample_fmt, 0); + + int converted = + swr_convert(swr_ctx, out_data, max_out, (const uint8_t**)frame->data, + frame->nb_samples); + + // float PCM으로 임시 버퍼에 추가 + memcpy(pcm_buffer + buffered_samples * 2, out_data[0], + converted * 2 * sizeof(float)); + buffered_samples += converted; + + av_freep(&out_data[0]); + free(out_data); + + // OPUS_FRAME_SIZE 단위로 인코딩 + while (buffered_samples >= OPUS_FRAME_SIZE) { + enc_frame->nb_samples = OPUS_FRAME_SIZE; + enc_frame->format = enc_ctx->sample_fmt; + enc_frame->sample_rate = enc_ctx->sample_rate; + av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout); + enc_frame->data[0] = (uint8_t*)pcm_buffer; + + AVPacket* out_pkt = av_packet_alloc(); + avcodec_send_frame(enc_ctx, enc_frame); + while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) { + fwrite(out_pkt->data, 1, out_pkt->size, outfile); + av_packet_unref(out_pkt); + } + av_packet_free(&out_pkt); + + // 버퍼 이동 + memmove(pcm_buffer, pcm_buffer + OPUS_FRAME_SIZE * 2, + (buffered_samples - OPUS_FRAME_SIZE) * 2 * sizeof(float)); + buffered_samples -= OPUS_FRAME_SIZE; + } + } + av_packet_unref(packet); } - /* flush the encoder */ - encode(c, NULL, pkt, f); + // 디코더 플러시 + avcodec_send_packet(dec_ctx, NULL); + while (avcodec_receive_frame(dec_ctx, frame) == 0) { + int max_out = av_rescale_rnd( + swr_get_delay(swr_ctx, dec_ctx->sample_rate) + frame->nb_samples, + enc_ctx->sample_rate, dec_ctx->sample_rate, AV_ROUND_UP); - fclose(f); + uint8_t** out_data = NULL; + int out_linesize = 0; + av_samples_alloc_array_and_samples(&out_data, &out_linesize, 2, max_out, + enc_ctx->sample_fmt, 0); + int converted = + swr_convert(swr_ctx, out_data, max_out, (const uint8_t**)frame->data, + frame->nb_samples); + + memcpy(pcm_buffer + buffered_samples * 2, out_data[0], + converted * 2 * sizeof(float)); + buffered_samples += converted; + + av_freep(&out_data[0]); + free(out_data); + + while (buffered_samples >= OPUS_FRAME_SIZE) { + enc_frame->nb_samples = OPUS_FRAME_SIZE; + enc_frame->format = enc_ctx->sample_fmt; + enc_frame->sample_rate = enc_ctx->sample_rate; + av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout); + enc_frame->data[0] = (uint8_t*)pcm_buffer; + + AVPacket* out_pkt = av_packet_alloc(); + avcodec_send_frame(enc_ctx, enc_frame); + while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) { + fwrite(out_pkt->data, 1, out_pkt->size, outfile); + av_packet_unref(out_pkt); + } + av_packet_free(&out_pkt); + + memmove(pcm_buffer, pcm_buffer + OPUS_FRAME_SIZE * 2, + (buffered_samples - OPUS_FRAME_SIZE) * 2 * sizeof(float)); + buffered_samples -= OPUS_FRAME_SIZE; + } + } + + // 마지막 남은 샘플 인코딩 + if (buffered_samples > 0) { + enc_frame->nb_samples = buffered_samples; + enc_frame->format = enc_ctx->sample_fmt; + enc_frame->sample_rate = enc_ctx->sample_rate; + av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout); + enc_frame->data[0] = (uint8_t*)pcm_buffer; + + AVPacket* out_pkt = av_packet_alloc(); + avcodec_send_frame(enc_ctx, enc_frame); + while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) { + fwrite(out_pkt->data, 1, out_pkt->size, outfile); + av_packet_unref(out_pkt); + } + av_packet_free(&out_pkt); + } + + // 인코더 플러시 + avcodec_send_frame(enc_ctx, NULL); + AVPacket* out_pkt = av_packet_alloc(); + while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) { + fwrite(out_pkt->data, 1, out_pkt->size, outfile); + av_packet_unref(out_pkt); + } + av_packet_free(&out_pkt); + + fclose(outfile); + free(pcm_buffer); + swr_free(&swr_ctx); av_frame_free(&frame); - av_packet_free(&pkt); - avcodec_free_context(&c); + av_frame_free(&enc_frame); + av_packet_free(&packet); + avcodec_free_context(&dec_ctx); + avcodec_free_context(&enc_ctx); + avformat_close_input(&fmt_ctx); + printf("Encoding finished: %s\n", output_filename); return 0; -} +} \ No newline at end of file diff --git a/tests/ffmpeg_decode_audio.cc b/tests/ffmpeg_decode_audio.cc new file mode 100644 index 0000000..e34f3c1 --- /dev/null +++ b/tests/ffmpeg_decode_audio.cc @@ -0,0 +1,155 @@ +#include + +#include "ffmpeg/libavcodec.h" +#include "precomp.h" + +int main() { + const char* input_filename = "golden.webm"; + const char* output_filename = "output.pcm"; + + AVFormatContext* fmt_ctx = NULL; + AVCodecContext* codec_ctx = NULL; + const AVCodec* codec = NULL; + AVPacket* packet = NULL; + AVFrame* frame = NULL; + SwrContext* swr_ctx = NULL; + FILE* outfile = NULL; + + if (avformat_open_input(&fmt_ctx, input_filename, NULL, NULL) < 0) { + fprintf(stderr, "Could not open input file\n"); + return -1; + } + + if (avformat_find_stream_info(fmt_ctx, NULL) < 0) { + fprintf(stderr, "Could not find stream info\n"); + return -1; + } + + int stream_index = -1; + for (unsigned i = 0; i < fmt_ctx->nb_streams; i++) { + if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { + stream_index = i; + break; + } + } + if (stream_index == -1) { + fprintf(stderr, "Could not find audio stream\n"); + return -1; + } + + codec = + avcodec_find_decoder(fmt_ctx->streams[stream_index]->codecpar->codec_id); + if (!codec) { + fprintf(stderr, "Could not find decoder\n"); + return -1; + } + + codec_ctx = avcodec_alloc_context3(codec); + if (!codec_ctx) { + fprintf(stderr, "Could not allocate codec context\n"); + return -1; + } + + if (avcodec_parameters_to_context( + codec_ctx, fmt_ctx->streams[stream_index]->codecpar) < 0) { + fprintf(stderr, "Failed to copy codec parameters\n"); + return -1; + } + + if (avcodec_open2(codec_ctx, codec, NULL) < 0) { + fprintf(stderr, "Could not open codec\n"); + return -1; + } + + packet = av_packet_alloc(); + frame = av_frame_alloc(); + if (!packet || !frame) { + fprintf(stderr, "Could not allocate packet or frame\n"); + return -1; + } + + outfile = fopen(output_filename, "wb"); + if (!outfile) { + fprintf(stderr, "Could not open output file\n"); + return -1; + } + + // AVChannelLayout 초기화 + AVChannelLayout in_layout, out_layout; + if (av_channel_layout_copy(&in_layout, &codec_ctx->ch_layout) < 0) { + fprintf(stderr, "Failed to copy channel layout\n"); + return -1; + } + av_channel_layout_default(&out_layout, 2); // 스테레오 + + swr_ctx = NULL; // 먼저 NULL로 선언 + if (swr_alloc_set_opts2(&swr_ctx, &out_layout, AV_SAMPLE_FMT_S16, 48000, + &in_layout, codec_ctx->sample_fmt, + codec_ctx->sample_rate, 0, NULL) < 0) { + fprintf(stderr, "Failed to allocate and set SwrContext\n"); + return -1; + } + + if (swr_init(swr_ctx) < 0) { + fprintf(stderr, "Failed to initialize SwrContext\n"); + return -1; + } + + while (av_read_frame(fmt_ctx, packet) >= 0) { + if (packet->stream_index == stream_index) { + if (avcodec_send_packet(codec_ctx, packet) == 0) { + while (avcodec_receive_frame(codec_ctx, frame) == 0) { + int out_samples = + av_rescale_rnd(swr_get_delay(swr_ctx, codec_ctx->sample_rate) + + frame->nb_samples, + 48000, codec_ctx->sample_rate, AV_ROUND_UP); + + uint8_t** out_buf = NULL; + int out_linesize = 0; + av_samples_alloc_array_and_samples(&out_buf, &out_linesize, 2, + out_samples, AV_SAMPLE_FMT_S16, 0); + + int converted_samples = + swr_convert(swr_ctx, out_buf, out_samples, + (const uint8_t**)frame->data, frame->nb_samples); + + fwrite(out_buf[0], 1, converted_samples * 2 * 2, outfile); + av_freep(&out_buf[0]); + free(out_buf); + } + } + } + av_packet_unref(packet); + } + + // 디코더 플러시 + avcodec_send_packet(codec_ctx, NULL); + while (avcodec_receive_frame(codec_ctx, frame) == 0) { + int out_samples = av_rescale_rnd( + swr_get_delay(swr_ctx, codec_ctx->sample_rate) + frame->nb_samples, + 48000, codec_ctx->sample_rate, AV_ROUND_UP); + + uint8_t** out_buf = NULL; + int out_linesize = 0; + av_samples_alloc_array_and_samples(&out_buf, &out_linesize, 2, out_samples, + AV_SAMPLE_FMT_S16, 0); + + int converted_samples = + swr_convert(swr_ctx, out_buf, out_samples, (const uint8_t**)frame->data, + frame->nb_samples); + + fwrite(out_buf[0], 1, converted_samples * 2 * 2, outfile); + av_freep(&out_buf[0]); + free(out_buf); + } + + fclose(outfile); + swr_free(&swr_ctx); + av_frame_free(&frame); + av_packet_free(&packet); + avcodec_free_context(&codec_ctx); + avformat_close_input(&fmt_ctx); + + printf("Decoding finished, output saved to %s\n", output_filename); + return 0; +} \ No newline at end of file