일단 ffmpeg 파일 인코딩/디코딩 예제 코드 완성

This commit is contained in:
2025-09-05 01:47:32 +09:00
parent e3b5e92164
commit 610074e4ac
5 changed files with 370 additions and 168 deletions

View File

@@ -103,7 +103,14 @@
"__locale": "cpp",
"ios": "cpp",
"locale": "cpp",
"print": "cpp"
"print": "cpp",
"__bit_reference": "cpp",
"__hash_table": "cpp",
"__node_handle": "cpp",
"__split_buffer": "cpp",
"__threading_support": "cpp",
"__verbose_abort": "cpp",
"queue": "cpp"
},
"files.exclude": {
"**/*.rpyc": true,

View File

@@ -16,6 +16,7 @@
#include "boost/process.hpp"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libswresample/swresample.h"

View File

@@ -6,7 +6,7 @@ int main() {
boost::asio::io_context ctx;
boost::system::error_code ec;
utils::CheckUpdate(ctx);
// utils::CheckUpdate(ctx);
char buf[8192];
#ifdef WIN32

View File

@@ -1,193 +1,232 @@
#include "precomp.h"
/* check that a given sample format is supported by the encoder */
static int check_sample_fmt(const AVCodec *codec,
enum AVSampleFormat sample_fmt) {
const enum AVSampleFormat *p = codec->sample_fmts;
while (*p != AV_SAMPLE_FMT_NONE) {
if (*p == sample_fmt) return 1;
p++;
#define OPUS_FRAME_SIZE 960 // 20ms @ 48kHz
int main() {
const char* input_filename = "golden.webm";
const char* output_filename = "output.opus";
AVFormatContext* fmt_ctx = NULL;
AVCodecContext* dec_ctx = NULL;
AVCodecContext* enc_ctx = NULL;
const AVCodec* decoder = NULL;
const AVCodec* encoder = NULL;
AVPacket* packet = NULL;
AVFrame* frame = NULL;
AVFrame* enc_frame = NULL;
SwrContext* swr_ctx = NULL;
FILE* outfile = NULL;
av_log_set_level(AV_LOG_ERROR);
if (avformat_open_input(&fmt_ctx, input_filename, NULL, NULL) < 0) {
fprintf(stderr, "Could not open input file\n");
return -1;
}
return 0;
if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
fprintf(stderr, "Could not find stream info\n");
return -1;
}
/* just pick the highest supported samplerate */
/* Pick the supported sample rate closest to 44.1 kHz; fall back to
 * 44100 when the encoder does not restrict its sample rates. */
static int select_sample_rate(const AVCodec *codec) {
  if (!codec->supported_samplerates) return 44100;
  int best = 0;
  for (const int *rate = codec->supported_samplerates; *rate; rate++) {
    if (best == 0 || abs(44100 - *rate) < abs(44100 - best)) best = *rate;
  }
  return best;
}
/* select layout with the highest channel count */
/* Select the layout with the highest channel count from the encoder's
 * published list; fall back to stereo when the encoder publishes no list,
 * or when the list is empty.
 * Returns 0 on success, a negative AVERROR from av_channel_layout_copy()
 * on failure. */
static int select_channel_layout(const AVCodec *codec, AVChannelLayout *dst) {
  const AVChannelLayout *best_ch_layout = NULL;
  int best_nb_channels = 0;
  if (!codec->ch_layouts)
    return av_channel_layout_copy(dst,
                                  &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
  for (const AVChannelLayout *p = codec->ch_layouts; p->nb_channels; p++) {
    if (p->nb_channels > best_nb_channels) {
      best_ch_layout = p;
      best_nb_channels = p->nb_channels;
    }
  }
  /* BUG FIX: the original copied best_ch_layout while still uninitialized
   * when the list terminator came first (undefined behavior); fall back to
   * stereo for an empty list instead. */
  if (!best_ch_layout)
    return av_channel_layout_copy(dst,
                                  &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
  return av_channel_layout_copy(dst, best_ch_layout);
}
static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
FILE *output) {
int ret;
/* send the frame for encoding */
ret = avcodec_send_frame(ctx, frame);
if (ret < 0) {
fprintf(stderr, "Error sending the frame to the encoder\n");
exit(1);
}
/* read all the available output packets (in general there may be any
* number of them */
while (ret >= 0) {
ret = avcodec_receive_packet(ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error encoding audio frame\n");
exit(1);
}
fwrite(pkt->data, 1, pkt->size, output);
av_packet_unref(pkt);
int stream_index = -1;
for (unsigned i = 0; i < fmt_ctx->nb_streams; i++) {
if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
stream_index = i;
break;
}
}
int main(int argc, char **argv) {
const char *filename;
const AVCodec *codec;
AVCodecContext *c = NULL;
AVFrame *frame;
AVPacket *pkt;
int i, j, k, ret;
FILE *f;
uint16_t *samples;
float t, tincr;
if (argc <= 1) {
fprintf(stderr, "Usage: %s <output file>\n", argv[0]);
return 0;
}
filename = argv[1];
/* find the MP2 encoder */
codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
if (stream_index == -1) {
fprintf(stderr, "No audio stream found\n");
return -1;
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate audio codec context\n");
exit(1);
decoder =
avcodec_find_decoder(fmt_ctx->streams[stream_index]->codecpar->codec_id);
if (!decoder) {
fprintf(stderr, "Decoder not found\n");
return -1;
}
dec_ctx = avcodec_alloc_context3(decoder);
avcodec_parameters_to_context(dec_ctx,
fmt_ctx->streams[stream_index]->codecpar);
avcodec_open2(dec_ctx, decoder, NULL);
/* put sample parameters */
c->bit_rate = 64000;
/* check that the encoder supports s16 pcm input */
c->sample_fmt = AV_SAMPLE_FMT_S16;
if (!check_sample_fmt(codec, c->sample_fmt)) {
fprintf(stderr, "Encoder does not support sample format %s",
av_get_sample_fmt_name(c->sample_fmt));
exit(1);
encoder = avcodec_find_encoder(AV_CODEC_ID_OPUS);
if (!encoder) {
fprintf(stderr, "Opus encoder not found\n");
return -1;
}
enc_ctx = avcodec_alloc_context3(encoder);
/* select other audio parameters supported by the encoder */
c->sample_rate = select_sample_rate(codec);
ret = select_channel_layout(codec, &c->ch_layout);
if (ret < 0) exit(1);
AVChannelLayout enc_layout;
av_channel_layout_default(&enc_layout, 2); // 스테레오
av_channel_layout_copy(&enc_ctx->ch_layout, &enc_layout);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
enc_ctx->sample_rate = 48000;
enc_ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
enc_ctx->bit_rate = 128000;
avcodec_open2(enc_ctx, encoder, NULL);
swr_ctx = NULL;
if (swr_alloc_set_opts2(&swr_ctx, &enc_ctx->ch_layout, enc_ctx->sample_fmt,
enc_ctx->sample_rate, &dec_ctx->ch_layout,
dec_ctx->sample_fmt, dec_ctx->sample_rate, 0,
NULL) < 0) {
fprintf(stderr, "Failed to allocate SwrContext\n");
return -1;
}
swr_init(swr_ctx);
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
/* packet for holding encoded output */
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "could not allocate the packet\n");
exit(1);
}
/* frame containing input raw audio */
packet = av_packet_alloc();
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
enc_frame = av_frame_alloc();
outfile = fopen(output_filename, "wb");
if (!outfile) {
fprintf(stderr, "Could not open output file\n");
return -1;
}
frame->nb_samples = c->frame_size;
frame->format = c->sample_fmt;
ret = av_channel_layout_copy(&frame->ch_layout, &c->ch_layout);
if (ret < 0) exit(1);
// 임시 PCM 버퍼 (float, 스테레오)
float* pcm_buffer = (float*)malloc(sizeof(float) * 2 * OPUS_FRAME_SIZE *
4); // 충분히 큰 버퍼
int buffered_samples = 0;
/* allocate the data buffers */
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate audio data buffers\n");
exit(1);
while (av_read_frame(fmt_ctx, packet) >= 0) {
if (packet->stream_index != stream_index) {
av_packet_unref(packet);
continue;
}
/* encode a single tone sound */
t = 0;
tincr = 2 * M_PI * 440.0 / c->sample_rate;
for (i = 0; i < 200; i++) {
/* make sure the frame is writable -- makes a copy if the encoder
* kept a reference internally */
ret = av_frame_make_writable(frame);
if (ret < 0) exit(1);
samples = (uint16_t *)frame->data[0];
avcodec_send_packet(dec_ctx, packet);
while (avcodec_receive_frame(dec_ctx, frame) == 0) {
int max_out = av_rescale_rnd(
swr_get_delay(swr_ctx, dec_ctx->sample_rate) + frame->nb_samples,
enc_ctx->sample_rate, dec_ctx->sample_rate, AV_ROUND_UP);
for (j = 0; j < c->frame_size; j++) {
samples[2 * j] = (int)(sin(t) * 10000);
uint8_t** out_data = NULL;
int out_linesize = 0;
av_samples_alloc_array_and_samples(&out_data, &out_linesize, 2, max_out,
enc_ctx->sample_fmt, 0);
for (k = 1; k < c->ch_layout.nb_channels; k++)
samples[2 * j + k] = samples[2 * j];
t += tincr;
int converted =
swr_convert(swr_ctx, out_data, max_out, (const uint8_t**)frame->data,
frame->nb_samples);
// float PCM으로 임시 버퍼에 추가
memcpy(pcm_buffer + buffered_samples * 2, out_data[0],
converted * 2 * sizeof(float));
buffered_samples += converted;
av_freep(&out_data[0]);
free(out_data);
// OPUS_FRAME_SIZE 단위로 인코딩
while (buffered_samples >= OPUS_FRAME_SIZE) {
enc_frame->nb_samples = OPUS_FRAME_SIZE;
enc_frame->format = enc_ctx->sample_fmt;
enc_frame->sample_rate = enc_ctx->sample_rate;
av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout);
enc_frame->data[0] = (uint8_t*)pcm_buffer;
AVPacket* out_pkt = av_packet_alloc();
avcodec_send_frame(enc_ctx, enc_frame);
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
encode(c, frame, pkt, f);
av_packet_free(&out_pkt);
// 버퍼 이동
memmove(pcm_buffer, pcm_buffer + OPUS_FRAME_SIZE * 2,
(buffered_samples - OPUS_FRAME_SIZE) * 2 * sizeof(float));
buffered_samples -= OPUS_FRAME_SIZE;
}
}
av_packet_unref(packet);
}
/* flush the encoder */
encode(c, NULL, pkt, f);
// 디코더 플러시
avcodec_send_packet(dec_ctx, NULL);
while (avcodec_receive_frame(dec_ctx, frame) == 0) {
int max_out = av_rescale_rnd(
swr_get_delay(swr_ctx, dec_ctx->sample_rate) + frame->nb_samples,
enc_ctx->sample_rate, dec_ctx->sample_rate, AV_ROUND_UP);
fclose(f);
uint8_t** out_data = NULL;
int out_linesize = 0;
av_samples_alloc_array_and_samples(&out_data, &out_linesize, 2, max_out,
enc_ctx->sample_fmt, 0);
int converted =
swr_convert(swr_ctx, out_data, max_out, (const uint8_t**)frame->data,
frame->nb_samples);
memcpy(pcm_buffer + buffered_samples * 2, out_data[0],
converted * 2 * sizeof(float));
buffered_samples += converted;
av_freep(&out_data[0]);
free(out_data);
while (buffered_samples >= OPUS_FRAME_SIZE) {
enc_frame->nb_samples = OPUS_FRAME_SIZE;
enc_frame->format = enc_ctx->sample_fmt;
enc_frame->sample_rate = enc_ctx->sample_rate;
av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout);
enc_frame->data[0] = (uint8_t*)pcm_buffer;
AVPacket* out_pkt = av_packet_alloc();
avcodec_send_frame(enc_ctx, enc_frame);
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
memmove(pcm_buffer, pcm_buffer + OPUS_FRAME_SIZE * 2,
(buffered_samples - OPUS_FRAME_SIZE) * 2 * sizeof(float));
buffered_samples -= OPUS_FRAME_SIZE;
}
}
// 마지막 남은 샘플 인코딩
if (buffered_samples > 0) {
enc_frame->nb_samples = buffered_samples;
enc_frame->format = enc_ctx->sample_fmt;
enc_frame->sample_rate = enc_ctx->sample_rate;
av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout);
enc_frame->data[0] = (uint8_t*)pcm_buffer;
AVPacket* out_pkt = av_packet_alloc();
avcodec_send_frame(enc_ctx, enc_frame);
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
}
// 인코더 플러시
avcodec_send_frame(enc_ctx, NULL);
AVPacket* out_pkt = av_packet_alloc();
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
fclose(outfile);
free(pcm_buffer);
swr_free(&swr_ctx);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&c);
av_frame_free(&enc_frame);
av_packet_free(&packet);
avcodec_free_context(&dec_ctx);
avcodec_free_context(&enc_ctx);
avformat_close_input(&fmt_ctx);
printf("Encoding finished: %s\n", output_filename);
return 0;
}

View File

@@ -0,0 +1,155 @@
#include <fstream>
#include "ffmpeg/libavcodec.h"
#include "precomp.h"
int main() {
const char* input_filename = "golden.webm";
const char* output_filename = "output.pcm";
AVFormatContext* fmt_ctx = NULL;
AVCodecContext* codec_ctx = NULL;
const AVCodec* codec = NULL;
AVPacket* packet = NULL;
AVFrame* frame = NULL;
SwrContext* swr_ctx = NULL;
FILE* outfile = NULL;
if (avformat_open_input(&fmt_ctx, input_filename, NULL, NULL) < 0) {
fprintf(stderr, "Could not open input file\n");
return -1;
}
if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
fprintf(stderr, "Could not find stream info\n");
return -1;
}
int stream_index = -1;
for (unsigned i = 0; i < fmt_ctx->nb_streams; i++) {
if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
stream_index = i;
break;
}
}
if (stream_index == -1) {
fprintf(stderr, "Could not find audio stream\n");
return -1;
}
codec =
avcodec_find_decoder(fmt_ctx->streams[stream_index]->codecpar->codec_id);
if (!codec) {
fprintf(stderr, "Could not find decoder\n");
return -1;
}
codec_ctx = avcodec_alloc_context3(codec);
if (!codec_ctx) {
fprintf(stderr, "Could not allocate codec context\n");
return -1;
}
if (avcodec_parameters_to_context(
codec_ctx, fmt_ctx->streams[stream_index]->codecpar) < 0) {
fprintf(stderr, "Failed to copy codec parameters\n");
return -1;
}
if (avcodec_open2(codec_ctx, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
return -1;
}
packet = av_packet_alloc();
frame = av_frame_alloc();
if (!packet || !frame) {
fprintf(stderr, "Could not allocate packet or frame\n");
return -1;
}
outfile = fopen(output_filename, "wb");
if (!outfile) {
fprintf(stderr, "Could not open output file\n");
return -1;
}
// AVChannelLayout 초기화
AVChannelLayout in_layout, out_layout;
if (av_channel_layout_copy(&in_layout, &codec_ctx->ch_layout) < 0) {
fprintf(stderr, "Failed to copy channel layout\n");
return -1;
}
av_channel_layout_default(&out_layout, 2); // 스테레오
swr_ctx = NULL; // 먼저 NULL로 선언
if (swr_alloc_set_opts2(&swr_ctx, &out_layout, AV_SAMPLE_FMT_S16, 48000,
&in_layout, codec_ctx->sample_fmt,
codec_ctx->sample_rate, 0, NULL) < 0) {
fprintf(stderr, "Failed to allocate and set SwrContext\n");
return -1;
}
if (swr_init(swr_ctx) < 0) {
fprintf(stderr, "Failed to initialize SwrContext\n");
return -1;
}
while (av_read_frame(fmt_ctx, packet) >= 0) {
if (packet->stream_index == stream_index) {
if (avcodec_send_packet(codec_ctx, packet) == 0) {
while (avcodec_receive_frame(codec_ctx, frame) == 0) {
int out_samples =
av_rescale_rnd(swr_get_delay(swr_ctx, codec_ctx->sample_rate) +
frame->nb_samples,
48000, codec_ctx->sample_rate, AV_ROUND_UP);
uint8_t** out_buf = NULL;
int out_linesize = 0;
av_samples_alloc_array_and_samples(&out_buf, &out_linesize, 2,
out_samples, AV_SAMPLE_FMT_S16, 0);
int converted_samples =
swr_convert(swr_ctx, out_buf, out_samples,
(const uint8_t**)frame->data, frame->nb_samples);
fwrite(out_buf[0], 1, converted_samples * 2 * 2, outfile);
av_freep(&out_buf[0]);
free(out_buf);
}
}
}
av_packet_unref(packet);
}
// 디코더 플러시
avcodec_send_packet(codec_ctx, NULL);
while (avcodec_receive_frame(codec_ctx, frame) == 0) {
int out_samples = av_rescale_rnd(
swr_get_delay(swr_ctx, codec_ctx->sample_rate) + frame->nb_samples,
48000, codec_ctx->sample_rate, AV_ROUND_UP);
uint8_t** out_buf = NULL;
int out_linesize = 0;
av_samples_alloc_array_and_samples(&out_buf, &out_linesize, 2, out_samples,
AV_SAMPLE_FMT_S16, 0);
int converted_samples =
swr_convert(swr_ctx, out_buf, out_samples, (const uint8_t**)frame->data,
frame->nb_samples);
fwrite(out_buf[0], 1, converted_samples * 2 * 2, outfile);
av_freep(&out_buf[0]);
free(out_buf);
}
fclose(outfile);
swr_free(&swr_ctx);
av_frame_free(&frame);
av_packet_free(&packet);
avcodec_free_context(&codec_ctx);
avformat_close_input(&fmt_ctx);
printf("Decoding finished, output saved to %s\n", output_filename);
return 0;
}