// BumbleCee/tests/ffmpeg_any_to_opus.cc
// Repository file listing: 232 lines, 7.3 KiB, C++.

#include "precomp.h"
#define OPUS_FRAME_SIZE 960 // 20ms @ 48kHz
int main() {
const char* input_filename = "golden.webm";
const char* output_filename = "output.opus";
AVFormatContext* fmt_ctx = NULL;
AVCodecContext* dec_ctx = NULL;
AVCodecContext* enc_ctx = NULL;
const AVCodec* decoder = NULL;
const AVCodec* encoder = NULL;
AVPacket* packet = NULL;
AVFrame* frame = NULL;
AVFrame* enc_frame = NULL;
SwrContext* swr_ctx = NULL;
FILE* outfile = NULL;
av_log_set_level(AV_LOG_ERROR);
if (avformat_open_input(&fmt_ctx, input_filename, NULL, NULL) < 0) {
fprintf(stderr, "Could not open input file\n");
return -1;
}
if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
fprintf(stderr, "Could not find stream info\n");
return -1;
}
int stream_index = -1;
for (unsigned i = 0; i < fmt_ctx->nb_streams; i++) {
if (fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
stream_index = i;
break;
}
}
if (stream_index == -1) {
fprintf(stderr, "No audio stream found\n");
return -1;
}
decoder =
avcodec_find_decoder(fmt_ctx->streams[stream_index]->codecpar->codec_id);
if (!decoder) {
fprintf(stderr, "Decoder not found\n");
return -1;
}
dec_ctx = avcodec_alloc_context3(decoder);
avcodec_parameters_to_context(dec_ctx,
fmt_ctx->streams[stream_index]->codecpar);
avcodec_open2(dec_ctx, decoder, NULL);
encoder = avcodec_find_encoder(AV_CODEC_ID_OPUS);
if (!encoder) {
fprintf(stderr, "Opus encoder not found\n");
return -1;
}
enc_ctx = avcodec_alloc_context3(encoder);
AVChannelLayout enc_layout;
av_channel_layout_default(&enc_layout, 2); // 스테레오
av_channel_layout_copy(&enc_ctx->ch_layout, &enc_layout);
enc_ctx->sample_rate = 48000;
enc_ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
enc_ctx->bit_rate = 128000;
avcodec_open2(enc_ctx, encoder, NULL);
swr_ctx = NULL;
if (swr_alloc_set_opts2(&swr_ctx, &enc_ctx->ch_layout, enc_ctx->sample_fmt,
enc_ctx->sample_rate, &dec_ctx->ch_layout,
dec_ctx->sample_fmt, dec_ctx->sample_rate, 0,
NULL) < 0) {
fprintf(stderr, "Failed to allocate SwrContext\n");
return -1;
}
swr_init(swr_ctx);
packet = av_packet_alloc();
frame = av_frame_alloc();
enc_frame = av_frame_alloc();
outfile = fopen(output_filename, "wb");
if (!outfile) {
fprintf(stderr, "Could not open output file\n");
return -1;
}
// 임시 PCM 버퍼 (float, 스테레오)
float* pcm_buffer = (float*)malloc(sizeof(float) * 2 * OPUS_FRAME_SIZE *
4); // 충분히 큰 버퍼
int buffered_samples = 0;
while (av_read_frame(fmt_ctx, packet) >= 0) {
if (packet->stream_index != stream_index) {
av_packet_unref(packet);
continue;
}
avcodec_send_packet(dec_ctx, packet);
while (avcodec_receive_frame(dec_ctx, frame) == 0) {
int max_out = av_rescale_rnd(
swr_get_delay(swr_ctx, dec_ctx->sample_rate) + frame->nb_samples,
enc_ctx->sample_rate, dec_ctx->sample_rate, AV_ROUND_UP);
uint8_t** out_data = NULL;
int out_linesize = 0;
av_samples_alloc_array_and_samples(&out_data, &out_linesize, 2, max_out,
enc_ctx->sample_fmt, 0);
int converted =
swr_convert(swr_ctx, out_data, max_out, (const uint8_t**)frame->data,
frame->nb_samples);
// float PCM으로 임시 버퍼에 추가
memcpy(pcm_buffer + buffered_samples * 2, out_data[0],
converted * 2 * sizeof(float));
buffered_samples += converted;
av_freep(&out_data[0]);
free(out_data);
// OPUS_FRAME_SIZE 단위로 인코딩
while (buffered_samples >= OPUS_FRAME_SIZE) {
enc_frame->nb_samples = OPUS_FRAME_SIZE;
enc_frame->format = enc_ctx->sample_fmt;
enc_frame->sample_rate = enc_ctx->sample_rate;
av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout);
enc_frame->data[0] = (uint8_t*)pcm_buffer;
AVPacket* out_pkt = av_packet_alloc();
avcodec_send_frame(enc_ctx, enc_frame);
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
// 버퍼 이동
memmove(pcm_buffer, pcm_buffer + OPUS_FRAME_SIZE * 2,
(buffered_samples - OPUS_FRAME_SIZE) * 2 * sizeof(float));
buffered_samples -= OPUS_FRAME_SIZE;
}
}
av_packet_unref(packet);
}
// 디코더 플러시
avcodec_send_packet(dec_ctx, NULL);
while (avcodec_receive_frame(dec_ctx, frame) == 0) {
int max_out = av_rescale_rnd(
swr_get_delay(swr_ctx, dec_ctx->sample_rate) + frame->nb_samples,
enc_ctx->sample_rate, dec_ctx->sample_rate, AV_ROUND_UP);
uint8_t** out_data = NULL;
int out_linesize = 0;
av_samples_alloc_array_and_samples(&out_data, &out_linesize, 2, max_out,
enc_ctx->sample_fmt, 0);
int converted =
swr_convert(swr_ctx, out_data, max_out, (const uint8_t**)frame->data,
frame->nb_samples);
memcpy(pcm_buffer + buffered_samples * 2, out_data[0],
converted * 2 * sizeof(float));
buffered_samples += converted;
av_freep(&out_data[0]);
free(out_data);
while (buffered_samples >= OPUS_FRAME_SIZE) {
enc_frame->nb_samples = OPUS_FRAME_SIZE;
enc_frame->format = enc_ctx->sample_fmt;
enc_frame->sample_rate = enc_ctx->sample_rate;
av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout);
enc_frame->data[0] = (uint8_t*)pcm_buffer;
AVPacket* out_pkt = av_packet_alloc();
avcodec_send_frame(enc_ctx, enc_frame);
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
memmove(pcm_buffer, pcm_buffer + OPUS_FRAME_SIZE * 2,
(buffered_samples - OPUS_FRAME_SIZE) * 2 * sizeof(float));
buffered_samples -= OPUS_FRAME_SIZE;
}
}
// 마지막 남은 샘플 인코딩
if (buffered_samples > 0) {
enc_frame->nb_samples = buffered_samples;
enc_frame->format = enc_ctx->sample_fmt;
enc_frame->sample_rate = enc_ctx->sample_rate;
av_channel_layout_copy(&enc_frame->ch_layout, &enc_ctx->ch_layout);
enc_frame->data[0] = (uint8_t*)pcm_buffer;
AVPacket* out_pkt = av_packet_alloc();
avcodec_send_frame(enc_ctx, enc_frame);
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
}
// 인코더 플러시
avcodec_send_frame(enc_ctx, NULL);
AVPacket* out_pkt = av_packet_alloc();
while (avcodec_receive_packet(enc_ctx, out_pkt) == 0) {
fwrite(out_pkt->data, 1, out_pkt->size, outfile);
av_packet_unref(out_pkt);
}
av_packet_free(&out_pkt);
fclose(outfile);
free(pcm_buffer);
swr_free(&swr_ctx);
av_frame_free(&frame);
av_frame_free(&enc_frame);
av_packet_free(&packet);
avcodec_free_context(&dec_ctx);
avcodec_free_context(&enc_ctx);
avformat_close_input(&fmt_ctx);
printf("Encoding finished: %s\n", output_filename);
return 0;
}