赞
踩
音频重采样的基本流程为:
- 申请重采样器上下文
- 设置重采样器上下文的参数
- 初始化重采样器
- 申请数据存放的缓冲区空间
- 进行重采样
注意,要先设置参数再对重采样器初始化
SwrContext
重采样器上下文的结构体。此结构是不透明的,这意味着,如果要设置选项,必须使用诸如av_opt_set等函数来设置,而不能直接访问其字段。
struct SwrContext *swr_alloc();
申请重采样器上下文。
int av_opt_set(void *obj, const char *name, const char *val, int search_flags);
int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags);
int av_opt_set_chlayout(void *obj, const char *name, const AVChannelLayout *layout, int search_flags);
av_opt_set* 函数族,这里仅列举几个。以av_opt_set为例,用于将obj中名为name的字段设置为指定的val。第一个void* 的obj参数表示要设置的对象,第二个name参数表示要设置的字段名称,以字符串形式传入。例如obj为SwrContext* 对象,name为"in_sample_rate"就对应着SwrContext中的同名字段。中间的参数为要设置的值,最后的search_flags表示搜索标志,一般设为0即可。
int swr_alloc_set_opts2(struct SwrContext **ps, const AVChannelLayout *out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate, const AVChannelLayout *in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate, int log_offset, void *log_ctx);
如果还未分配则分配SwrContext,并设置/重置公共参数。就相当于alloc + set。
int swr_init(struct SwrContext *s);
重采样器初始化。必须在设置过SwrContext 参数之后初始化。
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
和int64_t av_rescale(int64_t a, int64_t b, int64_t c)
二者都用于计算 a*b/c,唯一的区别在于av_rescale_rnd可以通过rnd参数指定取整方式(向上取整、向下取整等)。
int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int align);
申请一个 data[nb_channels][ch_data] 的二维数组,所以audio_data要作为一个三级指针传进去。
void av_freep(void *ptr);
释放av_samples_alloc_array_and_samples申请的data(需要先释放data[0]指向的样本缓冲区,再释放data指针数组本身)。av_freep接收的是指针变量的地址,释放后会把该指针置为NULL;即使该指针本身已经是NULL,调用也是安全的。用法示例:
uint8_t *buf = av_malloc(16);
av_freep(&buf);
int64_t swr_get_delay(struct SwrContext *s, int64_t base);
获取下一个输入样本相对于下一个输出样本所经历的延迟帧数。
int swr_convert(struct SwrContext *s, uint8_t * const *out, int out_count, const uint8_t * const *in , int in_count);
音频重采样,in和out是由av_samples_alloc_array_and_samples生成的data缓冲区。in_count和out_count则是对应的缓冲区大小的样本数。
int av_samples_get_buffer_size(int *linesize, int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int align);
获取给定音频参数所需的缓冲区大小。
重采样样例,参考:Examples - resample_audio.c
#include <iostream> #include <fstream> #include <string> #include <cmath> using namespace std; extern "C" { #include <libavutil/opt.h> #include <libavutil/channel_layout.h> #include <libavutil/samplefmt.h> #include <libswresample/swresample.h> } /* format转字符串 */ string string_sample_fmt(enum AVSampleFormat sample_fmt) { // 定义sample_fmt_entry结构体,同时定义了一个数组 struct sample_fmt_entry { enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le; } sample_fmt_entries[] = { { AV_SAMPLE_FMT_U8, "u8", "u8" }, { AV_SAMPLE_FMT_S16, "s16be", "s16le" }, { AV_SAMPLE_FMT_S32, "s32be", "s32le" }, { AV_SAMPLE_FMT_FLT, "f32be", "f32le" }, { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },}; // 返回字符串 const char* str_fmt = nullptr; int arr_len = FF_ARRAY_ELEMS(sample_fmt_entries); for (int i = 0; i < arr_len; i++) { auto entry = sample_fmt_entries[i]; if (sample_fmt == entry.sample_fmt) { return AV_NE(entry.fmt_be, entry.fmt_le); } } } /** * Fill dst buffer with nb_samples, generated starting from t. * 交错模式,函数摘自:https://ffmpeg.org/doxygen/7.0/resample_audio_8c-example.html * sin曲线,t表示当前所在的相位,周期为一帧所持续的时间 */ void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t) { int i, j; double tincr = 1.0 / sample_rate, *dstp = dst; const double c = 2 * M_PI * 440.0; /* generate sin tone with 440Hz frequency and duplicated channels */ for (i = 0; i < nb_samples; i++) { *dstp = sin(c * *t); for (j = 1; j < nb_channels; j++) dstp[j] = dstp[0]; dstp += nb_channels; *t += tincr; } } int main() { /* 采样参数定义 */ // 输入参数 int src_sample_rate = 48000; enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL; AVChannelLayout src_ch_layout = AV_CHANNEL_LAYOUT_STEREO; // 立体声 // 输出参数 int dst_sample_rate = 44100; enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16; AVChannelLayout dst_ch_layout = AV_CHANNEL_LAYOUT_STEREO; // 立体声 // 创建重采样器上下文(暂且认为不会失败) SwrContext *swr_ctx = swr_alloc(); /* 参数设置(SwrContext字段设置) */ // 输入参数 check_optset(av_opt_set_int(swr_ctx, "in_sample_rate", 
src_sample_rate, 0), __LINE__); check_optset(av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0), __LINE__); check_optset(av_opt_set_chlayout(swr_ctx, "in_chlayout", &src_ch_layout, 0), __LINE__); // 输出参数 check_optset(av_opt_set_int(swr_ctx, "out_sample_rate", dst_sample_rate, 0), __LINE__); check_optset(av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0), __LINE__); check_optset(av_opt_set_chlayout(swr_ctx, "out_chlayout", &dst_ch_layout, 0), __LINE__); // 参数设置完成后,初始化上下文 swr_init(swr_ctx); // 给输入源分配内存空间 uint8_t **src_data = nullptr; int src_linesize; int src_nb_samples = 1024; // 每个通道的样本数 av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_ch_layout.nb_channels, src_nb_samples, src_sample_fmt, 0); // 给输出源分配内存空间 uint8_t **dst_data; int dst_linesize; // 计算输出的信道样本数:a * b / c,AV_ROUND_UP表示向上取整 int dst_nb_samples = av_rescale_rnd(src_nb_samples, dst_sample_rate, src_sample_rate, AV_ROUND_UP); // 分配空间 av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_ch_layout.nb_channels, dst_nb_samples, dst_sample_fmt, 0); // 采样转换 double t = 0; // 时间,以输入源的时间为基准 int max_nb_samples = dst_nb_samples; string dst_file_name = "out.pcm"; ofstream dst_file(dst_file_name, ios_base::out | ios_base::binary); while(t < 10) { // 生成输入源(模拟) fill_samples((double*)src_data[0], src_nb_samples, src_ch_layout.nb_channels, src_sample_rate, &t); // 获取延迟(dst音频相对src音频延迟的帧数) int64_t delay = swr_get_delay(swr_ctx, src_sample_rate); // 输出的信道样本数,a * b / c dst_nb_samples = av_rescale(delay + src_nb_samples, dst_sample_rate, src_sample_rate); // 如果输出缓冲区大小不够,重新申请空间 if(dst_nb_samples > max_nb_samples) { // 重新申请空间 av_freep(&dst_data[0]); av_samples_alloc(dst_data, &dst_linesize, dst_ch_layout.nb_channels, dst_nb_samples, dst_sample_fmt, 1); max_nb_samples = dst_nb_samples; } // 音频重采样 int ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples); // 获取给定音频参数所需的缓冲区大小。 int dst_buf_size = 
av_samples_get_buffer_size(&dst_linesize, dst_ch_layout.nb_channels, ret, dst_sample_fmt, 1); // write dst_file.write((char*)dst_data[0], dst_buf_size); } // clear and exit // TODO }
版本:ffmpeg-7.0
/**
 * Resampler context of libswresample, quoted here for reference (the
 * surrounding text gives the version as ffmpeg-7.0).
 *
 * The structure is opaque to API users: its options are set through the
 * av_opt_set* functions rather than by accessing members directly.
 * Member order and types are reproduced unchanged.
 */
struct SwrContext {
    const AVClass *av_class; ///< AVClass used for AVOption and av_log()
    int log_level_offset; ///< logging level offset
    void *log_ctx; ///< parent logging context
    enum AVSampleFormat in_sample_fmt; ///< input sample format
    enum AVSampleFormat int_sample_fmt; ///< internal sample format (AV_SAMPLE_FMT_FLTP or AV_SAMPLE_FMT_S16P)
    enum AVSampleFormat out_sample_fmt; ///< output sample format
    AVChannelLayout used_ch_layout; ///< number of used input channels (mapped channel count if channel_map, otherwise in.ch_count)
    AVChannelLayout in_ch_layout; ///< input channel layout
    AVChannelLayout out_ch_layout; ///< output channel layout
    int in_sample_rate; ///< input sample rate
    int out_sample_rate; ///< output sample rate
    int flags; ///< miscellaneous flags such as SWR_FLAG_RESAMPLE
    float slev; ///< surround mixing level
    float clev; ///< center mixing level
    float lfe_mix_level; ///< LFE mixing level
    float rematrix_volume; ///< rematrixing volume coefficient
    float rematrix_maxval; ///< maximum value for rematrixing output
    int matrix_encoding; /**< matrixed stereo encoding */
    const int *channel_map; ///< channel index (or -1 if muted channel) map
    int engine; // NOTE(review): undocumented in the quoted source; presumably selects the resampling engine (swr vs. soxr) — confirm

    AVChannelLayout user_used_chlayout; ///< User set used channel layout
    AVChannelLayout user_in_chlayout; ///< User set input channel layout
    AVChannelLayout user_out_chlayout; ///< User set output channel layout
    enum AVSampleFormat user_int_sample_fmt; ///< User set internal sample format
    int user_dither_method; ///< User set dither method

    struct DitherContext dither; // NOTE(review): undocumented in the quoted source; presumably the dithering state — confirm

    int filter_size; /**< length of each FIR filter in the resampling filterbank relative to the cutoff frequency */
    int phase_shift; /**< log2 of the number of entries in the resampling polyphase filterbank */
    int linear_interp; /**< if 1 then the resampling FIR filter will be linearly interpolated */
    int exact_rational; /**< if 1 then enable non power of 2 phase_count */
    double cutoff; /**< resampling cutoff frequency (swr: 6dB point; soxr: 0dB point). 1.0 corresponds to half the output sample rate */
    int filter_type; /**< swr resampling filter type */
    double kaiser_beta; /**< swr beta value for Kaiser window (only applicable if filter_type == AV_FILTER_TYPE_KAISER) */
    double precision; /**< soxr resampling precision (in bits) */
    int cheby; /**< soxr: if 1 then passband rolloff will be none (Chebyshev) & irrational ratio approximation precision will be higher */

    float min_compensation; ///< swr minimum below which no compensation will happen
    float min_hard_compensation; ///< swr minimum below which no silence inject / sample drop will happen
    float soft_compensation_duration; ///< swr duration over which soft compensation is applied
    float max_soft_compensation; ///< swr maximum soft compensation in seconds over soft_compensation_duration
    float async; ///< swr simple 1 parameter async, similar to ffmpeg's -async
    int64_t firstpts_in_samples; ///< swr first pts in samples

    int resample_first; ///< 1 if resampling must come first, 0 if rematrixing
    int rematrix; ///< flag to indicate if rematrixing is needed (basically if input and output layouts mismatch)
    int rematrix_custom; ///< flag to indicate that a custom matrix has been defined

    AudioData in; ///< input audio data
    AudioData postin; ///< post-input audio data: used for rematrix/resample
    AudioData midbuf; ///< intermediate audio data (postin/preout)
    AudioData preout; ///< pre-output audio data: used for rematrix/resample
    AudioData out; ///< converted output audio data
    AudioData in_buffer; ///< cached audio data (convert and resample purpose)
    AudioData silence; ///< temporary with silence
    AudioData drop_temp; ///< temporary used to discard output
    int in_buffer_index; ///< cached buffer position
    int in_buffer_count; ///< cached buffer length
    int resample_in_constraint; ///< 1 if the input end was reached before the output end, 0 otherwise
    int flushed; ///< 1 if data is to be flushed and no further input is expected
    int64_t outpts; ///< output PTS
    int64_t firstpts; ///< first PTS
    int drop_output; ///< number of output samples to drop
    double delayed_samples_fixup; ///< soxr 0.1.1: needed to fixup delayed_samples after flush has been called.

    struct AudioConvert *in_convert; ///< input conversion context
    struct AudioConvert *out_convert; ///< output conversion context
    struct AudioConvert *full_convert; ///< full conversion context (single conversion for input and output)
    struct ResampleContext *resample; ///< resampling context
    struct Resampler const *resampler; ///< resampler virtual function table

    double matrix[SWR_CH_MAX][SWR_CH_MAX]; ///< floating point rematrixing coefficients
    float matrix_flt[SWR_CH_MAX][SWR_CH_MAX]; ///< single precision floating point rematrixing coefficients
    // NOTE(review): the four native_* pointers are undocumented in the quoted
    // source; presumably rematrixing data in the internal sample format — confirm.
    uint8_t *native_matrix;
    uint8_t *native_one;
    uint8_t *native_simd_one;
    uint8_t *native_simd_matrix;
    int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients
    uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients
    // NOTE(review): the mix_* function pointers are undocumented in the quoted
    // source; presumably the channel-mixing kernels (C and SIMD variants) — confirm.
    mix_1_1_func_type *mix_1_1_f;
    mix_1_1_func_type *mix_1_1_simd;

    mix_2_1_func_type *mix_2_1_f;
    mix_2_1_func_type *mix_2_1_simd;

    mix_any_func_type *mix_any_f;

    /* TODO: callbacks for ASM optimizations */
};
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。