This should get you 99% of the way there. It's a plain C++ sample player using WASAPI.
Compiling and linking:
- Needs a C++17(+) conforming compiler
- Install the boost libraries, used for the lock-free queue
- Probably needs the MS C++ compiler (uses conio.h)
- Reference avrt.lib for the real-time audio thread (uses AvSetMmThreadCharacteristics)
- Full VS2019 project if you need it
Running:
- You need 5 .wav files in 44100 16-bit stereo format, named c4.wav to g4.wav.
- See sample pack
What it does:
- Console app runs a _getch() loop; c, d, e, f, g trigger note-on, q quits
- Since it's a console app, there are no note-off messages. Each keypress triggers playback of the full sample.
- Note-downs are stamped with a timestamp and posted to a shared queue (this is boost lockfree, capped at size 64). The timestamp-to-frame scheduling math is sketched right after this list.
- So you can crash it by pressing more than 64 keys within a 3-millisecond interval (the minimum WASAPI exclusive-mode latency); see the note after the code.
- The audio thread picks these messages up and moves them to an "active notes" list local to the audio thread. Active notes are bounded by the max polyphony (64).
- So you can also crash it by pressing more than 64 keys within [length of the shortest sample] seconds.
- Each active note is mixed into the current WASAPI buffer, until the end of the .wav sample is reached.
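Before the listing, here's the scheduling math in isolation, since it's the least obvious part. A minimal sketch, reusing the constants from the listing below (the helper name qpc_ticks_to_frames is mine; the actual code inlines this arithmetic):

// Convert a QPC tick delta to a frame count: ticks -> 100ns units -> frames.
// Uses the same constants as the listing (reftimes_per_milli = 10000,
// millis_per_second = 1000, sample_rate = 44100).
static int64_t qpc_ticks_to_frames(int64_t qpc_ticks, int64_t qpc_freq)
{
    int64_t hns = qpc_ticks * reftimes_per_milli * millis_per_second / qpc_freq;
    return hns * sample_rate / (reftimes_per_milli * millis_per_second);
}

A note whose timestamp is note_age ticks old is then scheduled at stream_offset_frames - qpc_ticks_to_frames(note_age, qpc_freq) + buffer_frames, i.e. one buffer into the future, so notes never have to start in the past.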
Here's the code:
#include <atomic>
#include <vector>
#include <cstdio>
#include <cstdint>
#include <climits> // SHRT_MAX
#include <cassert>
#include <fstream>
#include <cstring>
#include <iostream>
#include <filesystem>
#include <boost/lockfree/queue.hpp>
#include <conio.h>
#include <atlbase.h>
#include <Windows.h>
#include <avrt.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
// for wasapi event callback
static HANDLE event_handle;
// sample data
static const size_t sample_count = 5;
static int16_t* note_samples[sample_count];
static size_t note_frame_counts[sample_count];
static std::vector<char> note_samples_raw[sample_count];
static char const* note_files[sample_count] = {
"c4.wav", "d4.wav", "e4.wav", "f4.wav", "g4.wav"
};
// user input / audio thread communication
static std::atomic_bool stop_finished;
static std::atomic_bool stop_initiated;
// scale mix volume
static const double mix_scale_amp = 0.4;
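// note: 64 voices at 0.4 each can still sum past full scale and wrap at the
// int16 conversion below; clamp the mix first if you push the polyphony hard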
// debug stuff
static int32_t prev_note_active_count = 0;
static int32_t prev_note_audible_count = 0;
// timing stuff
static const int64_t millis_per_second = 1000;
static const int64_t reftimes_per_milli = 10000;
// audio format = 44.1khz 16bit stereo
static const int32_t sample_size = 2;
static const int32_t channel_count = 2;
static const int32_t sample_rate = 44100;
static const int32_t frame_size = sample_size * channel_count;
// exclusive mode event driven must use 128-byte aligned buffers
static const int32_t alignment_requirement_bytes = 128;
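// (a more robust player would handle AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED from
// Initialize and retry with the aligned buffer size, rather than pre-aligning)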
// note down notification + timestamp
static const size_t note_queue_size = 64;
struct note_down_msg
{
int32_t note; // 0..4 = c..g
uint64_t time_stamp_qpc;
};
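// note_down_msg must stay trivially copyable: boost::lockfree::queue requires
// a trivial assignment operator and a trivial destructor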
static boost::lockfree::queue<note_down_msg>
note_msg_queue(note_queue_size);
// current playing notes
static const size_t max_polyphony = 64;
struct active_note
{
// slot in use?
bool in_use;
// note + timestamp
note_down_msg msg;
// position relative to stream pos when it should start
uint64_t trigger_pos_frames;
// how many of it has played already
size_t frames_rendered;
active_note() = default;
};
static active_note
active_notes[max_polyphony];
// shared by user input / audio thread
struct audio_thread_data
{
IAudioClock* clock;
IAudioClient* client;
IAudioRenderClient* render;
};
// bail out on any error
#define CHECK_COM(expr) do { \
HRESULT hr = expr; \
if(SUCCEEDED(hr)) break; \
std::cout << #expr << ": " << hr << "\n"; \
std::terminate(); \
} while(0)
static WAVEFORMATEXTENSIBLE
make_audio_format()
{
// translate format specification to WAVEFORMATEXTENSIBLE
WAVEFORMATEXTENSIBLE result = { 0 };
result.dwChannelMask = 0;
result.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
result.Samples.wValidBitsPerSample = sample_size * 8;
result.Format.nChannels = channel_count;
result.Format.nSamplesPerSec = sample_rate;
result.Format.wBitsPerSample = sample_size * 8;
result.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
result.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE);
result.Format.nBlockAlign = channel_count * sample_size;
result.Format.nAvgBytesPerSec = channel_count * sample_size * sample_rate;
return result;
}
static void
load_note_samples()
{
for(size_t i = 0; i < sample_count; i++)
{
// load piano samples to bytes
auto path = std::filesystem::current_path() / note_files[i];
std::ifstream input(path, std::ios::binary);
assert(input);
input.seekg(0, input.end);
size_t length = input.tellg();
input.seekg(0, input.beg);
// resize, not reserve: read() below writes through data()
note_samples_raw[i].resize(length);
input.read(note_samples_raw[i].data(), length);
assert(input);
input.close();
// compute frame count and set actual audio data
// 44 bytes skipped for .WAV file header
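// note: this assumes the canonical 44-byte header; .wav files with extra
// chunks (LIST, fact, ...) need real chunk parsing instead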
note_frame_counts[i] = (length - 44) / (sample_size * channel_count);
note_samples[i] = reinterpret_cast<int16_t*>(note_samples_raw[i].data() + 44);
}
}
// this runs audio processing
static DWORD WINAPI
run_audio_thread(void* param)
{
int16_t* audio;
BYTE* audio_mem;
bool slot_found;
UINT32 buffer_frames;
HANDLE task;
BOOL success;
DWORD wait_result;
DWORD task_index = 0;
UINT64 clock_pos;
UINT64 clock_freq;
UINT64 clock_qpc_pos;
LARGE_INTEGER qpc_freq;
audio_thread_data* data = static_cast<audio_thread_data*>(param);
// init thread
CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
task = AvSetMmThreadCharacteristicsW(L"Pro Audio", &task_index);
assert(task != nullptr);
// wasapi buffer frame count & clock info
CHECK_COM(data->client->GetBufferSize(&buffer_frames));
CHECK_COM(data->clock->GetFrequency(&clock_freq));
success = QueryPerformanceFrequency(&qpc_freq);
assert(success);
// audio loop
data->client->Start();
while(!stop_initiated.load())
{
wait_result = WaitForSingleObject(event_handle, INFINITE);
assert(wait_result == WAIT_OBJECT_0);
// retrieve and clear buffer for this round
CHECK_COM(data->render->GetBuffer(buffer_frames, &audio_mem));
audio = reinterpret_cast<int16_t*>(audio_mem);
memset(audio, 0, buffer_frames * static_cast<uint64_t>(frame_size));
// get timing stuff
CHECK_COM(data->clock->GetPosition(&clock_pos, &clock_qpc_pos));
uint64_t stream_offset_hns = clock_pos * reftimes_per_milli * millis_per_second / clock_freq;
uint64_t stream_offset_frames = stream_offset_hns * sample_rate / (reftimes_per_milli * millis_per_second);
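// stream_offset_frames = frames the device has played since Start(), derived
// by converting the device clock position to 100ns units and then to frames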
// process each frame
for(size_t f = 0; f < buffer_frames; f++)
{
// pop user input, find empty slot in active notes buffer
// for better performance this can also be outside the frame
// loop, at start of each buffer round, in that case add 1 additional buffer latency
note_down_msg msg;
while(note_msg_queue.pop(msg))
{
slot_found = false;
for(size_t i = 0; i < max_polyphony; i++)
if(!active_notes[i].in_use)
{
slot_found = true;
active_notes[i].msg = msg;
active_notes[i].in_use = true;
active_notes[i].frames_rendered = 0;
int64_t clock_note_diff_qpc = clock_qpc_pos - static_cast<int64_t>(active_notes[i].msg.time_stamp_qpc);
int64_t clock_note_diff_hns = clock_note_diff_qpc * reftimes_per_milli * millis_per_second / qpc_freq.QuadPart;
int64_t clock_note_diff_frames = clock_note_diff_hns * sample_rate / (reftimes_per_milli * millis_per_second);
int64_t note_clock_diff_frames = -static_cast<int64_t>(clock_note_diff_frames);
// allow 1 buffer latency otherwise notes would have to start in the past
active_notes[i].trigger_pos_frames = stream_offset_frames + note_clock_diff_frames + buffer_frames;
assert(active_notes[i].trigger_pos_frames <= stream_offset_frames + buffer_frames * 3);
assert(active_notes[i].trigger_pos_frames >= stream_offset_frames + f);
break;
}
if(!slot_found)
assert(!"Max polyphony reached.");
}
// debugging stuff
int32_t note_active_count = 0;
int32_t note_audible_count = 0;
// compose frame from all samples active up to max_polyphony
double current_samples[channel_count] = { 0 };
for(size_t i = 0; i < max_polyphony; i++)
{
// slot not in use
if(!active_notes[i].in_use) continue;
note_active_count++;
// not my turn yet
// note this very briefly wastes a slot for a sample which starts halfway in the current buffer
if(active_notes[i].trigger_pos_frames > stream_offset_frames + f) continue;
if(active_notes[i].frames_rendered == note_frame_counts[active_notes[i].msg.note])
{
// reached sample end
active_notes[i].in_use = false;
active_notes[i].frames_rendered = 0;
continue;
}
// note is active + audible
note_audible_count++;
size_t frame_index = active_notes[i].frames_rendered++;
for(size_t c = 0; c < channel_count; c++)
{
assert(active_notes[i].msg.note < sample_count);
assert(frame_index < note_frame_counts[active_notes[i].msg.note]);
current_samples[c] += static_cast<double>(
note_samples[active_notes[i].msg.note][frame_index * channel_count + c])
* mix_scale_amp / SHRT_MAX;
}
}
// normally never do io on the audio thread, just debugging
if(prev_note_active_count != note_active_count || prev_note_audible_count != note_audible_count)
;//std::cout << "\nactive: " << note_active_count << " audible: " << note_audible_count << "\n";
prev_note_active_count = note_active_count;
prev_note_audible_count = note_audible_count;
// convert to int16 and write to wasapi
for(size_t c = 0; c < channel_count; c++)
audio[f * channel_count + c] = static_cast<int16_t>(current_samples[c] * SHRT_MAX);
}
CHECK_COM(data->render->ReleaseBuffer(buffer_frames, 0));
}
data->client->Stop();
// cleanup
success = AvRevertMmThreadCharacteristics(task);
assert(success);
CoUninitialize();
stop_finished.store(true);
return 0;
}
// this runs user input
static void
run_user_input_thread()
{
int32_t chr;
int32_t note;
BOOL success;
UINT32 buffer_frames;
REFERENCE_TIME engine;
REFERENCE_TIME period;
LARGE_INTEGER qpc_count;
CComPtr<IMMDevice> device;
CComPtr<IAudioClock> clock;
CComPtr<IAudioClient> client;
CComPtr<IAudioRenderClient> render;
CComPtr<IMMDeviceEnumerator> enumerator;
WAVEFORMATEXTENSIBLE format = make_audio_format();
// get default render endpoint
CHECK_COM(CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL,
__uuidof(IMMDeviceEnumerator), reinterpret_cast<void**>(&enumerator)));
CHECK_COM(enumerator->GetDefaultAudioEndpoint(eRender, eMultimedia, &device));
CHECK_COM(device->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
nullptr, reinterpret_cast<void**>(&client)));
// open exclusive mode event driven stream
CHECK_COM(client->GetDevicePeriod(&engine, &period));
buffer_frames = static_cast<uint32_t>(period / reftimes_per_milli * sample_rate / millis_per_second);
while((buffer_frames * frame_size) % alignment_requirement_bytes != 0) buffer_frames++;
period = buffer_frames * millis_per_second * reftimes_per_milli / sample_rate;
CHECK_COM(client->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
period, period, reinterpret_cast<WAVEFORMATEX*>(&format), nullptr));
event_handle = CreateEvent(nullptr, FALSE, FALSE, nullptr);
assert(event_handle != nullptr);
CHECK_COM(client->SetEventHandle(event_handle));
CHECK_COM(client->GetService(__uuidof(IAudioClock), reinterpret_cast<void**>(&clock)));
CHECK_COM(client->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(&render)));
// start audio thread
audio_thread_data data = { 0 };
data.clock = clock;
data.client = client;
data.render = render;
HANDLE thread = CreateThread(nullptr, 0, run_audio_thread, &data, 0, nullptr);
assert(thread != nullptr);
// process user input
// cdefg = notes, q = quit
while((chr = _getch()) != 'q')
{
if(chr == 'c') note = 0;
else if(chr == 'd') note = 1;
else if(chr == 'e') note = 2;
else if(chr == 'f') note = 3;
else if(chr == 'g') note = 4;
else continue;
success = QueryPerformanceCounter(&qpc_count);
note_down_msg msg;
msg.note = note;
msg.time_stamp_qpc = qpc_count.QuadPart;
assert(success);
bool pushed = note_msg_queue.push(msg);
assert(pushed); // fails when more than note_queue_size notes are pending
_putch(chr);
}
// cleanup
stop_initiated.store(true);
while(!stop_finished.load());
success = CloseHandle(thread);
assert(success);
success = CloseHandle(event_handle);
assert(success);
}
int
main(int argc, char** argv)
{
// wraps COM init/cleanup
CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
load_note_samples();
run_user_input_thread();
CoUninitialize();
return 0;
}
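One caveat: the push in run_user_input_thread asserts when the queue is full. If you'd rather fail soft, a hypothetical variant (not in the project above) just drops the note and reports it:

// soft-fail variant of the push in run_user_input_thread:
if(!note_msg_queue.push(msg))
    std::cout << "\nnote dropped: queue full\n";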