Skip to content

Instantly share code, notes, and snippets.

@TwoSquirrels
Created July 23, 2025 19:35
Show Gist options
  • Select an option

  • Save TwoSquirrels/2a97d61e1a7c60d6ec6a1b6083614446 to your computer and use it in GitHub Desktop.

Select an option

Save TwoSquirrels/2a97d61e1a7c60d6ec6a1b6083614446 to your computer and use it in GitHub Desktop.
mic2key WIP
# include <Siv3D.hpp> // Siv3D v0.6.15
//# include <Windows.h>
// Compile-time flag taken from Siv3D's SIV3D_BUILD_PRIVATE_DEFINITION_DEBUG() macro
// (presumably true only in debug builds); Main() uses it to enable the on-screen debug overlay.
constexpr bool isDebug = SIV3D_BUILD_PRIVATE_DEFINITION_DEBUG(); // Debug Build
// MFCC
struct MFCC
{
Array<double> feature;
[[nodiscard]] bool isUnset() const
{
return std::ranges::all_of(feature, [](const double x) { return x == 0.0; });
}
[[nodiscard]] double norm() const
{
return Math::Sqrt(std::accumulate(
feature.begin(),
feature.end(),
0.0,
[](const auto& norm, const auto& x) { return norm + x * x; }
));
}
[[nodiscard]] double cosineSimilarity(const MFCC& other) const
{
if (feature.size() != other.feature.size()) throw Error{ U"MFCC order mismatch" };
const double thisNorm = norm(), otherNorm = other.norm();
if (thisNorm < 1e-8 || otherNorm < 1e-8) return 0.0;
double innerProduct = 0.0;
for (size_t i : step(feature.size())) innerProduct += feature[i] * other.feature[i];
return innerProduct / thisNorm / otherNorm;
}
};
// MFCC analyzer
/// Turns the most recent microphone samples into an MFCC feature vector.
///
/// Each call to analyze() copies the latest frame out of the looped microphone buffer and runs
/// pre-emphasis -> Hamming window -> FFT -> triangular mel filter bank -> log -> DCT.
/// The FFT result and the mel envelope of the last analyzed frame remain available through
/// getFFTResult() and getMelEnvelope().
class MFCCAnalyzer
{
public:
    /// Converts a frequency (same unit as the sample rate, presumably Hz) to the mel scale.
    [[nodiscard]] static double freqToMel(const double freq)
    {
        return 1127.01 * Math::Log(1.0 + freq / 700.0);
    }

    /// Inverse of freqToMel().
    [[nodiscard]] static double melToFreq(const double mel)
    {
        return 700.0 * (Math::Exp(mel / 1127.01) - 1.0);
    }

    /// @param frames       FFT frame length; the sample buffer holds `256 << FromEnum(frames)` samples.
    /// @param melChannels  number of triangular mel filters.
    /// @param mfccOrder    number of coefficients returned by analyze().
    explicit MFCCAnalyzer(const FFTSampleLength frames = FFTSampleLength::SL2K, const size_t melChannels = 40, const size_t mfccOrder = 12)
        : frames(frames), f(256uLL << FromEnum(frames), 0.0f), melChannels(melChannels), bin(melChannels + 2),
        melSpectrum(melChannels), melEnvelope(melChannels), mfccOrder(mfccOrder) {}

    /// Analyzes the newest frame recorded by `mic`.
    /// @return the MFCC of that frame, or an all-zero MFCC when the microphone is not recording
    ///         or its buffer is shorter than one frame.
    /// @throws Error when the microphone is not in loop mode.
    [[nodiscard]] MFCC analyze(const Microphone& mic)
    {
        if (not mic.isLoop()) throw Error{ U"Microphone is must be loop mode." };
        if (not mic.isRecording() || mic.getBufferLength() < f.size()) return MFCC{ Array<double>(mfccOrder, 0.0) };

        // get data from mic: the f.size() samples that end at the current write position,
        // wrapping around the start of the loop buffer when necessary
        const auto sampleRate = mic.getSampleRate();
        const auto& buffer = mic.getBuffer();
        const size_t writePos = mic.posSample();
        for (const size_t pos : step(f.size()))
        {
            const size_t idx = (pos + writePos < f.size() ? mic.getBufferLength() : 0) + pos + writePos - f.size();
            f[pos] = buffer[idx].left; // NOTE: Use only one side!
        }

        // pre-emphasis (iterate backwards so f[i - 1] is still the unfiltered sample)
        for (const size_t i : Range(f.size() - 1, 1, -1)) f[i] -= f[i - 1] * 0.96875f; // 31/32

        // hamming window
        // A plain ascending loop is used on purpose: it does not depend on how Range() treats
        // a descending range with a positive step, so the window is always applied.
        for (size_t i = 1; i + 1 < f.size(); ++i)
        {
            f[i] *= 0.54f - 0.46f * Math::Cos(2 * Math::Pi * i / (f.size() - 1));
        }
        f.front() = 0.0f;
        f.back() = 0.0f;

        // FFT
        FFT::Analyze(fftResult, f.data(), f.size(), sampleRate, frames);

        // apply mel filter bank: bin[] holds the FFT indices of melChannels + 2 points
        // spaced evenly on the mel scale between 0 and half the sample rate
        const auto melMax = freqToMel(sampleRate / 2.0);
        const auto melMin = freqToMel(0);
        const auto deltaMel = (melMax - melMin) / static_cast<double>(melChannels + 1);
        for (const size_t i : step(bin.size()))
        {
            bin[i] = static_cast<size_t>(floor((f.size() + 1) * melToFreq(melMin + i * deltaMel) / sampleRate));
        }
        for (const size_t i : step(melChannels))
        {
            const size_t lo = bin[i];
            const size_t mid = bin[i + 1];
            const size_t hi = bin[i + 2];
            double sum = 0.0;
            // Half-open loops: an empty slope (lo == mid or mid == hi) is simply skipped and
            // `mid - 1` / `hi - 1` can never underflow when a bin index is 0.
            for (size_t j = lo; j < mid; ++j) // rising slope
            {
                sum += 1.0 * fftResult.buffer[j] * (j - lo) / (mid - lo);
            }
            for (size_t j = mid; j < hi; ++j) // falling slope
            {
                sum += 1.0 * fftResult.buffer[j] * (hi - j) / (hi - mid);
            }
            melSpectrum[i] = sum;
            // x: filter centre as a fraction of the spectrum, y: mean magnitude over the filter width
            melEnvelope[i] = { 2.0 * mid / f.size(), (hi > lo) ? (sum / (hi - lo)) : 0.0 };
        }

        // log energies, computed once per channel instead of once per (coefficient, channel).
        // The floor keeps a silent channel from producing log10(0) = -inf, which would turn
        // the DCT sums below into inf/NaN.
        constexpr double logFloor = 1e-12;
        Array<double> logMel(melChannels, 0.0);
        for (const size_t j : step(melChannels))
        {
            logMel[j] = Math::Log10(Max(Math::Abs(melSpectrum[j]), logFloor));
        }

        // DCT (the 0th coefficient is skipped; feature[k] holds coefficient k + 1)
        MFCC mfcc{ Array<double>(mfccOrder, 0.0) };
        for (size_t order = 1; order <= mfccOrder; ++order)
        {
            for (const size_t j : step(melChannels))
            {
                mfcc.feature[order - 1] += logMel[j] * Math::Cos(Math::Pi * order * (j + 0.5) / melChannels) * 10;
            }
        }
        return mfcc;
    }

    /// FFT magnitudes of the last analyzed frame.
    [[nodiscard]] std::span<const float> getFFTResult() const { return fftResult.buffer; }

    /// One (x, y) point per mel channel for the last analyzed frame.
    [[nodiscard]] std::span<const Vec2> getMelEnvelope() const { return melEnvelope; }

protected:
    FFTSampleLength frames;      // FFT frame length passed to FFT::Analyze
    Array<float> f;              // working copy of the current frame
    FFTResult fftResult;         // output of the last FFT
    size_t melChannels;          // number of mel filters
    Array<size_t> bin;           // FFT indices of the filter edges/centres (melChannels + 2 entries)
    Array<double> melSpectrum;   // filter bank output per channel
    Array<Vec2> melEnvelope;     // per-channel points exposed by getMelEnvelope()
    size_t mfccOrder;            // number of coefficients produced
};
// Global analyzer built with the constructor defaults (SL2K frame, 40 mel channels, 12 coefficients);
// Main() feeds it the microphone every frame and reads back the FFT result and mel envelope.
MFCCAnalyzer mfccAnalyzer{};
// volume
/// Maps an RMS amplitude onto a 0..1 "volume" using a logarithmic scale
/// (rms = 1 -> 1.0, rms = 1e-5 and below -> 0.0). Non-positive input yields 0.0.
[[nodiscard]] double rmsToVolume(const double rms)
{
    if (rms <= 0.0)
    {
        return 0.0;
    }

    const double volume = 1.0 + Math::Log10(rms) / 5.0;
    return Clamp(volume, 0.0, 1.0);
}
/// Inverse mapping of rmsToVolume(): converts a 0..1 volume back to an RMS amplitude,
/// with the result limited to the range [0, 1].
[[nodiscard]] double volumeToRMS(const double volume)
{
    const double exponent = (volume - 1.0) * 5.0;
    const double rms = Math::Pow(10.0, exponent);
    return Clamp(rms, 0.0, 1.0);
}
// palette
/// Colour themes (light / dark) and the helpers that expose the active one.
namespace Theme
{
    /// The five colour roles used by the UI.
    struct Palette
    {
        Color base;
        Color sub;
        Color accent;
        Color ok;
        Color bad;
    };

    constexpr Palette lightPalette{
        .base = Color{ U"#FEE" },
        .sub = Color{ U"#322" },
        .accent = Color{ U"#F53" },
        .ok = Color{ U"#3A3" },
        .bad = Color{ U"#FA3" }
    };

    // Same as the light palette with base and sub swapped.
    constexpr Palette darkPalette{
        .base = Color{ U"#322" },
        .sub = Color{ U"#FEE" },
        .accent = Color{ U"#F53" },
        .ok = Color{ U"#3A3" },
        .bad = Color{ U"#FA3" }
    };

    /// Selects which palette palette() returns.
    bool isDarkMode = false;

    /// @return the palette matching the current isDarkMode value.
    [[nodiscard]] Palette palette()
    {
        if (isDarkMode)
        {
            return darkPalette;
        }
        return lightPalette;
    }

    /// @return menu bar colours derived from the active palette.
    [[nodiscard]] SimpleMenuBar::ColorPalette menuPalette()
    {
        const Palette colors = palette();
        return {
            .menuBarColor = colors.sub,
            .activeMenuColor = colors.accent,
            .menuTextColor = colors.base,
            .itemBoxColor = colors.sub,
            .itemMouseoverColor = colors.accent,
            .itemTextColor = colors.base,
            .itemMouseoverTextColor = colors.base,
            .itemDisabledTextColor = { colors.base, 0.5 },
        };
    }
}
// scrollable GUI
/// Vertically scrollable stack of "cards" that make up the main GUI.
namespace MainGUI
{
    /// One vertical section of the GUI, identified by name.
    struct Card
    {
        String id;
        double height;
    };

    /// Cards from top to bottom.
    Array<Card> cards =
    {
        { U"menu", 30 },
        { U"volume", 100 },
        { U"hoge", 200 },
        { U"fuga", 300 },
        { U"piyo", 200 },
        { U"foo", 400 },
        { U"bar", 200 },
        { U"buz", 300 },
    };

    /// @return the combined height of all cards.
    double getHeight()
    {
        const auto heights = cards.map([](const Card& card) { return card.height; });
        return heights.sum();
    }

    /// Current scroll position.
    double scrollY = 0.0;

    /// Runs `callback` with drawing and cursor coordinates translated so that y = 0 is the top
    /// of the card named `uiId` (or the end of the stack when no card has that id).
    /// @return whatever `callback` returns.
    auto scrolled(const String& uiId, const auto& callback)
    {
        double offsetY = -scrollY;
        for (auto it = cards.begin(); (it != cards.end()) && (not (it->id == uiId)); ++it)
        {
            offsetY += it->height;
        }

        // The transformer stays alive while the callback runs.
        const Transformer2D scrolling{ Mat3x2::Translate(0.0, offsetY), TransformCursor::Yes };
        return callback();
    }
}
// main
// Main routine of mic2key: sets up the window, font, microphone and menu bar, then runs the
// per-frame loop (audio analysis -> input handling -> drawing) until System::Update() returns false.
void Main()
{
// ---- one-time setup ----
LicenseManager::SetApplicationLicense(U"mic2key", { U"mic2key", U"MIT License", U"Copyright 2024 TwoSquirrels" });
// Resizable 800x600 window that may not shrink below 400x300.
Window::SetMinimumFrameBufferSize(Size{ 400, 300 });
Window::Resize(800, 600);
Window::SetStyle(WindowStyle::Sizable);
Window::SetToggleFullscreenEnabled(false);
Window::SetTitle(U"mic2key");
Font font{ FontMethod::SDF, 40, Typeface::Bold };
font.setBufferThickness(10);
// Start recording right away.
Microphone mic{ StartImmediately::Yes };
// Threshold is stored as an RMS amplitude; 0.5 is the initial slider position.
double rmsThreshold = volumeToRMS(0.5);
// Slider track; its width is updated to follow the scene width every frame.
RectF sliderRect{ Arg::topLeft(20, 50), 760, 20 };
// Menu layout; the handlers below refer to entries by { menu index, item index }.
SimpleMenuBar menuBar
{ {
{ U"File", { U"Exit (ESC)" } },
{ U"View", { U"Dark mode" } },
{ U"Help", { U"About", U"License (F1)" } },
} };
// Start in light mode and keep the "Dark mode" check mark in sync with Theme::isDarkMode.
menuBar.setItemChecked({ 1, 0 }, Theme::isDarkMode = false);
menuBar.setColorPalette(Theme::menuPalette());
Scene::SetBackground(Theme::palette().base);
// ---- frame loop ----
while (System::Update())
{
// Reopen the microphone when its device index is no longer among the enumerated devices.
if (System::EnumerateMicrophones().none([&](const auto& info) { return info.microphoneIndex == mic.microphoneIndex(); }))
{
mic.open(StartImmediately::Yes);
}
// Analyze the newest frame. The returned MFCC is not used yet in this function (WIP), but the
// call also refreshes the FFT result and mel envelope that are drawn below.
const auto mfcc = mfccAnalyzer.analyze(mic);
// menu bar
if (const auto item = menuBar.update())
{
// File > Exit
if (item == MenuBarItemIndex{ 0, 0 }) System::Exit();
// View > Dark mode: toggle the flag, then refresh the check mark, background and menu colours.
if (item == MenuBarItemIndex{ 1, 0 })
{
menuBar.setItemChecked({ 1, 0 }, Theme::isDarkMode = not Theme::isDarkMode);
Scene::SetBackground(Theme::palette().base);
menuBar.setColorPalette(Theme::menuPalette());
}
// Help > About / Help > License
if (item == MenuBarItemIndex{ 2, 0 }) System::LaunchBrowser(U"https://github.com/TwoSquirrels/mic2key");
if (item == MenuBarItemIndex{ 2, 1 }) LicenseManager::ShowInBrowser();
}
// Mouse-wheel scrolling, limited to [0, content height - scene height]
// (the Max is applied last, so the result is 0 when the content fits in the scene).
MainGUI::scrollY += Mouse::Wheel() * 64.0;
MainGUI::scrollY = Min(MainGUI::scrollY, MainGUI::getHeight() - Scene::Height());
MainGUI::scrollY = Max(MainGUI::scrollY, 0.0);
// volume threshold slider (input handling, in the coordinate space of the "volume" card)
MainGUI::scrolled(U"volume", [&]
{
// Keep a 20 px margin on both sides of the track.
sliderRect.w = Scene::Width() - 40.0;
// The hit area is the track grown by 5 px.
if (sliderRect.stretched(5.0).mouseOver()) {
Cursor::RequestStyle(CursorStyle::Hand);
if (MouseL.pressed()) {
// Horizontal cursor position along the track -> volume -> RMS threshold.
rmsThreshold = volumeToRMS((Cursor::PosF().x - sliderRect.x) / sliderRect.w);
}
}
});
// draw
// spectrum graph background: 39 horizontal guide lines placed with erfc, every 5th one more opaque
for (size_t i : Range(1, 39))
{
RectF{ Arg::leftCenter(0, 30.0 + (Scene::Height() - 30.0) * std::erfc(i / 20.0)), Scene::Width(), 2 }.draw(
ColorF{ Theme::palette().sub, i % 5 == 0 ? 0.25 : 0.125 }
);
}
// spectrum bars: one bar per FFT bin, anchored to the bottom edge; the bar height is an erf
// curve of the bin value scaled by (1 - rmsThreshold), and the hue runs from 300 down towards 0
for (const auto buffer = mfccAnalyzer.getFFTResult(); const size_t i : step(buffer.size()))
{
const auto width = 1.0 * Scene::Width() / buffer.size();
RectF{
Arg::bottomLeft(i * width, Scene::Height()),
1.0 * width,
(Scene::Height() - 30.0) * std::erf(buffer[i] * (1.0 - rmsThreshold) * 640.0)
}.draw(HSV{ 300.0 - 300.0 * i / buffer.size(), 0.25 });
}
// mel envelope: polyline from the bottom-left corner through one point per mel channel
// to the bottom-right corner, with a small dot on every channel point
LineString envelope{ { 0, Scene::Height() } };
for (const auto [x, y] : mfccAnalyzer.getMelEnvelope())
{
envelope << Vec2{ Scene::Width() * x, 30.0 + (Scene::Height() - 30.0) * std::erfc(y * (1.0 - rmsThreshold) * 640.0) };
Circle{ envelope.back(), 3.0 }.draw(ColorF{ Theme::palette().accent, 0.25 });
}
envelope << Vec2{ Scene::Width(), Scene::Height() };
envelope.draw(2.0, ColorF{ Theme::palette().accent, 0.25 });
// GUI borders: a 2 px horizontal line at the top of every card
for (const auto& [id, height] : MainGUI::cards)
{
MainGUI::scrolled(id, [&]
{
RectF{ Arg::center(Scene::Width() / 2.0, 0.0), Scene::Width() - 40.0, 2.0}.draw(Theme::palette().sub);
});
}
// volume threshold slider (drawing)
MainGUI::scrolled(U"volume", [&]
{
// track
sliderRect.drawShadow(Vec2{ 1.0, 1.0 }, 8.0, 2.0).draw(Theme::palette().sub);
// part of the track left of the threshold ("bad" colour) ...
sliderRect.stretched(0.0, sliderRect.w * (rmsToVolume(rmsThreshold) - 1.0), 0.0, 0.0)
.draw(ColorF{ Theme::palette().bad, 0.75 });
// ... and right of the threshold ("ok" colour)
sliderRect.stretched(0.0, 0.0, 0.0, sliderRect.w * -rmsToVolume(rmsThreshold))
.draw(ColorF{ Theme::palette().ok, 0.75 });
// live input level: translucent bar whose length follows the microphone's current RMS
sliderRect.stretched(0.0, sliderRect.w * (rmsToVolume(mic.rootMeanSquare()) - 1.0), 0.0, 0.0)
.drawShadow(Vec2{ 1.0, 1.0 }, 8.0, 2.0, ColorF{ 0, 0.125 }, false)
.draw(ColorF{ Theme::palette().base, 0.5 });
// knob at the threshold position
Circle{ sliderRect.leftCenter() + Vec2{ sliderRect.w * rmsToVolume(rmsThreshold), 0.0 }, 12 }
.drawShadow(Vec2{ 1.0, 1.0 }, 6.0, 2.0)
.draw(Palette::White);
// label: threshold as 20 * log10(rms), limited to [-99.9, -0.01]
font(U"Volume threshold: {:05.1f}dB"_fmt(Clamp(Math::Log10(rmsThreshold) * 20.0, -99.9, -0.01)))
.draw(20, Arg::topLeft(20, 20), Theme::palette().sub);
});
// scroll bar, drawn only when the content is taller than the scene;
// its position follows scrollY and its length follows the visible fraction of the content
if (MainGUI::getHeight() > Scene::Height())
{
RectF{
Arg::topRight(Scene::Width() - 4.0, 36.0 + (Scene::Height() - 42.0) * MainGUI::scrollY / MainGUI::getHeight()),
6.0,
(Scene::Height() - 38.0) * Scene::Height() / MainGUI::getHeight()
}.rounded(3.0).draw(ColorF{ Theme::palette().sub, 0.5 });
}
// menu bar (drawn after the scrolled content so it stays on top), with a drop shadow below it
Rect{ 0, 0, Scene::Width(), 30 }.drawShadow(Vec2{ 0.0, 1.0 }, 10.0, 4.0);
menuBar.draw();
// debug overlay: red crosshair at the cursor plus a text dump of sizes and scroll state
if (isDebug)
{
RectF{ Arg::center(Cursor::PosF()), 2.0, 60.0 }.draw(Palette::Red);
RectF{ Arg::center(Cursor::PosF()), 60.0, 2.0 }.draw(Palette::Red);
Array<String> dump;
Vec2 sceneSize = Scene::Size();
dump << U"scene size\t= ({:06.1f}, {:06.1f})"_fmt(sceneSize.x, sceneSize.y);
dump << U"cursor\t= ({:06.1f}, {:06.1f})"_fmt(Cursor::PosF().x, Cursor::PosF().y);
dump << U"ui size\t= ({:06.1f}, {:06.1f})"_fmt(sceneSize.x, MainGUI::getHeight());
dump << U"scroll\t= (0.0, {:06.1f})"_fmt(MainGUI::scrollY);
dump << U"scrolled cursor\t= ({:06.1f}, {:06.1f})"_fmt(Cursor::PosF().x, Cursor::PosF().y + MainGUI::scrollY);
// One line per entry, outlined white text anchored to the bottom-left corner.
font(dump.join(U"\n", U"", U"")).draw(
TextStyle::Outline(0.0, 0.5, Palette::Black),
16,
Arg::bottomLeft(8, Scene::Height() - 8),
Palette::White
);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment