Created
July 23, 2025 19:35
-
-
Save TwoSquirrels/2a97d61e1a7c60d6ec6a1b6083614446 to your computer and use it in GitHub Desktop.
mic2key WIP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # include <Siv3D.hpp> // Siv3D v0.6.15 | |
| //# include <Windows.h> | |
| constexpr bool isDebug = SIV3D_BUILD_PRIVATE_DEFINITION_DEBUG(); // Debug Build | |
| // MFCC | |
| struct MFCC | |
| { | |
| Array<double> feature; | |
| [[nodiscard]] bool isUnset() const | |
| { | |
| return std::ranges::all_of(feature, [](const double x) { return x == 0.0; }); | |
| } | |
| [[nodiscard]] double norm() const | |
| { | |
| return Math::Sqrt(std::accumulate( | |
| feature.begin(), | |
| feature.end(), | |
| 0.0, | |
| [](const auto& norm, const auto& x) { return norm + x * x; } | |
| )); | |
| } | |
| [[nodiscard]] double cosineSimilarity(const MFCC& other) const | |
| { | |
| if (feature.size() != other.feature.size()) throw Error{ U"MFCC order mismatch" }; | |
| const double thisNorm = norm(), otherNorm = other.norm(); | |
| if (thisNorm < 1e-8 || otherNorm < 1e-8) return 0.0; | |
| double innerProduct = 0.0; | |
| for (size_t i : step(feature.size())) innerProduct += feature[i] * other.feature[i]; | |
| return innerProduct / thisNorm / otherNorm; | |
| } | |
| }; | |
| // MFCC analyzer | |
| class MFCCAnalyzer | |
| { | |
| public: | |
| static [[nodiscard]] double freqToMel(const double freq) | |
| { | |
| return 1127.01 * Math::Log(1.0 + freq / 700.0); | |
| } | |
| static [[nodiscard]] double melToFreq(const double mel) | |
| { | |
| return 700.0 * (Math::Exp(mel / 1127.01) - 1.0); | |
| } | |
| explicit MFCCAnalyzer(const FFTSampleLength frames = FFTSampleLength::SL2K, const size_t melChannels = 40, const size_t mfccOrder = 12) | |
| : frames(frames), f(256uLL << FromEnum(frames), 0.0f), melChannels(melChannels), bin(melChannels + 2), | |
| melSpectrum(melChannels), melEnvelope(melChannels), mfccOrder(mfccOrder) {} | |
| [[nodiscard]] MFCC analyze(const Microphone& mic) | |
| { | |
| if (not mic.isLoop()) throw Error{ U"Microphone is must be loop mode." }; | |
| if (not mic.isRecording() || mic.getBufferLength() < f.size()) return MFCC{ Array<double>(mfccOrder, 0.0) }; | |
| // get data from mic | |
| const auto sampleRate = mic.getSampleRate(); | |
| const auto& buffer = mic.getBuffer(); | |
| const size_t writePos = mic.posSample(); | |
| for (const size_t pos : step(f.size())) | |
| { | |
| const size_t idx = (pos + writePos < f.size() ? mic.getBufferLength() : 0) + pos + writePos - f.size(); | |
| f[pos] = buffer[idx].left; // NOTE: Use only one side! | |
| } | |
| // pre-emphasis | |
| for (const size_t i : Range(f.size() - 1, 1, -1)) f[i] -= f[i - 1] * 0.96875f; // 31/32 | |
| // hamming window | |
| for (const size_t i : Range(f.size() - 2, 1)) f[i] *= 0.54f - 0.46f * cos(2 * Math::Pi * i / (f.size() - 1)); | |
| f.front() = 0.0f; | |
| f.back() = 0.0f; | |
| // FFT | |
| FFT::Analyze(fftResult, f.data(), f.size(), sampleRate, frames); | |
| // apply mel filter bank | |
| const auto melMax = freqToMel(sampleRate / 2.0); | |
| const auto melMin = freqToMel(0); | |
| const auto deltaMel = (melMax - melMin) / static_cast<double>(melChannels + 1); | |
| for (const size_t i : step(bin.size())) | |
| { | |
| bin[i] = floor((f.size() + 1) * melToFreq(melMin + i * deltaMel) / sampleRate); | |
| } | |
| for (const size_t i : step(melChannels)) | |
| { | |
| melSpectrum[i] = 0.0; | |
| for (const size_t j : Range(bin[i], bin[i + 1] - 1)) | |
| { | |
| melSpectrum[i] += 1.0 * fftResult.buffer[j] * (j - bin[i]) / (bin[i + 1] - bin[i]); | |
| } | |
| for (const size_t j : Range(bin[i + 1], bin[i + 2] - 1)) | |
| { | |
| melSpectrum[i] += 1.0 * fftResult.buffer[j] * (bin[i + 2] - j) / (bin[i + 2] - bin[i + 1]); | |
| } | |
| melEnvelope[i] = { 2.0 * bin[i + 1] / f.size(), melSpectrum[i] / (bin[i + 2] - bin[i]) }; | |
| } | |
| // DCT | |
| MFCC mfcc{ Array<double>(mfccOrder, 0.0) }; | |
| for (const size_t i : Range(1, mfccOrder)) | |
| { | |
| for (const size_t j : step(melChannels)) | |
| { | |
| mfcc.feature[i - 1] += Math::Log10(Math::Abs(melSpectrum[j])) * Math::Cos(Math::Pi * i * (j + 0.5) / melChannels) * 10; | |
| } | |
| } | |
| return mfcc; | |
| } | |
| [[nodiscard]] std::span<const float> getFFTResult() const { return fftResult.buffer; } | |
| [[nodiscard]] std::span<const Vec2> getMelEnvelope() const { return melEnvelope; } | |
| protected: | |
| FFTSampleLength frames; | |
| Array<float> f; | |
| FFTResult fftResult; | |
| size_t melChannels; | |
| Array<size_t> bin; | |
| Array<double> melSpectrum; | |
| Array<Vec2> melEnvelope; | |
| size_t mfccOrder; | |
| }; | |
| MFCCAnalyzer mfccAnalyzer{}; | |
| // volume | |
| [[nodiscard]] double rmsToVolume(const double rms) | |
| { | |
| if (rms <= 0.0) return 0.0; | |
| return Clamp(1.0 + Math::Log10(rms) / 5.0, 0.0, 1.0); | |
| } | |
| [[nodiscard]] double volumeToRMS(const double volume) | |
| { | |
| return Clamp(Math::Pow(10.0, (volume - 1.0) * 5.0), 0.0, 1.0); | |
| } | |
| // palette | |
| namespace Theme { | |
| struct Palette { Color base, sub, accent, ok, bad; }; | |
| constexpr Palette lightPalette{ | |
| .base = Color{ U"#FEE" }, .sub = Color{ U"#322" }, .accent = Color{ U"#F53" }, | |
| .ok = Color{ U"#3A3" }, .bad = Color{ U"#FA3" } | |
| }; | |
| constexpr Palette darkPalette{ | |
| .base = Color{ U"#322" }, .sub = Color{ U"#FEE" }, .accent = Color{ U"#F53" }, | |
| .ok = Color{ U"#3A3" }, .bad = Color{ U"#FA3" } | |
| }; | |
| bool isDarkMode; | |
| [[nodiscard]] Palette palette() { return isDarkMode ? darkPalette : lightPalette; } | |
| [[nodiscard]] SimpleMenuBar::ColorPalette menuPalette() | |
| { | |
| return { | |
| .menuBarColor = palette().sub, | |
| .activeMenuColor = palette().accent, | |
| .menuTextColor = palette().base, | |
| .itemBoxColor = palette().sub, | |
| .itemMouseoverColor = palette().accent, | |
| .itemTextColor = palette().base, | |
| .itemMouseoverTextColor = palette().base, | |
| .itemDisabledTextColor = { palette().base, 0.5 }, | |
| }; | |
| } | |
| } | |
| // scrollable GUI | |
| namespace MainGUI | |
| { | |
| struct Card { String id; double height; }; | |
| Array<Card> cards = | |
| { | |
| { U"menu", 30 }, | |
| { U"volume", 100 }, | |
| { U"hoge", 200 }, | |
| { U"fuga", 300 }, | |
| { U"piyo", 200 }, | |
| { U"foo", 400 }, | |
| { U"bar", 200 }, | |
| { U"buz", 300 }, | |
| }; | |
| double getHeight() | |
| { | |
| return cards.map([](const auto& card) { return card.height; }).sum(); | |
| } | |
| double scrollY = 0.0; | |
| auto scrolled(const String& uiId, const auto& callback) | |
| { | |
| double offset = -scrollY; | |
| for (const auto& [id, height] : cards) | |
| { | |
| if (id == uiId) break; | |
| offset += height; | |
| } | |
| const Transformer2D scrolling{ Mat3x2::Translate(0.0, offset), TransformCursor::Yes }; | |
| return callback(); | |
| } | |
| } | |
| // main | |
| void Main() | |
| { | |
| LicenseManager::SetApplicationLicense(U"mic2key", { U"mic2key", U"MIT License", U"Copyright 2024 TwoSquirrels" }); | |
| Window::SetMinimumFrameBufferSize(Size{ 400, 300 }); | |
| Window::Resize(800, 600); | |
| Window::SetStyle(WindowStyle::Sizable); | |
| Window::SetToggleFullscreenEnabled(false); | |
| Window::SetTitle(U"mic2key"); | |
| Font font{ FontMethod::SDF, 40, Typeface::Bold }; | |
| font.setBufferThickness(10); | |
| Microphone mic{ StartImmediately::Yes }; | |
| double rmsThreshold = volumeToRMS(0.5); | |
| RectF sliderRect{ Arg::topLeft(20, 50), 760, 20 }; | |
| SimpleMenuBar menuBar | |
| { { | |
| { U"File", { U"Exit (ESC)" } }, | |
| { U"View", { U"Dark mode" } }, | |
| { U"Help", { U"About", U"License (F1)" } }, | |
| } }; | |
| menuBar.setItemChecked({ 1, 0 }, Theme::isDarkMode = false); | |
| menuBar.setColorPalette(Theme::menuPalette()); | |
| Scene::SetBackground(Theme::palette().base); | |
| while (System::Update()) | |
| { | |
| if (System::EnumerateMicrophones().none([&](const auto& info) { return info.microphoneIndex == mic.microphoneIndex(); })) | |
| { | |
| mic.open(StartImmediately::Yes); | |
| } | |
| const auto mfcc = mfccAnalyzer.analyze(mic); | |
| // menu bar | |
| if (const auto item = menuBar.update()) | |
| { | |
| if (item == MenuBarItemIndex{ 0, 0 }) System::Exit(); | |
| if (item == MenuBarItemIndex{ 1, 0 }) | |
| { | |
| menuBar.setItemChecked({ 1, 0 }, Theme::isDarkMode = not Theme::isDarkMode); | |
| Scene::SetBackground(Theme::palette().base); | |
| menuBar.setColorPalette(Theme::menuPalette()); | |
| } | |
| if (item == MenuBarItemIndex{ 2, 0 }) System::LaunchBrowser(U"https://github.com/TwoSquirrels/mic2key"); | |
| if (item == MenuBarItemIndex{ 2, 1 }) LicenseManager::ShowInBrowser(); | |
| } | |
| MainGUI::scrollY += Mouse::Wheel() * 64.0; | |
| MainGUI::scrollY = Min(MainGUI::scrollY, MainGUI::getHeight() - Scene::Height()); | |
| MainGUI::scrollY = Max(MainGUI::scrollY, 0.0); | |
| // volume threshold slider | |
| MainGUI::scrolled(U"volume", [&] | |
| { | |
| sliderRect.w = Scene::Width() - 40.0; | |
| if (sliderRect.stretched(5.0).mouseOver()) { | |
| Cursor::RequestStyle(CursorStyle::Hand); | |
| if (MouseL.pressed()) { | |
| rmsThreshold = volumeToRMS((Cursor::PosF().x - sliderRect.x) / sliderRect.w); | |
| } | |
| } | |
| }); | |
| // draw | |
| // spectrum graph background | |
| for (size_t i : Range(1, 39)) | |
| { | |
| RectF{ Arg::leftCenter(0, 30.0 + (Scene::Height() - 30.0) * std::erfc(i / 20.0)), Scene::Width(), 2 }.draw( | |
| ColorF{ Theme::palette().sub, i % 5 == 0 ? 0.25 : 0.125 } | |
| ); | |
| } | |
| for (const auto buffer = mfccAnalyzer.getFFTResult(); const size_t i : step(buffer.size())) | |
| { | |
| const auto width = 1.0 * Scene::Width() / buffer.size(); | |
| RectF{ | |
| Arg::bottomLeft(i * width, Scene::Height()), | |
| 1.0 * width, | |
| (Scene::Height() - 30.0) * std::erf(buffer[i] * (1.0 - rmsThreshold) * 640.0) | |
| }.draw(HSV{ 300.0 - 300.0 * i / buffer.size(), 0.25 }); | |
| } | |
| LineString envelope{ { 0, Scene::Height() } }; | |
| for (const auto [x, y] : mfccAnalyzer.getMelEnvelope()) | |
| { | |
| envelope << Vec2{ Scene::Width() * x, 30.0 + (Scene::Height() - 30.0) * std::erfc(y * (1.0 - rmsThreshold) * 640.0) }; | |
| Circle{ envelope.back(), 3.0 }.draw(ColorF{ Theme::palette().accent, 0.25 }); | |
| } | |
| envelope << Vec2{ Scene::Width(), Scene::Height() }; | |
| envelope.draw(2.0, ColorF{ Theme::palette().accent, 0.25 }); | |
| // GUI borders | |
| for (const auto& [id, height] : MainGUI::cards) | |
| { | |
| MainGUI::scrolled(id, [&] | |
| { | |
| RectF{ Arg::center(Scene::Width() / 2.0, 0.0), Scene::Width() - 40.0, 2.0}.draw(Theme::palette().sub); | |
| }); | |
| } | |
| // volume threshold slider | |
| MainGUI::scrolled(U"volume", [&] | |
| { | |
| sliderRect.drawShadow(Vec2{ 1.0, 1.0 }, 8.0, 2.0).draw(Theme::palette().sub); | |
| sliderRect.stretched(0.0, sliderRect.w * (rmsToVolume(rmsThreshold) - 1.0), 0.0, 0.0) | |
| .draw(ColorF{ Theme::palette().bad, 0.75 }); | |
| sliderRect.stretched(0.0, 0.0, 0.0, sliderRect.w * -rmsToVolume(rmsThreshold)) | |
| .draw(ColorF{ Theme::palette().ok, 0.75 }); | |
| sliderRect.stretched(0.0, sliderRect.w * (rmsToVolume(mic.rootMeanSquare()) - 1.0), 0.0, 0.0) | |
| .drawShadow(Vec2{ 1.0, 1.0 }, 8.0, 2.0, ColorF{ 0, 0.125 }, false) | |
| .draw(ColorF{ Theme::palette().base, 0.5 }); | |
| Circle{ sliderRect.leftCenter() + Vec2{ sliderRect.w * rmsToVolume(rmsThreshold), 0.0 }, 12 } | |
| .drawShadow(Vec2{ 1.0, 1.0 }, 6.0, 2.0) | |
| .draw(Palette::White); | |
| font(U"Volume threshold: {:05.1f}dB"_fmt(Clamp(Math::Log10(rmsThreshold) * 20.0, -99.9, -0.01))) | |
| .draw(20, Arg::topLeft(20, 20), Theme::palette().sub); | |
| }); | |
| if (MainGUI::getHeight() > Scene::Height()) | |
| { | |
| RectF{ | |
| Arg::topRight(Scene::Width() - 4.0, 36.0 + (Scene::Height() - 42.0) * MainGUI::scrollY / MainGUI::getHeight()), | |
| 6.0, | |
| (Scene::Height() - 38.0) * Scene::Height() / MainGUI::getHeight() | |
| }.rounded(3.0).draw(ColorF{ Theme::palette().sub, 0.5 }); | |
| } | |
| // menu bar | |
| Rect{ 0, 0, Scene::Width(), 30 }.drawShadow(Vec2{ 0.0, 1.0 }, 10.0, 4.0); | |
| menuBar.draw(); | |
| if (isDebug) | |
| { | |
| RectF{ Arg::center(Cursor::PosF()), 2.0, 60.0 }.draw(Palette::Red); | |
| RectF{ Arg::center(Cursor::PosF()), 60.0, 2.0 }.draw(Palette::Red); | |
| Array<String> dump; | |
| Vec2 sceneSize = Scene::Size(); | |
| dump << U"scene size\t= ({:06.1f}, {:06.1f})"_fmt(sceneSize.x, sceneSize.y); | |
| dump << U"cursor\t= ({:06.1f}, {:06.1f})"_fmt(Cursor::PosF().x, Cursor::PosF().y); | |
| dump << U"ui size\t= ({:06.1f}, {:06.1f})"_fmt(sceneSize.x, MainGUI::getHeight()); | |
| dump << U"scroll\t= (0.0, {:06.1f})"_fmt(MainGUI::scrollY); | |
| dump << U"scrolled cursor\t= ({:06.1f}, {:06.1f})"_fmt(Cursor::PosF().x, Cursor::PosF().y + MainGUI::scrollY); | |
| font(dump.join(U"\n", U"", U"")).draw( | |
| TextStyle::Outline(0.0, 0.5, Palette::Black), | |
| 16, | |
| Arg::bottomLeft(8, Scene::Height() - 8), | |
| Palette::White | |
| ); | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment