Last active
November 21, 2025 23:36
-
-
Save kujirahand/14172fde92148c83363c667ec8c6f469 to your computer and use it in GitHub Desktop.
簡単なボイスチェンジャーのプログラム。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sounddevice as sd | |
| import numpy as np | |
| import librosa | |
| import blessed | |
| import sys | |
| import queue | |
| import time | |
| from scipy.signal import butter, sosfilt_zi, sosfilt | |
# --- Constants ---
# Sample rate handed to the audio stream, the filter design and the pitch shifter.
SAMPLE_RATE = 44100
# Channel count handed to the audio stream.
CHANNELS = 1
# Block size (frames per callback) handed to the audio stream; also the length
# of the silent placeholder blocks used for drawing.
BLOCK_SIZE = 2048
# Input blocks whose peak absolute sample is below this value are muted.
NOISE_GATE_THRESHOLD = 0.04
# Lower and upper edges of the band-pass filter, in the same units as
# SAMPLE_RATE (both are given to butter() together with fs=SAMPLE_RATE).
LOWCUT = 300.0
HIGCUT = 3400.0  # NOTE(review): presumably meant "HIGHCUT"; name kept as-is
# Order argument given to butter().
FILTER_ORDER = 4
# Factor applied to the processed signal before it is written to the output.
OUTPUT_GAIN = 0.5
FRAME_RATE = 20.0  # frame rate for drawing (the key-poll timeout is 1/FRAME_RATE)

# --- Terminal object ---
term = blessed.Terminal()

# --- Global variables ---
# Current pitch shift in semitones; changed from the keyboard in main().
pitch_shift_semitones = 8.0
# Carries audio blocks from the stream callback to the drawing loop.
data_queue = queue.Queue()
# Band-pass filter in second-order-sections form.
sos_filter_coeffs = butter(FILTER_ORDER, [LOWCUT, HIGCUT], btype='band', fs=SAMPLE_RATE, output='sos')
# Filter state carried from one callback invocation to the next.
zi_filter_state = sosfilt_zi(sos_filter_coeffs)
def get_color_from_amplitude(amplitude):
    """Return the terminal color formatter matching an amplitude.

    The amplitude is multiplied by 4 and capped at 1.0. Scaled levels
    below 0.3 map to green, levels below 0.6 map to yellow, and
    everything else maps to red.
    """
    level = min(1.0, amplitude * 4)
    if level < 0.3:
        return term.green
    if level < 0.6:
        return term.yellow
    return term.red
def draw_waveform(data):
    """Draw the title bar and a bar rendering of one block of samples.

    The header line (including the current pitch shift) is always printed
    on row 0. The block is then thinned to roughly one sample per terminal
    column and each sample is drawn as a vertical bar growing from the
    middle row: upwards for positive values, downwards for negative ones.

    Args:
        data: Array of samples to draw (presumably 1-D floats in
            [-1, 1] as delivered by the audio callback - TODO confirm).
            ``None`` or an empty array draws only the header.
    """
    # Header
    header = f" VOICE CHANGER | Pitch Shift: {pitch_shift_semitones:.1f} semitones "
    print(term.move_y(0) + term.center(term.bold(header)))

    num_cols = term.width
    # Nothing to draw without samples, and a zero-width terminal would
    # otherwise cause a division by zero below.
    if data is None or data.size == 0 or num_cols <= 0:
        return

    # Keep every `step`-th sample so the block fits the available columns.
    step = max(1, len(data) // num_cols)
    # Query the terminal size once per frame rather than once per cell.
    height = term.height
    center_y = height // 2

    # Waveform
    for x, value in enumerate(data[::step][:num_cols]):
        amplitude = int(value * (center_y - 2))
        if amplitude == 0:
            continue
        color = get_color_from_amplitude(abs(value))
        # Positive samples grow towards the top of the screen (smaller y).
        direction = -1 if amplitude > 0 else 1
        for y in range(abs(amplitude)):
            pos_y = center_y + direction * y
            if 1 < pos_y < height:
                with term.location(x, pos_y):
                    # No trailing newline: a line feed emitted on the last
                    # row can scroll the whole screen.
                    print(color('█'), end='')
def callback(indata, outdata, frames, time, status):
    """Process one audio block and hand a copy of the input to the UI.

    Passed as the ``callback`` of the audio stream opened in ``main``.
    A block whose peak is below ``NOISE_GATE_THRESHOLD`` is muted.
    Otherwise the block is band-pass filtered, pitch shifted by
    ``pitch_shift_semitones``, fitted to the output length, scaled by
    ``OUTPUT_GAIN`` and written to ``outdata``. On any exception the
    output is silenced instead.

    In every case a block for drawing is queued, unless two or more are
    already waiting.

    Args:
        indata: Input buffer for this block.
        outdata: Output buffer, filled in place.
        frames: Unused.
        time: Unused.
        status: Stream status; printed to stderr when truthy.
    """
    global zi_filter_state

    if status:
        print(status, file=sys.stderr)

    try:
        samples = indata.flatten()

        # Noise gate: mute quiet blocks and show a flat line.
        gate_closed = np.abs(samples).max() < NOISE_GATE_THRESHOLD
        if gate_closed:
            outdata.fill(0)
            if data_queue.qsize() < 2:
                data_queue.put(np.zeros_like(samples))
            return

        # Band-pass filter, keeping the filter state for the next block.
        band_limited, zi_filter_state = sosfilt(sos_filter_coeffs, samples, zi=zi_filter_state)

        shifted = librosa.effects.pitch_shift(
            y=band_limited,
            sr=SAMPLE_RATE,
            n_steps=pitch_shift_semitones,
        )

        # Fit the shifted signal to the output buffer: zero-pad when it is
        # too short, cut off the tail when it is too long.
        wanted = outdata.size
        missing = wanted - len(shifted)
        if missing > 0:
            shifted = np.concatenate((shifted, np.zeros(missing)))
        outdata[:] = shifted[:wanted].reshape(outdata.shape) * OUTPUT_GAIN

        if data_queue.qsize() < 2:
            data_queue.put(samples)
    except Exception:
        # Best effort: output silence rather than let the callback fail.
        outdata.fill(0)
        if data_queue.qsize() < 2:
            data_queue.put(np.zeros(BLOCK_SIZE, dtype=np.float32))
def main():
    """Run the UI loop: draw the newest audio block and handle key presses.

    Opens the audio stream with ``callback`` and switches the terminal to
    fullscreen, cbreak mode with a hidden cursor. Each frame clears the
    screen, draws the most recently queued block and the help bar, then
    waits up to ``1/FRAME_RATE`` seconds for a key:

    * ``h`` raises the pitch shift by one semitone (at most +24),
    * ``l`` lowers it by one semitone (at least -24),
    * Enter leaves the loop.
    """
    global pitch_shift_semitones

    try:
        with sd.Stream(samplerate=SAMPLE_RATE, blocksize=BLOCK_SIZE, channels=CHANNELS, callback=callback), \
                term.fullscreen(), term.cbreak(), term.hidden_cursor():
            current_block = np.zeros(BLOCK_SIZE, dtype=np.float32)
            while True:
                # --- Take the newest block from the queue ---
                while True:
                    try:
                        current_block = data_queue.get_nowait()
                    except queue.Empty:
                        break

                # --- Drawing ---
                print(term.clear, end='')
                draw_waveform(current_block)
                with term.location(0, term.height - 1):
                    controls = " [h] Pitch Up | [l] Pitch Down | [Enter] Exit "
                    print(term.center(term.bold_white_on_blue(controls)))
                sys.stdout.flush()

                # --- Key input; the timeout also paces the frame rate ---
                key = term.inkey(timeout=1/FRAME_RATE)
                if not key:
                    continue
                if key == 'h':
                    pitch_shift_semitones = min(24.0, pitch_shift_semitones + 1.0)
                elif key == 'l':
                    pitch_shift_semitones = max(-24.0, pitch_shift_semitones - 1.0)
                elif key.name == "KEY_ENTER" or key == '\n':
                    break
    except Exception as e:
        # Emit term.normal to put the terminal back into its normal state.
        print(term.normal)
        print(f"\nAn error occurred: {e}")
    finally:
        print("\nVoice changer stopped.")


if __name__ == "__main__":
    main()
Author
Author
requirements.txtを次のように修正します:
sounddevice
librosa
numpy
blessed
scipy
pyworld
cython
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
以下、pyworldを利用した改良版です。