Last active
June 17, 2025 09:52
-
-
Save pppoe252110/7e5e933db76066f17b77acb3ac39c67c to your computer and use it in GitHub Desktop.
Simple voice chat implementation using Mirror Networking, Opus and UniMic. UniMic: https://github.com/adrenak/unimic · UnityOpus: https://github.com/TyounanMOTI/UnityOpus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using UnityEngine; | |
| using Mirror; | |
| using Adrenak.UniMic; | |
| using System.Collections; | |
| using System; | |
| using System.Collections.Generic; | |
| using System.Linq; | |
| using UnityOpus; | |
/// <summary>
/// Push-to-talk voice chat on top of Mirror.
/// Microphone frames from UniMic are converted to the configured channel count,
/// resampled, encoded with Opus in 20 ms frames and sent to the server, which
/// relays them to every client. Received packets are decoded per sender and fed
/// to the <see cref="NetworkPlayerSpeaker"/> whose netId matches the sender.
/// </summary>
/// <remarks>
/// Network handlers are only registered in <see cref="Start"/> when the server /
/// client is already active at that point, so this component is expected to be
/// created after the connection has been established.
/// </remarks>
public class MirrorVoiceChat : MonoBehaviour
{
    /// <summary>Network message carrying one Opus-encoded voice frame.</summary>
    public struct VoicePacket : NetworkMessage
    {
        /// <summary>
        /// netId of the speaking player's identity. Despite the field name this is
        /// NOT a Mirror connection id. The server overwrites it before relaying.
        /// </summary>
        public uint connectionId;
        /// <summary>One Opus packet.</summary>
        public byte[] compressedData;
        /// <summary>Sample rate the sender encoded at (informational).</summary>
        public int sampleRate;
        /// <summary>Channel count the sender encoded with (informational).</summary>
        public int channels;
    }

    /// <summary>
    /// Receive-side state for a single remote speaker. Opus decoders are stateful,
    /// so every sender needs its own decoder and its own sample queue.
    /// </summary>
    private sealed class RemoteVoiceStream
    {
        public readonly OpusDecoder Decoder;
        public readonly List<float> Samples = new List<float>();
        public float LastReceiveTime;
        public bool IsPlaying;

        public RemoteVoiceStream(OpusDecoder decoder)
        {
            Decoder = decoder;
        }
    }

    /// <summary>
    /// Scene-wide instance. Set in <see cref="Awake"/>; looked up lazily with
    /// <c>FindFirstObjectByType</c> when accessed before that (may be null).
    /// </summary>
    public static MirrorVoiceChat Instance
    {
        get
        {
            if (instance == null)
            {
                instance = FindFirstObjectByType<MirrorVoiceChat>();
            }
            return instance;
        }
    }

    private static MirrorVoiceChat instance;

    // Opus frame duration used for every encoded packet.
    private const int OpusFrameDurationMs = 20;

    // Upper bound for a single Opus frame; also used to reject oversized packets.
    private const int MaxOpusPacketBytes = 1275;

    // --- Configurable Settings ---
    [Header("Voice Chat Settings")]
    [SerializeField, Tooltip("How long to buffer audio before sending (seconds)")]
    private float bufferDuration = 0.1f; // 100ms buffer
    [SerializeField, Tooltip("Target sample rate for voice chat (Hz)")]
    private int targetSampleRate = 48000; // Opus supports 8, 12, 16, 24, or 48 kHz
    // NOTE: kept for inspector compatibility; the send path does not consult it.
    // Packet cadence follows from bufferDuration and the 20 ms Opus frame size.
    [SerializeField, Tooltip("How often to send packets (in seconds)")]
    private float sendInterval = 1f / 20f; // 20 packets per second
    [SerializeField, Tooltip("Number of audio channels to send (mono=1, stereo=2)")]
    private int channels = 1;
    [SerializeField, Tooltip("Key to press for voice transmission (e.g., V)")]
    private KeyCode pushToTalkKey = KeyCode.V;
    [SerializeField, Tooltip("Listen to your mic")]
    private bool selfListen = false;
    [SerializeField, Tooltip("Opus bitrate (bits per second)")]
    private int bitrate = 24000; // 24 kbps is good for voice
    [SerializeField, Tooltip("Opus application type")]
    private OpusApplication application = OpusApplication.VoIP;
    // -----------------------------

    // Codec configuration actually in use (may differ from the inspector values
    // when those are unsupported).
    private int activeSampleRate;
    private int activeChannels;
    private SamplingFrequency opusFrequency;
    private NumChannels opusChannels;

    private OpusEncoder opusEncoder;

    // Reusable scratch buffers so the audio path does not allocate large arrays per packet.
    private float[] encodeFrame;   // exactly one 20 ms frame, interleaved
    private byte[] encodeOutput;   // encoder output before trimming
    private float[] decodeScratch; // decoder output before trimming

    // Raw microphone samples waiting to reach one bufferDuration chunk.
    private readonly List<float> sendBuffer = new List<float>();
    // Converted + resampled samples waiting to fill one Opus frame.
    private readonly List<float> encodeQueue = new List<float>();

    // Receive state keyed by sender netId.
    private readonly Dictionary<uint, RemoteVoiceStream> remoteStreams =
        new Dictionary<uint, RemoteVoiceStream>();

    private readonly List<NetworkPlayerSpeaker> networkPlayerSpeakers =
        new List<NetworkPlayerSpeaker>();

    // Undoes the microphone subscription made in Start.
    private Action releaseMicrophone;
    private bool serverHandlerRegistered;
    private bool clientHandlerRegistered;

    private void Awake()
    {
        instance = this;
    }

    /// <summary>
    /// Creates the Opus encoder, starts the first available microphone and
    /// registers the Mirror message handlers for whichever side is active.
    /// </summary>
    void Start()
    {
        // Resolve the codec configuration. Unsupported rates fall back to 48 kHz,
        // and the fallback is applied to resampling/buffering too so the whole
        // pipeline agrees with the codec.
        activeSampleRate = targetSampleRate;
        switch (targetSampleRate)
        {
            case 8000: opusFrequency = SamplingFrequency.Frequency_8000; break;
            case 12000: opusFrequency = SamplingFrequency.Frequency_12000; break;
            case 16000: opusFrequency = SamplingFrequency.Frequency_16000; break;
            case 24000: opusFrequency = SamplingFrequency.Frequency_24000; break;
            case 48000: opusFrequency = SamplingFrequency.Frequency_48000; break;
            default:
                Debug.LogWarning("Unsupported Opus sample rate " + targetSampleRate + " Hz, using 48000 Hz instead.");
                opusFrequency = SamplingFrequency.Frequency_48000;
                activeSampleRate = 48000;
                break;
        }

        // Anything that is not mono is treated as stereo.
        activeChannels = channels == 1 ? 1 : 2;
        opusChannels = activeChannels == 1 ? NumChannels.Mono : NumChannels.Stereo;

        opusEncoder = new OpusEncoder(opusFrequency, opusChannels, application);
        opusEncoder.Bitrate = bitrate;

        encodeFrame = new float[activeSampleRate * OpusFrameDurationMs / 1000 * activeChannels];
        encodeOutput = new byte[MaxOpusPacketBytes];
        // Same sizing formula as before, but allocated once instead of per packet.
        decodeScratch = new float[OpusDecoder.maximumPacketDuration * activeSampleRate / 1000 * activeChannels];

        // Initialize microphone
        Mic.Init();
        if (Mic.AvailableDevices.Count > 0)
        {
            var device = Mic.AvailableDevices[0];
            device.OnFrameCollected += OnFrameCollected;
            device.StartRecording();
            releaseMicrophone = () =>
            {
                device.OnFrameCollected -= OnFrameCollected;
                device.StopRecording();
            };
        }
        else
        {
            Debug.LogWarning("MirrorVoiceChat: no microphone available, voice capture disabled.");
        }

        // Register handlers
        if (NetworkServer.active)
        {
            NetworkServer.RegisterHandler<VoicePacket>(OnVoicePacket);
            serverHandlerRegistered = true;
        }
        if (NetworkClient.isConnected)
        {
            NetworkClient.RegisterHandler<VoicePacket>(OnVoicePacketReceived);
            clientHandlerRegistered = true;
        }
    }

    /// <summary>
    /// Releases everything acquired in <see cref="Start"/>: microphone subscription,
    /// network handlers and native Opus resources.
    /// </summary>
    private void OnDestroy()
    {
        // Stop the mic callback first so it can never hit a disposed encoder.
        releaseMicrophone?.Invoke();
        releaseMicrophone = null;

        if (serverHandlerRegistered)
        {
            NetworkServer.UnregisterHandler<VoicePacket>();
            serverHandlerRegistered = false;
        }
        if (clientHandlerRegistered)
        {
            NetworkClient.UnregisterHandler<VoicePacket>();
            clientHandlerRegistered = false;
        }

        // Clean up Opus resources
        opusEncoder?.Dispose();
        opusEncoder = null;
        foreach (var stream in remoteStreams.Values)
        {
            stream.Decoder?.Dispose();
        }
        remoteStreams.Clear();

        if (ReferenceEquals(instance, this))
        {
            instance = null;
        }
    }

    /// <summary>
    /// Server-side relay. Stamps the packet with the sender's real netId (the value
    /// supplied by the client is not trusted) and forwards it to every connection.
    /// The sender receives its own packet as well; clients drop it unless
    /// self-listen is enabled.
    /// </summary>
    private void OnVoicePacket(NetworkConnectionToClient conn, VoicePacket packet)
    {
        if (conn == null || conn.identity == null) return;

        byte[] payload = packet.compressedData;
        if (payload == null || payload.Length == 0 || payload.Length > MaxOpusPacketBytes) return;

        packet.connectionId = conn.identity.netId;

        foreach (var client in NetworkServer.connections.Values)
        {
            if (client != null)
            {
                client.Send(packet);
            }
        }
    }

    /// <summary>
    /// Microphone callback. While the push-to-talk key is held, accumulates
    /// samples and forwards them in chunks of <c>bufferDuration</c> seconds.
    /// </summary>
    /// <param name="frequency">Sample rate of <paramref name="samples"/> in Hz.</param>
    /// <param name="inputChannels">Number of interleaved channels in <paramref name="samples"/>.</param>
    /// <param name="samples">Interleaved PCM samples.</param>
    private void OnFrameCollected(int frequency, int inputChannels, float[] samples)
    {
        if (!Input.GetKey(pushToTalkKey))
        {
            // Drop leftovers so the next transmission does not begin with stale audio.
            sendBuffer.Clear();
            encodeQueue.Clear();
            return;
        }

        if (samples == null || samples.Length == 0 || frequency <= 0 || inputChannels <= 0) return;

        sendBuffer.AddRange(samples);

        // Chunk size in array elements: whole frames only, all channels included.
        int framesPerChunk = Mathf.Max(1, Mathf.FloorToInt(frequency * bufferDuration));
        int chunkLength = framesPerChunk * inputChannels;

        while (sendBuffer.Count >= chunkLength)
        {
            float[] samplesToSend = new float[chunkLength];
            sendBuffer.CopyTo(0, samplesToSend, 0, chunkLength);
            sendBuffer.RemoveRange(0, chunkLength);
            SendVoiceData(samplesToSend, frequency, inputChannels);
        }
    }

    /// <summary>
    /// Converts a chunk of microphone audio to the codec format, encodes it in
    /// 20 ms Opus frames and sends one <see cref="VoicePacket"/> per frame.
    /// Samples that do not fill a whole frame stay queued for the next call.
    /// </summary>
    private void SendVoiceData(float[] samples, int sourceSampleRate, int inputChannels)
    {
        if (!NetworkClient.isConnected) return;
        var connection = NetworkClient.connection;
        if (connection == null || connection.identity == null) return;

        // Step 1: Match the configured channel count (done first so resampling
        // works on the final layout and touches fewer samples).
        float[] processedSamples = MatchChannelCount(samples, inputChannels, activeChannels);

        // Step 2: Resample to the codec sample rate if needed
        processedSamples = Resample(processedSamples, sourceSampleRate, activeSampleRate, activeChannels);

        // Step 3: Encode and send every complete 20 ms frame
        encodeQueue.AddRange(processedSamples);
        uint senderNetId = connection.identity.netId;
        while (encodeQueue.Count >= encodeFrame.Length)
        {
            encodeQueue.CopyTo(0, encodeFrame, 0, encodeFrame.Length);
            encodeQueue.RemoveRange(0, encodeFrame.Length);

            byte[] compressed = EncodeWithOpus(encodeFrame);
            if (compressed == null || compressed.Length == 0) continue;

            NetworkClient.Send(new VoicePacket
            {
                connectionId = senderNetId,
                compressedData = compressed,
                sampleRate = activeSampleRate,
                channels = activeChannels
            });
        }
    }

    /// <summary>
    /// Returns <paramref name="interleaved"/> converted to <paramref name="outputChannels"/>
    /// channels: unchanged when the counts match, averaged down to mono otherwise,
    /// and that mono signal duplicated when more than one output channel is wanted.
    /// </summary>
    private float[] MatchChannelCount(float[] interleaved, int inputChannels, int outputChannels)
    {
        if (inputChannels == outputChannels) return interleaved;

        float[] mono = inputChannels > 1 ? ConvertToMono(interleaved, inputChannels) : interleaved;
        if (outputChannels == 1) return mono;

        float[] expanded = new float[mono.Length * outputChannels];
        for (int i = 0; i < mono.Length; i++)
        {
            for (int c = 0; c < outputChannels; c++)
            {
                expanded[i * outputChannels + c] = mono[i];
            }
        }
        return expanded;
    }

    /// <summary>Averages all channels of each interleaved frame into one mono sample.</summary>
    private float[] ConvertToMono(float[] stereo, int inputChannels)
    {
        float[] mono = new float[stereo.Length / inputChannels];
        for (int i = 0; i < mono.Length; i++)
        {
            float sum = 0;
            for (int c = 0; c < inputChannels; c++)
            {
                sum += stereo[i * inputChannels + c];
            }
            mono[i] = sum / inputChannels;
        }
        return mono;
    }

    /// <summary>
    /// Client-side handler. Decodes the packet with the sender's own decoder,
    /// queues the audio and makes sure a playback coroutine runs for that sender.
    /// </summary>
    private void OnVoicePacketReceived(VoicePacket packet)
    {
        if (packet.compressedData == null || packet.compressedData.Length == 0) return;
        if (!NetworkClient.active) return;
        if (NetworkClient.localPlayer == null) return;
        if (NetworkClient.localPlayer.netId == packet.connectionId && !selfListen) return;

        uint senderNetId = packet.connectionId;
        RemoteVoiceStream stream;
        if (!remoteStreams.TryGetValue(senderNetId, out stream))
        {
            stream = new RemoteVoiceStream(new OpusDecoder(opusFrequency, opusChannels));
            remoteStreams.Add(senderNetId, stream);
        }

        float[] decoded = DecodeWithOpus(stream.Decoder, packet.compressedData);
        if (decoded == null)
        {
            // Nothing will ever clean up a stream without a playback coroutine.
            if (!stream.IsPlaying)
            {
                remoteStreams.Remove(senderNetId);
                stream.Decoder.Dispose();
            }
            return;
        }

        stream.Samples.AddRange(decoded);
        stream.LastReceiveTime = Time.time;

        // Start playback if not already playing
        if (!stream.IsPlaying)
        {
            stream.IsPlaying = true;
            StartCoroutine(PlayReceivedAudio(senderNetId));
        }
    }

    /// <summary>
    /// Feeds queued audio of one sender to its speaker, one <c>bufferDuration</c>
    /// chunk per rendered frame. Ends once no packet has arrived for twice the
    /// buffer duration, playing the remaining tail and releasing the sender's state.
    /// </summary>
    /// <param name="connId">netId of the sender (see <see cref="VoicePacket.connectionId"/>).</param>
    private IEnumerator PlayReceivedAudio(uint connId)
    {
        RemoteVoiceStream stream;
        if (!remoteStreams.TryGetValue(connId, out stream)) yield break;

        stream.IsPlaying = true;
        int framesPerChunk = Mathf.Max(1, Mathf.FloorToInt(activeSampleRate * bufferDuration));
        int chunkLength = framesPerChunk * activeChannels;

        while (true)
        {
            if (stream.Samples.Count >= chunkLength)
            {
                FeedSpeaker(connId, stream.Samples, chunkLength);
            }
            else if (Time.time - stream.LastReceiveTime > bufferDuration * 2f)
            {
                // Play the tail (whole frames only) instead of leaving it to be
                // prepended to the sender's next transmission.
                int tailLength = stream.Samples.Count - stream.Samples.Count % activeChannels;
                if (tailLength > 0)
                {
                    FeedSpeaker(connId, stream.Samples, tailLength);
                }
                break;
            }
            yield return null;
        }

        stream.Samples.Clear();
        stream.IsPlaying = false;
        remoteStreams.Remove(connId);
        stream.Decoder.Dispose();
    }

    /// <summary>
    /// Removes the first <paramref name="count"/> samples from <paramref name="queue"/>
    /// and feeds them to the registered speaker with the given netId, if any.
    /// Samples are discarded when no matching speaker is registered.
    /// </summary>
    private void FeedSpeaker(uint senderNetId, List<float> queue, int count)
    {
        float[] samplesToPlay = new float[count];
        queue.CopyTo(0, samplesToPlay, 0, count);
        queue.RemoveRange(0, count);

        NetworkPlayerSpeaker speaker =
            networkPlayerSpeakers.FirstOrDefault(s => s != null && s.netId == senderNetId);
        if (speaker != null && speaker.Speaker != null)
        {
            speaker.Speaker.Feed(activeSampleRate, activeChannels, samplesToPlay);
        }
    }

    /// <summary>
    /// Linear-interpolation resampler for interleaved audio. Interpolates between
    /// whole frames so samples of different channels are never blended.
    /// </summary>
    /// <returns>
    /// The input array itself when no conversion is needed, otherwise a new array.
    /// </returns>
    private float[] Resample(float[] input, int srcRate, int dstRate, int channelCount = 1)
    {
        if (srcRate == dstRate || input.Length == 0 || channelCount <= 0) return input;

        int srcFrames = input.Length / channelCount;
        if (srcFrames == 0) return input;

        int dstFrames = Mathf.FloorToInt(srcFrames * ((float)dstRate / srcRate));
        float[] output = new float[dstFrames * channelCount];
        float step = (float)srcRate / dstRate;

        for (int frame = 0; frame < dstFrames; frame++)
        {
            float position = frame * step;
            // Clamp both neighbours: float rounding must not index past the end.
            int lower = Mathf.Min(Mathf.FloorToInt(position), srcFrames - 1);
            int upper = Mathf.Min(lower + 1, srcFrames - 1);
            float blend = position - lower;

            for (int c = 0; c < channelCount; c++)
            {
                output[frame * channelCount + c] = Mathf.Lerp(
                    input[lower * channelCount + c],
                    input[upper * channelCount + c],
                    blend);
            }
        }
        return output;
    }

    /// <summary>
    /// Encodes exactly one Opus frame of interleaved PCM.
    /// </summary>
    /// <returns>The encoded packet, or null when encoding produced nothing or failed.</returns>
    private byte[] EncodeWithOpus(float[] pcm)
    {
        try
        {
            int encodedBytes = opusEncoder.Encode(pcm, encodeOutput);
            if (encodedBytes > 0)
            {
                byte[] result = new byte[encodedBytes];
                Buffer.BlockCopy(encodeOutput, 0, result, 0, encodedBytes);
                return result;
            }
        }
        catch (Exception e)
        {
            Debug.LogError("Opus encoding error: " + e.Message);
        }
        return null;
    }

    /// <summary>
    /// Decodes one Opus packet with the given decoder. The output layout is that
    /// of the local decoder configuration, independent of what the packet claims.
    /// </summary>
    /// <returns>Interleaved PCM, or null when decoding produced nothing or failed.</returns>
    private float[] DecodeWithOpus(OpusDecoder decoder, byte[] data)
    {
        try
        {
            int decodedSamples = decoder.Decode(data, data.Length, decodeScratch);
            if (decodedSamples > 0)
            {
                // Decode reports samples per channel; never copy past the scratch buffer.
                int count = Mathf.Min(decodedSamples * activeChannels, decodeScratch.Length);
                float[] result = new float[count];
                Buffer.BlockCopy(decodeScratch, 0, result, 0, count * sizeof(float));
                return result;
            }
        }
        catch (Exception e)
        {
            Debug.LogError("Opus decoding error: " + e.Message);
        }
        return null;
    }

    /// <summary>Registers a speaker so received audio can be routed to it by netId.</summary>
    internal void AddPlayer(NetworkPlayerSpeaker networkPlayerSpeaker)
    {
        if (networkPlayerSpeaker == null) return;
        if (!networkPlayerSpeakers.Contains(networkPlayerSpeaker))
        {
            networkPlayerSpeakers.Add(networkPlayerSpeaker);
        }
    }

    /// <summary>Unregisters a speaker; does nothing when it was not registered.</summary>
    internal void RemovePlayer(NetworkPlayerSpeaker networkPlayerSpeaker)
    {
        networkPlayerSpeakers.Remove(networkPlayerSpeaker);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using Adrenak.UniMic; | |
| using Mirror; | |
| using UnityEngine; | |
/// <summary>
/// Networked component that exposes a player's <see cref="StreamedAudioSource"/>
/// and registers itself with <see cref="MirrorVoiceChat"/> for its lifetime.
/// </summary>
public class NetworkPlayerSpeaker : NetworkBehaviour
{
    [SerializeField] private StreamedAudioSource streamedAudioSource;

    /// <summary>Audio source that received voice samples are fed into.</summary>
    public StreamedAudioSource Speaker
    {
        get { return streamedAudioSource; }
    }

    // Register with the voice chat, if one exists at this point.
    private void Start()
    {
        MirrorVoiceChat voiceChat = MirrorVoiceChat.Instance;
        if (!voiceChat)
        {
            return;
        }

        voiceChat.AddPlayer(this);
    }

    // Unregister again, if the voice chat still exists.
    private void OnDestroy()
    {
        MirrorVoiceChat voiceChat = MirrorVoiceChat.Instance;
        if (!voiceChat)
        {
            return;
        }

        voiceChat.RemovePlayer(this);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment