Last active
January 16, 2023 01:42
-
-
Save ufcpp/08bc36a4619855bed5b8702107e887ea to your computer and use it in GitHub Desktop.
UnsafeRelaxedJsonEscaping でもエスケープしてしまう全角スペースとか + とかすら素通ししたい
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System.Text; | |
| using System.Text.Encodings.Web; | |
| using System.Text.Json; | |
| using System.Text.Json.Serialization; | |
// Sample value mixing Japanese text, a full-width space, ASCII, control characters,
// quotes, a backslash and a supplementary-plane emoji.
A source = new(123, "あいう abc \b/\r\n\t\"\\\'🐈");

JsonSerializerOptions options = new()
{
    // Uses the built-in relaxed encoder. Assign NoEscapingJavaScriptEncoder.NoEscaping
    // (declared later in this file) here instead to compare the two outputs.
    Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
    WriteIndented = true,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};

// Round-trip the record through JSON text.
string json = JsonSerializer.Serialize(source, options);
A? roundTripped = JsonSerializer.Deserialize<A>(json, options);

Console.WriteLine(json);
Console.WriteLine(roundTripped);

// Dump every UTF-16 code unit of the deserialized name with its hexadecimal value.
foreach (char codeUnit in roundTripped!.Name)
{
    Console.WriteLine($"{codeUnit}: U+{(int)codeUnit:X2}");
}

/// <summary>Simple payload used for the serialization round-trip.</summary>
record A(int Id, string Name);
/// <summary>
/// A <see cref="JavaScriptEncoder"/> that escapes as little as possible: only
/// backslash, double quote, control characters and supplementary-plane scalars
/// (values above U+FFFF) are escaped. Every other BMP character is written as-is.
/// </summary>
public class NoEscapingJavaScriptEncoder : JavaScriptEncoder
{
    /// <summary>Length of a two-character escape such as <c>\n</c>.</summary>
    private const int ShortEscapeLength = 2;

    /// <summary>Length of one <c>\uXXXX</c> escape.</summary>
    private const int HexEscapeLength = 6;

    /// <summary>Length of a surrogate pair written as two <c>\uXXXX</c> escapes.</summary>
    private const int SurrogatePairEscapeLength = 2 * HexEscapeLength;

    /// <summary>Shared instance of the encoder.</summary>
    public static JavaScriptEncoder NoEscaping { get; } = new NoEscapingJavaScriptEncoder();

    /// <summary>
    /// The longest output is a supplementary scalar written as two <c>\uXXXX</c> escapes.
    /// </summary>
    public override int MaxOutputCharactersPerInputCharacter => SurrogatePairEscapeLength;

    /// <summary>
    /// Returns the index of the first UTF-16 code unit in <paramref name="text"/> that must
    /// be escaped, or -1 when none does.
    /// </summary>
    /// <param name="text">Pointer to the first UTF-16 code unit to inspect.</param>
    /// <param name="textLength">Number of code units available at <paramref name="text"/>.</param>
    public override unsafe int FindFirstCharacterToEncode(char* text, int textLength)
    {
        for (int i = 0; i < textLength; i++)
        {
            char codeUnit = text[i];

            // A surrogate code unit is half of a scalar above U+FFFF, which WillEncode
            // reports as needing escaping. Testing the code unit alone would miss it,
            // so surrogates are flagged explicitly to keep both methods in agreement.
            if (char.IsSurrogate(codeUnit) || WillEncode(codeUnit))
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>
    /// Writes the encoded form of <paramref name="unicodeScalar"/> to <paramref name="buffer"/>.
    /// </summary>
    /// <param name="unicodeScalar">The scalar value to encode.</param>
    /// <param name="buffer">Destination for the encoded characters.</param>
    /// <param name="bufferLength">Number of characters available at <paramref name="buffer"/>.</param>
    /// <param name="numberOfCharactersWritten">
    /// Number of characters written; 0 when the method returns <see langword="false"/>.
    /// </param>
    /// <returns>
    /// <see langword="false"/> when <paramref name="buffer"/> is too small (nothing is written
    /// in that case); otherwise <see langword="true"/>.
    /// </returns>
    public override unsafe bool TryEncodeUnicodeScalar(int unicodeScalar, char* buffer, int bufferLength, out int numberOfCharactersWritten)
    {
        numberOfCharactersWritten = 0;

        if (GetShortEscape(unicodeScalar) is { } shortEscape)
        {
            if (bufferLength < ShortEscapeLength)
            {
                return false;
            }

            buffer[0] = '\\';
            buffer[1] = shortEscape;
            numberOfCharactersWritten = ShortEscapeLength;
            return true;
        }

        // Handle supplementary scalars before any cast to char: truncating them first
        // would make e.g. U+10000 look like the control character U+0000.
        if (unicodeScalar > char.MaxValue)
        {
            // Check space for both escapes up front so nothing is written on failure.
            if (bufferLength < SurrogatePairEscapeLength)
            {
                return false;
            }

            var rune = new Rune(unicodeScalar);
            Span<char> surrogates = stackalloc char[2];
            rune.EncodeToUtf16(surrogates);
            WriteHexEscape(surrogates[0], buffer);
            WriteHexEscape(surrogates[1], buffer + HexEscapeLength);
            numberOfCharactersWritten = SurrogatePairEscapeLength;
            return true;
        }

        var bmpChar = (char)unicodeScalar;
        if (char.IsControl(bmpChar))
        {
            if (bufferLength < HexEscapeLength)
            {
                return false;
            }

            WriteHexEscape(bmpChar, buffer);
            numberOfCharactersWritten = HexEscapeLength;
            return true;
        }

        // Anything else is copied through unchanged.
        if (bufferLength < 1)
        {
            return false;
        }

        buffer[0] = bmpChar;
        numberOfCharactersWritten = 1;
        return true;
    }

    /// <summary>
    /// Reports whether <paramref name="unicodeScalar"/> is escaped by this encoder:
    /// any scalar above U+FFFF, any control character, backslash and double quote.
    /// </summary>
    /// <param name="unicodeScalar">The scalar value to test.</param>
    public override bool WillEncode(int unicodeScalar)
    {
        // Range check first; the char cast below is only meaningful for BMP values.
        if (unicodeScalar > char.MaxValue)
        {
            return true;
        }

        return char.IsControl((char)unicodeScalar) || GetShortEscape(unicodeScalar) is not null;
    }

    /// <summary>
    /// Returns the character that follows the backslash in the two-character escape for
    /// <paramref name="unicodeScalar"/>, or <see langword="null"/> when it has none.
    /// </summary>
    private static char? GetShortEscape(int unicodeScalar) => unicodeScalar switch
    {
        '\\' => '\\',
        '\r' => 'r',
        '\n' => 'n',
        '\t' => 't',
        '\"' => '\"',
        _ => null,
    };

    /// <summary>
    /// Writes <paramref name="codeUnit"/> as <c>\uXXXX</c> (four uppercase hex digits).
    /// The caller must guarantee six writable characters at <paramref name="buffer"/>.
    /// </summary>
    private static unsafe void WriteHexEscape(char codeUnit, char* buffer)
    {
        buffer[0] = '\\';
        buffer[1] = 'u';
        ((ushort)codeUnit).TryFormat(new Span<char>(buffer + 2, 4), out _, "X4");
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment