Last active
September 23, 2025 19:21
-
-
Save pmolodo/63a6a300ecc0cc2c0398a6f45c0fe7b8 to your computer and use it in GitHub Desktop.
try_decode() - utility function that attempts to decode an encoded byte string into text by trying several likely codecs; useful for printing results (e.g. captured subprocess output) in error-handling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # try_decode by Paul Molodowitch is marked CC0 1.0. | |
| # to view a copy of this mark, visit https://creativecommons.org/publicdomain/zero/1.0/ | |
| import locale | |
| import sys | |
# Lazily-built cache of the codec names returned by get_test_codecs().
# Empty until the first call.
_TEST_CODECS: tuple[str, ...] = ()


def get_test_codecs():
    """Return the codec names worth trying when decoding bytes of unknown origin.

    The list is assembled once, on first call, from the encodings the
    interpreter reports for the locale, the default string encoding, the
    standard streams and the filesystem, followed by "utf-8" and (on
    Windows only) a few common Windows encodings. The result is cached in
    the module-level ``_TEST_CODECS``.

    Every candidate is resolved through ``codecs.lookup``; names that are
    not strings or that this interpreter cannot resolve are dropped, and
    duplicates (including different spellings of the same codec, such as
    "UTF-8" and "utf-8") are collapsed while preserving order.

    Returns:
        tuple[str, ...]: Canonical codec names, highest priority first.
    """
    global _TEST_CODECS  # pylint: disable=global-statement
    if _TEST_CODECS:
        return _TEST_CODECS

    # Local import: only needed while building the cache.
    import codecs

    # many of these will be the same, but in case some differ...
    candidates = []
    if hasattr(locale, "getencoding"):  # on python >= 3.11
        candidates.append(locale.getencoding())
    candidates.append(locale.getpreferredencoding())
    candidates.append(sys.getdefaultencoding())
    # The standard streams can be None (no console attached) or replaced by
    # objects whose ``encoding`` is missing or None, so read it defensively.
    for stream in (sys.stderr, sys.stdout):
        candidates.append(getattr(stream, "encoding", None))
    candidates.append(sys.getfilesystemencoding())
    candidates.append("utf-8")

    if sys.platform == "win32":
        candidates.extend(
            [
                "cp1252",  # windows default in US / much of europe
                "utf-16-le",  # used by windows "wide" strings
                "mbcs",  # default windows filesystem encoding in python < 3.6
                "oem",  # on windows, "encoding that corresponds to the system's current OEM code page"
            ]
        )

    canonical_names = []
    for name in candidates:
        if not isinstance(name, str):
            continue
        try:
            canonical_names.append(codecs.lookup(name).name)
        except LookupError:
            # Unknown to this interpreter - would only raise again at decode time.
            continue

    # make unique, preserve order
    _TEST_CODECS = tuple(dict.fromkeys(canonical_names))
    return _TEST_CODECS
| def try_decode(encoded_str: bytes | str) -> str | bytes: | |
| """ | |
| Attempt to decode a bytes object using a list of common codecs. | |
| If the input is already a string, it is returned as-is. | |
| If decoding fails for all codecs, or an unexpected error occurs, | |
| the original input is returned. | |
| Args: | |
| encoded_str (bytes | str): The input to decode. | |
| Returns: | |
| str | bytes: The decoded string if successful, otherwise the original input. | |
| Example usage: | |
| import subprocess, sys | |
| try: | |
| subprocess.run(["ls", "/nonexistent/path"], check=True, capture_output=True) | |
| except subprocess.CalledProcessError as err: | |
| print("STDOUT:", try_decode(err.stdout)) | |
| print("STDERR:", try_decode(err.stderr, file=sys.stderr)) | |
| """ | |
| try: | |
| if isinstance(encoded_str, str): | |
| return encoded_str | |
| for codec in get_test_codecs(): | |
| try: | |
| return encoded_str.decode(codec) | |
| except UnicodeDecodeError: | |
| pass | |
| except Exception: # pylint: disable=broad-except | |
| # this function is often used inside of error-handling, so we want | |
| # to make it "no-throw" as much as possible - default is just return | |
| # input bytes | |
| pass | |
| return encoded_str |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment