Skip to content

Instantly share code, notes, and snippets.

@pmolodo
Last active September 23, 2025 19:21
Show Gist options
  • Select an option

  • Save pmolodo/63a6a300ecc0cc2c0398a6f45c0fe7b8 to your computer and use it in GitHub Desktop.

Select an option

Save pmolodo/63a6a300ecc0cc2c0398a6f45c0fe7b8 to your computer and use it in GitHub Desktop.
try_decode() - utility function for attempting to decode an encoded unicode string; useful for printing results in error-handling
# try_decode by Paul Molodowitch is marked CC0 1.0.
# to view a copy of this mark, visit https://creativecommons.org/publicdomain/zero/1.0/
import locale
import sys
# Cache for get_test_codecs(); filled on first call, empty tuple means "not built yet".
_TEST_CODECS: tuple[str, ...] = ()


def get_test_codecs() -> tuple[str, ...]:
    """
    Return the ordered, de-duplicated codec names that try_decode() should attempt.

    Candidates are gathered from the locale, the interpreter defaults, the
    standard streams and the filesystem encoding, followed by "utf-8" and
    (on Windows only) a few Windows-specific codecs.  Entries that are not
    non-empty strings, or that Python's codec registry does not recognize,
    are dropped.  Names that resolve to the same codec (e.g. "UTF-8" and
    "utf-8") are collapsed, keeping the first spelling seen.

    The result is computed once and cached in the module-level _TEST_CODECS.

    Returns:
        tuple[str, ...]: Codec names, in the order they should be tried.
    """
    global _TEST_CODECS  # pylint: disable=global-statement
    if _TEST_CODECS:
        return _TEST_CODECS

    # Imported locally so this function does not rely on a module-level
    # import that the rest of the file does not otherwise need.
    import codecs  # pylint: disable=import-outside-toplevel

    # many of these will be the same, but in case some differ...
    candidates = [
        locale.getpreferredencoding(),
        sys.getdefaultencoding(),
        # sys.stderr / sys.stdout can be None or replaced by objects without
        # an encoding, so look the attribute up defensively.
        getattr(sys.stderr, "encoding", None),
        getattr(sys.stdout, "encoding", None),
        sys.getfilesystemencoding(),
        "utf-8",
    ]
    if hasattr(locale, "getencoding"):  # on python >= 3.11
        candidates.insert(0, locale.getencoding())
    if sys.platform == "win32":
        candidates.extend(
            [
                "cp1252",  # windows default in US / much of europe
                "utf-16-le",  # used by windows "wide" strings
                "mbcs",  # default windows filesystem encoding in python < 3.6
                "oem",  # on windows, "encoding that corresponds to the system's current OEM code page"
            ]
        )

    # make unique (by canonical codec name), preserve order, and drop
    # anything that is not a codec name Python can actually resolve
    usable: dict[str, str] = {}
    for name in candidates:
        if not isinstance(name, str) or not name:
            continue
        try:
            canonical = codecs.lookup(name).name
        except LookupError:
            continue
        usable.setdefault(canonical, name)

    _TEST_CODECS = tuple(usable.values())
    return _TEST_CODECS
def try_decode(encoded_str: bytes | str) -> str | bytes:
"""
Attempt to decode a bytes object using a list of common codecs.
If the input is already a string, it is returned as-is.
If decoding fails for all codecs, or an unexpected error occurs,
the original input is returned.
Args:
encoded_str (bytes | str): The input to decode.
Returns:
str | bytes: The decoded string if successful, otherwise the original input.
Example usage:
import subprocess, sys
try:
subprocess.run(["ls", "/nonexistent/path"], check=True, capture_output=True)
except subprocess.CalledProcessError as err:
print("STDOUT:", try_decode(err.stdout))
print("STDERR:", try_decode(err.stderr, file=sys.stderr))
"""
try:
if isinstance(encoded_str, str):
return encoded_str
for codec in get_test_codecs():
try:
return encoded_str.decode(codec)
except UnicodeDecodeError:
pass
except Exception: # pylint: disable=broad-except
# this function is often used inside of error-handling, so we want
# to make it "no-throw" as much as possible - default is just return
# input bytes
pass
return encoded_str
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment