pmolodo · September 23, 2025 19:21
diff --git a/try_decode.py b/try_decode.py
 # try_decode by Paul Molodowitch is marked CC0 1.0.
 # to view a copy of this mark, visit https://creativecommons.org/publicdomain/zero/1.0/

 import locale
 import sys

 _TEST_CODECS: tuple[str, ...] = ()  # lazily-filled cache for get_test_codecs()


 def get_test_codecs() -> tuple[str, ...]:
    """
    Return the codec names that try_decode() should attempt, in priority order.

    The list is built on the first call from the encodings reported by
    ``locale`` and ``sys`` (plus a few Windows-specific extras on win32), then
    cached in the module-level ``_TEST_CODECS`` for later calls.

    Candidates that are not usable codec names are dropped: ``None`` (e.g.
    when ``sys.stdout`` / ``sys.stderr`` is None or has no ``encoding``) and
    names that ``codecs.lookup`` does not recognise.  Duplicates are detected
    by canonical codec name, so "UTF-8" and "utf-8" count as the same codec;
    the first spelling seen is the one returned.

    Returns:
        tuple[str, ...]: Unique, resolvable codec names.
    """
    global _TEST_CODECS  # pylint: disable=global-statement
    if not _TEST_CODECS:
        # local import so the module's top-level imports stay untouched
        import codecs  # pylint: disable=import-outside-toplevel

        # many of these will be the same, but in case some differ...
        candidates = []
        if hasattr(locale, "getencoding"):  # on python >= 3.11
            candidates.append(locale.getencoding())
        candidates += [
            locale.getpreferredencoding(),
            sys.getdefaultencoding(),
            # the std streams may be None, or replaced by objects that have
            # no "encoding" attribute - getattr() yields None in both cases
            getattr(sys.stderr, "encoding", None),
            getattr(sys.stdout, "encoding", None),
            sys.getfilesystemencoding(),
            "utf-8",
        ]

        if sys.platform == "win32":
            candidates += [
                # windows default in US / much of europe; the registered
                # codec name is "cp1252" (alias "windows-1252"), not "cp-1252"
                "cp1252",
                "utf-16-le",  # used by windows "wide" strings
                "mbcs",  # default windows filesystem encoding in python < 3.6
                "oem",  # on windows, "encoding that corresponds to the system's current OEM code page"
            ]

        # make unique (by canonical codec name), preserve order, and skip
        # anything that is not a usable codec name
        usable: dict[str, str] = {}
        for name in candidates:
            try:
                canonical = codecs.lookup(name).name
            except (LookupError, TypeError, ValueError):
                continue
            usable.setdefault(canonical, name)
        _TEST_CODECS = tuple(usable.values())
    return _TEST_CODECS


 def try_decode(encoded_str: bytes | str) -> str | bytes:
    """
    Attempt to decode a bytes object using a list of common codecs.
    If the input is already a string, it is returned as-is.
    If decoding fails for all codecs, or an unexpected error occurs,
    the original input is returned.

    Args:
        encoded_str (bytes | str): The input to decode.

    Returns:
        str | bytes: The decoded string if successful, otherwise the original input.

    Example usage:
        import subprocess, sys

        try:
            subprocess.run(["ls", "/nonexistent/path"], check=True, capture_output=True)
        except subprocess.CalledProcessError as err:
            print("STDOUT:", try_decode(err.stdout))
            print("STDERR:", try_decode(err.stderr, file=sys.stderr))
    """
    try:
        if isinstance(encoded_str, str):
            return encoded_str

        for codec in get_test_codecs():
            try:
                return encoded_str.decode(codec)
            except UnicodeDecodeError:
                pass
    except Exception:  # pylint: disable=broad-except
        # this function is often used inside of error-handling, so we want
        # to make it "no-throw" as much as possible - default is just return
        # input bytes
        pass
    return encoded_str
	# try_decode by Paul Molodowitch is marked CC0 1.0.
	# to view a copy of this mark, visit https://creativecommons.org/publicdomain/zero/1.0/

	import locale
	import sys

	_TEST_CODECS: tuple[str, ...] = ()  # lazily-filled cache for get_test_codecs()


	def get_test_codecs() -> tuple[str, ...]:
	    """
	    Return the codec names that try_decode() should attempt, in priority order.

	    The list is built on the first call from the encodings reported by
	    ``locale`` and ``sys`` (plus a few Windows-specific extras on win32), then
	    cached in the module-level ``_TEST_CODECS`` for later calls.

	    Candidates that are not usable codec names are dropped: ``None`` (e.g.
	    when ``sys.stdout`` / ``sys.stderr`` is None or has no ``encoding``) and
	    names that ``codecs.lookup`` does not recognise.  Duplicates are detected
	    by canonical codec name, so "UTF-8" and "utf-8" count as the same codec;
	    the first spelling seen is the one returned.

	    Returns:
	        tuple[str, ...]: Unique, resolvable codec names.
	    """
	    global _TEST_CODECS  # pylint: disable=global-statement
	    if not _TEST_CODECS:
	        # local import so the module's top-level imports stay untouched
	        import codecs  # pylint: disable=import-outside-toplevel

	        # many of these will be the same, but in case some differ...
	        candidates = []
	        if hasattr(locale, "getencoding"):  # on python >= 3.11
	            candidates.append(locale.getencoding())
	        candidates += [
	            locale.getpreferredencoding(),
	            sys.getdefaultencoding(),
	            # the std streams may be None, or replaced by objects that have
	            # no "encoding" attribute - getattr() yields None in both cases
	            getattr(sys.stderr, "encoding", None),
	            getattr(sys.stdout, "encoding", None),
	            sys.getfilesystemencoding(),
	            "utf-8",
	        ]

	        if sys.platform == "win32":
	            candidates += [
	                # windows default in US / much of europe; the registered
	                # codec name is "cp1252" (alias "windows-1252"), not "cp-1252"
	                "cp1252",
	                "utf-16-le",  # used by windows "wide" strings
	                "mbcs",  # default windows filesystem encoding in python < 3.6
	                "oem",  # on windows, "encoding that corresponds to the system's current OEM code page"
	            ]

	        # make unique (by canonical codec name), preserve order, and skip
	        # anything that is not a usable codec name
	        usable: dict[str, str] = {}
	        for name in candidates:
	            try:
	                canonical = codecs.lookup(name).name
	            except (LookupError, TypeError, ValueError):
	                continue
	            usable.setdefault(canonical, name)
	        _TEST_CODECS = tuple(usable.values())
	    return _TEST_CODECS


	def try_decode(encoded_str: bytes \| str) -> str \| bytes:
	"""
	Attempt to decode a bytes object using a list of common codecs.
	If the input is already a string, it is returned as-is.
	If decoding fails for all codecs, or an unexpected error occurs,
	the original input is returned.

	Args:
	encoded_str (bytes \| str): The input to decode.

	Returns:
	str \| bytes: The decoded string if successful, otherwise the original input.

	Example usage:
	import subprocess, sys

	try:
	subprocess.run(["ls", "/nonexistent/path"], check=True, capture_output=True)
	except subprocess.CalledProcessError as err:
	print("STDOUT:", try_decode(err.stdout))
	print("STDERR:", try_decode(err.stderr, file=sys.stderr))
	"""
	try:
	if isinstance(encoded_str, str):
	return encoded_str

	for codec in get_test_codecs():
	try:
	return encoded_str.decode(codec)
	except UnicodeDecodeError:
	pass
	except Exception: # pylint: disable=broad-except
	# this function is often used inside of error-handling, so we want
	# to make it "no-throw" as much as possible - default is just return
	# input bytes
	pass
	return encoded_str
No results found