matchaxnb · September 19, 2025 15:34
diff --git a/text_algebra.py b/text_algebra.py
 #!/usr/bin/env python
 # License: CC-0
 """text_algebra: a stack-based RPN line-matching toolkit
 Usage: text_algebra [--repl|-f your-program.txt|your inline program]
  --repl starts a REPL to work interactively. 
  -f your-program.txt will load a (possibly multi-line) program

 Definitions:
  Operators: U N D S X C F P R (see examples)
  Operands: anything else will be considered as filenames and read as
  unordered sets to the stack
  Output: the set operators (U N D S X F) push their result onto the stack.
  End of program: at the end of a program, the lines contained in the last element of
  the stack are printed, if there are any.
  Comments: anything after a '#' symbol
    this_is_not_a_comment.txt  #This is a comment


 Examples:
  phone-numbers-mary.txt phone-numbers-luke.txt U # U for Union
  This will output the union of all lines in phone-numbers-mary.txt and phone-numbers-luke.txt

  pn-mary.txt pn-joe.txt N # N for iNtersection
  This will output the lines in common in these two files

  pn-mary.txt pn-gustave.txt D # D for Difference
  This will output the lines that appear in pn-mary.txt but not in pn-gustave.txt

  pn-robert.txt pn-henry.txt S # S for substring
  This will output all lines in pn-robert.txt that are substrings of lines in pn-henry.txt

  pn-alice.txt pn-bob.txt X # X for XOR
  This will output all lines that are in exactly one of pn-alice.txt or pn-bob.txt

  pn-charlie.txt C # C for Count
  This will push pn-charlie.txt on the stack and print its number of lines (the count itself is not pushed onto the stack)

  pn-danish.txt F 'john.*ghost.*' # F for Find
  This will output all lines in pn-danish.txt that match the regex /john.*ghost.*/

  pn-danish.txt pn-echo.txt P # P for Peek
  This will push pn-danish.txt and then pn-echo.txt and print the entire stack without modifying it

  pn-danish.txt pn-echo.txt pn-foxtrot.txt U X P C R # R for reset
  This will
    - load the three files on the stack
    - union pn-echo.txt and pn-foxtrot.txt
    - run the symmetric difference of that union with pn-danish.txt
    - peek at the entire stack
    - print the number of lines in the set on top of the stack (the stack itself holds 1 item at this point)
    - reset the stack afterwards

 REPL help:
  Use the following REPL commands to adjust its functions:
    quiet will toggle outputting the last item in the stack after each line is read
    exit will leave the REPL
    help will show this help

 Notes:
  F is a pseudo-operator that requires a complement (the regex) to it coming *after* it.
  The regex is not and cannot be read from the stack. It's a static element at the time of
  writing the program. Additionally because I'm a lazy thing the regex must be on the same
  line as F due to the way tokens are processed.
 """

 import sys
 import re
 import os
 import readline
 import rlcompleter
 import atexit
 import glob

 # Path of the readline history file used by the REPL ("~" is expanded to the user's home directory).
 HISTORY_FILE = os.path.expanduser("~/.text_algebra_history")

 def init_readline():
     """Load the persistent REPL history and arrange for it to be saved at exit.

     The history is read from HISTORY_FILE. Failing to read it is not fatal:
     the REPL simply starts with an empty history. Writing the history back
     is registered with atexit so it happens when the interpreter shuts down.
     """
     try:
         readline.read_history_file(HISTORY_FILE)
     except OSError:
         # FileNotFoundError (first run) is a subclass of OSError; other
         # OS-level failures such as a permission problem are treated the same
         # way because history is a best-effort convenience.
         pass
     atexit.register(readline.write_history_file, HISTORY_FILE)


 def file_completer(text, state):
     """Readline completer proposing filesystem paths that start with *text*.

     readline calls the completer with state = 0, 1, 2, ... and expects the
     state-th candidate each time, then None once the candidates run out.
     """
     # Escape glob metacharacters so partially typed names such as "a[1]" are
     # matched literally, and sort so every call sees the same candidate order.
     matches = sorted(glob.glob(glob.escape(text) + "*"))
     return matches[state] if state < len(matches) else None


 def show_result(stack):
     """Print the lines held by the top stack element in sorted order.

     Nothing is printed when the stack is empty.
     """
     if not stack:
         return
     top_lines = stack[-1]
     for entry in sorted(top_lines):
         print(entry)

 def start_repl(stack):
     """Run the interactive read-eval-print loop on *stack*.

     Each input line is either a REPL command ("exit", "help", "quiet") or a
     line of program tokens handed to machine_process. Unless quiet mode is
     on, the top of the stack is echoed after every processed line. The loop
     ends on "exit", end-of-file or Ctrl-C; any other exception is reported
     and the loop carries on with the stack as it is.
     """
     init_readline()
     readline.set_completer(file_completer)
     readline.parse_and_bind("tab: complete")
     print("# text-algebra REPL")
     quiet = False
     while True:
         try:
             line = input(">>> ")
             command = line.strip()
             if command == "exit":
                 break
             elif command == "help":
                 print(__doc__)
                 continue
             elif command == "quiet":
                 quiet = not quiet
                 continue
             machine_process(line, stack)
             # The stack may be empty here (blank or comment-only line, "R",
             # or a failed first operand); only echo when there is a top.
             if stack and not quiet:
                 print(stack[-1])
         except (EOFError, KeyboardInterrupt):
             print("\nExiting.")
             break
         except Exception as e:
             # Top-level boundary of the REPL: report and keep the session alive.
             print(f"\nGot an exception: {e}. Ignoring it and continuing as-is.")
             continue


 def read_file_lines(filename):
     """Return the distinct non-blank lines of *filename*, stripped of surrounding whitespace.

     The file is opened as UTF-8 text. Lines that are empty after stripping
     are dropped, and duplicates collapse because the result is a set.
     """
     unique_lines = set()
     with open(filename, "r", encoding="utf-8") as handle:
         for raw in handle:
             stripped = raw.strip()
             if stripped:
                 unique_lines.add(stripped)
     return unique_lines


 def union(a, b):
     """Return the elements found in *a*, in *b*, or in both."""
     merged = a | b
     return merged


 def intersection(a, b):
     """Return the elements present in both *a* and *b*."""
     common = a & b
     return common


 def difference(a, b):
     """Return the elements of *a* that do not appear in *b*."""
     remaining = a - b
     return remaining


 def substring_match(a, b):
     """Return the lines of *a* that occur as a substring of at least one line of *b*."""
     matched = set()
     for candidate in a:
         for haystack in b:
             if candidate in haystack:
                 matched.add(candidate)
                 break  # one containing line is enough
     return matched


 def symmetric_difference(a, b):
     """Return the elements that are in exactly one of *a* and *b*."""
     exclusive = a ^ b
     return exclusive


 def count(a):
     """Print the number of elements in *a* and return *a* itself."""
     size = len(a)
     print(size)
     return a


 def regex_filter(a, pattern):
     """Return the lines of *a* in which the regular expression *pattern* is found.

     The pattern is compiled once and applied with search(), so it may match
     anywhere inside a line rather than only at its start.
     """
     found_in = re.compile(pattern).search
     return {entry for entry in a if found_in(entry)}


 def machine_process(line_of_tokens, stack):
     """Execute one line of RPN tokens against *stack*, mutating it in place.

     Everything from the first '#' on is discarded as a comment. The rest is
     split on whitespace and each token is handled in turn:

       U N D S X   pop two sets (b first, then a) and push the combined set
       C           print the size of the top set; the stack is not modified
       F <regex>   replace the top set by its lines matching the regex
       P           print the stack
       R           empty the stack
       other       taken as a filename whose lines are pushed as a set

     On the first error a message is printed and the rest of the line is
     skipped; the stack keeps whatever state it had at that point.
     """
     args = line_of_tokens.split('#', 1)[0].split()
     i = 0
     while i < len(args):
         token = args[i]
         if token in {"U", "N", "D", "S", "X"}:
             if len(stack) < 2:
                 print(f"Error: Operator '{token}' requires two operands.")
                 return
             # Resolved only once a binary operator has valid operands.
             binary_op = {
                 "U": union,
                 "N": intersection,
                 "D": difference,
                 "S": substring_match,
                 "X": symmetric_difference,
             }[token]
             b = stack.pop()
             a = stack.pop()
             stack.append(binary_op(a, b))
         elif token == "C":
             if not stack:
                 print("Error: Operator 'C' requires one operand.")
                 return
             count(stack[-1])
         elif token == "F":
             if i + 1 >= len(args):
                 print("Error: Operator 'F' requires a regex pattern.")
                 return
             pattern = args[i + 1]
             # Tokens are split on whitespace only, so in a program file or the
             # REPL the quotes of the documented  F 'regex'  form arrive as
             # part of the token; drop one matching pair of surrounding quotes.
             if len(pattern) >= 2 and pattern[0] == pattern[-1] and pattern[0] in "'\"":
                 pattern = pattern[1:-1]
             if not stack:
                 print("Error: Operator 'F' requires one operand.")
                 return
             # Validate before popping so a bad regex leaves the stack intact
             # and is reported like every other error on this line.
             try:
                 re.compile(pattern)
             except re.error as e:
                 print(f"Error: invalid regex '{pattern}': {e}")
                 return
             top = stack.pop()
             stack.append(regex_filter(top, pattern))
             i += 1  # Skip the pattern argument
         elif token == "P":
             print("peeking at stack of length", len(stack))
             print(stack)
         elif token == "R":
             print("resetting stack")
             stack.clear()
         else:
             try:
                 lines = read_file_lines(token)
             except (OSError, ValueError) as e:
                 # OSError: missing/unreadable file. ValueError covers decoding
                 # errors (UnicodeDecodeError) and malformed file names.
                 print(f"Error reading file '{token}': {e}")
                 return
             stack.append(lines)
         i += 1

 def execute_program(stack, program_name):
     """Run every line of the program file *program_name* against *stack*.

     The file is read completely (as UTF-8 text) and closed before the first
     line is handed to machine_process.
     """
     with open(program_name, 'r', encoding='utf-8') as source:
         program_lines = source.readlines()
     for program_line in program_lines:
         machine_process(program_line, stack)

 def main():
     """Command-line entry point.

     Dispatches on the first argument: "--repl" starts the REPL, "-f FILE"
     runs a program file, and anything else is joined with spaces and run as
     an inline program. Afterwards the top of the stack is printed.

     When called without arguments, or with "-f" but no file name, the usage
     text is written to stderr and the process exits with status 2.
     """
     args = sys.argv[1:]
     if not args or (args[0] == "-f" and len(args) < 2):
         # Previously these cases crashed with IndexError on args[0]/args[1].
         print(__doc__, file=sys.stderr)
         raise SystemExit(2)
     stack = []
     if args[0] == "--repl":
         start_repl(stack)
     elif args[0] == "-f":
         execute_program(stack, args[1])
     else:
         machine_process(" ".join(args), stack)
     show_result(stack)


 # Run the command-line interface only when executed as a script, not on import.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python
	# License: CC-0
	"""text_algebra: a stack-based RPN line-matching toolkit
	Usage: text_algebra [--repl|-f your-program.txt|your inline program]
	--repl starts a REPL to work interactively.
	-f your-program.txt will load a (possibly multi-line) program

	Definitions:
	Operators: U N D S X C F P R (see examples)
	Operands: anything else will be considered as filenames and read as
	unordered sets to the stack
	Output: the set operators (U N D S X F) push their result onto the stack.
	End of program: at the end of a program, the lines contained in the last element of
	the stack are printed, if there are any.
	Comments: anything after a '#' symbol
	this_is_not_a_comment.txt #This is a comment


	Examples:
	phone-numbers-mary.txt phone-numbers-luke.txt U # U for Union
	This will output the union of all lines in phone-numbers-mary.txt and phone-numbers-luke.txt

	pn-mary.txt pn-joe.txt N # N for iNtersection
	This will output the lines in common in these two files

	pn-mary.txt pn-gustave.txt D # D for Difference
	This will output the lines that appear in pn-mary.txt but not in pn-gustave.txt

	pn-robert.txt pn-henry.txt S # S for substring
	This will output all lines in pn-robert.txt that are substrings of lines in pn-henry.txt

	pn-alice.txt pn-bob.txt X # X for XOR
	This will output all lines that are in exactly one of pn-alice.txt or pn-bob.txt

	pn-charlie.txt C # C for Count
	This will push pn-charlie.txt on the stack and print its number of lines (the count itself is not pushed onto the stack)

	pn-danish.txt F 'john.*ghost.*' # F for Find
	This will output all lines in pn-danish.txt that match the regex /john.*ghost.*/

	pn-danish.txt pn-echo.txt P # P for Peek
	This will push pn-danish.txt and then pn-echo.txt and print the entire stack without modifying it

	pn-danish.txt pn-echo.txt pn-foxtrot.txt U X P C R # R for reset
	This will
	- load the three files on the stack
	- union pn-echo.txt and pn-foxtrot.txt
	- run the symmetric difference of that union with pn-danish.txt
	- peek at the entire stack
	- print the number of lines in the set on top of the stack (the stack itself holds 1 item at this point)
	- reset the stack afterwards

	REPL help:
	Use the following REPL commands to adjust its functions:
	quiet will toggle outputting the last item in the stack after each line is read
	exit will leave the REPL
	help will show this help

	Notes:
	F is a pseudo-operator that requires a complement (the regex) to it coming *after* it.
	The regex is not and cannot be read from the stack. It's a static element at the time of
	writing the program. Additionally because I'm a lazy thing the regex must be on the same
	line as F due to the way tokens are processed.
	"""

	import sys
	import re
	import os
	import readline
	import rlcompleter
	import atexit
	import glob

	# Path of the readline history file used by the REPL ("~" is expanded to the user's home directory).
	HISTORY_FILE = os.path.expanduser("~/.text_algebra_history")

	def init_readline():
	    """Load the persistent REPL history and arrange for it to be saved at exit.

	    The history is read from HISTORY_FILE. Failing to read it is not fatal:
	    the REPL simply starts with an empty history. Writing the history back
	    is registered with atexit so it happens when the interpreter shuts down.
	    """
	    try:
	        readline.read_history_file(HISTORY_FILE)
	    except OSError:
	        # FileNotFoundError (first run) is a subclass of OSError; other
	        # OS-level failures such as a permission problem are treated the
	        # same way because history is a best-effort convenience.
	        pass
	    atexit.register(readline.write_history_file, HISTORY_FILE)


	def file_completer(text, state):
	    """Readline completer proposing filesystem paths that start with *text*.

	    readline calls the completer with state = 0, 1, 2, ... and expects the
	    state-th candidate each time, then None once the candidates run out.
	    """
	    # Escape glob metacharacters so partially typed names such as "a[1]" are
	    # matched literally, and sort so every call sees the same candidate order.
	    matches = sorted(glob.glob(glob.escape(text) + "*"))
	    return matches[state] if state < len(matches) else None


	def show_result(stack):
	    """Print the lines held by the top stack element in sorted order.

	    Nothing is printed when the stack is empty.
	    """
	    if not stack:
	        return
	    for entry in sorted(stack[-1]):
	        print(entry)

	def start_repl(stack):
	    """Run the interactive read-eval-print loop on *stack*.

	    Each input line is either a REPL command ("exit", "help", "quiet") or a
	    line of program tokens handed to machine_process. Unless quiet mode is
	    on, the top of the stack is echoed after every processed line. The loop
	    ends on "exit", end-of-file or Ctrl-C; any other exception is reported
	    and the loop carries on with the stack as it is.
	    """
	    init_readline()
	    readline.set_completer(file_completer)
	    readline.parse_and_bind("tab: complete")
	    print("# text-algebra REPL")
	    quiet = False
	    while True:
	        try:
	            line = input(">>> ")
	            command = line.strip()
	            if command == "exit":
	                break
	            elif command == "help":
	                print(__doc__)
	                continue
	            elif command == "quiet":
	                quiet = not quiet
	                continue
	            machine_process(line, stack)
	            # The stack may be empty here (blank or comment-only line, "R",
	            # or a failed first operand); only echo when there is a top.
	            if stack and not quiet:
	                print(stack[-1])
	        except (EOFError, KeyboardInterrupt):
	            print("\nExiting.")
	            break
	        except Exception as e:
	            # Top-level boundary of the REPL: report and keep the session alive.
	            print(f"\nGot an exception: {e}. Ignoring it and continuing as-is.")
	            continue


	def read_file_lines(filename):
	    """Return the distinct non-blank lines of *filename*, stripped of surrounding whitespace.

	    The file is opened as UTF-8 text. Lines that are empty after stripping
	    are dropped, and duplicates collapse because the result is a set.
	    """
	    with open(filename, "r", encoding="utf-8") as f:
	        stripped_lines = (line.strip() for line in f)
	        return {line for line in stripped_lines if line}


	def union(a, b):
	    """Return the elements found in *a*, in *b*, or in both."""
	    return a | b


	def intersection(a, b):
	    """Return the elements present in both *a* and *b*."""
	    return a & b


	def difference(a, b):
	    """Return the elements of *a* that do not appear in *b*."""
	    return a - b


	def substring_match(a, b):
	    """Return the lines of *a* that occur as a substring of at least one line of *b*."""
	    return {line_a for line_a in a if any(line_a in line_b for line_b in b)}


	def symmetric_difference(a, b):
	    """Return the elements that are in exactly one of *a* and *b*."""
	    return a ^ b


	def count(a):
	    """Print the number of elements in *a* and return *a* itself."""
	    print(len(a))
	    return a


	def regex_filter(a, pattern):
	    """Return the lines of *a* in which the regular expression *pattern* is found.

	    The pattern is compiled once and applied with search(), so it may match
	    anywhere inside a line rather than only at its start.
	    """
	    regex = re.compile(pattern)
	    return {line for line in a if regex.search(line)}


	def machine_process(line_of_tokens, stack):
	    """Execute one line of RPN tokens against *stack*, mutating it in place.

	    Everything from the first '#' on is discarded as a comment. The rest is
	    split on whitespace and each token is handled in turn:

	      U N D S X   pop two sets (b first, then a) and push the combined set
	      C           print the size of the top set; the stack is not modified
	      F <regex>   replace the top set by its lines matching the regex
	      P           print the stack
	      R           empty the stack
	      other       taken as a filename whose lines are pushed as a set

	    On the first error a message is printed and the rest of the line is
	    skipped; the stack keeps whatever state it had at that point.
	    """
	    args = line_of_tokens.split('#', 1)[0].split()
	    i = 0
	    while i < len(args):
	        token = args[i]
	        if token in {"U", "N", "D", "S", "X"}:
	            if len(stack) < 2:
	                print(f"Error: Operator '{token}' requires two operands.")
	                return
	            # Resolved only once a binary operator has valid operands.
	            binary_op = {
	                "U": union,
	                "N": intersection,
	                "D": difference,
	                "S": substring_match,
	                "X": symmetric_difference,
	            }[token]
	            b = stack.pop()
	            a = stack.pop()
	            stack.append(binary_op(a, b))
	        elif token == "C":
	            if not stack:
	                print("Error: Operator 'C' requires one operand.")
	                return
	            count(stack[-1])
	        elif token == "F":
	            if i + 1 >= len(args):
	                print("Error: Operator 'F' requires a regex pattern.")
	                return
	            pattern = args[i + 1]
	            # Tokens are split on whitespace only, so in a program file or
	            # the REPL the quotes of the documented  F 'regex'  form arrive
	            # as part of the token; drop one matching pair of quotes.
	            if len(pattern) >= 2 and pattern[0] == pattern[-1] and pattern[0] in "'\"":
	                pattern = pattern[1:-1]
	            if not stack:
	                print("Error: Operator 'F' requires one operand.")
	                return
	            # Validate before popping so a bad regex leaves the stack intact
	            # and is reported like every other error on this line.
	            try:
	                re.compile(pattern)
	            except re.error as e:
	                print(f"Error: invalid regex '{pattern}': {e}")
	                return
	            top = stack.pop()
	            stack.append(regex_filter(top, pattern))
	            i += 1  # Skip the pattern argument
	        elif token == "P":
	            print("peeking at stack of length", len(stack))
	            print(stack)
	        elif token == "R":
	            print("resetting stack")
	            stack.clear()
	        else:
	            try:
	                lines = read_file_lines(token)
	            except (OSError, ValueError) as e:
	                # OSError: missing/unreadable file. ValueError covers decoding
	                # errors (UnicodeDecodeError) and malformed file names.
	                print(f"Error reading file '{token}': {e}")
	                return
	            stack.append(lines)
	        i += 1

	def execute_program(stack, program_name):
	    """Run every line of the program file *program_name* against *stack*.

	    The file is read completely (as UTF-8 text) and closed before the first
	    line is handed to machine_process.
	    """
	    with open(program_name, 'r', encoding='utf-8') as fh:
	        prog = fh.readlines()
	    for line in prog:
	        machine_process(line, stack)

	def main():
	    """Command-line entry point.

	    Dispatches on the first argument: "--repl" starts the REPL, "-f FILE"
	    runs a program file, and anything else is joined with spaces and run as
	    an inline program. Afterwards the top of the stack is printed.

	    When called without arguments, or with "-f" but no file name, the usage
	    text is written to stderr and the process exits with status 2.
	    """
	    args = sys.argv[1:]
	    if not args or (args[0] == "-f" and len(args) < 2):
	        # Without this guard args[0]/args[1] would raise IndexError.
	        print(__doc__, file=sys.stderr)
	        raise SystemExit(2)
	    stack = []
	    if args[0] == "--repl":
	        start_repl(stack)
	    elif args[0] == "-f":
	        execute_program(stack, args[1])
	    else:
	        machine_process(" ".join(args), stack)
	    show_result(stack)


	# Run the command-line interface only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found