Created
August 31, 2024 20:55
-
-
Save mds2/34bf6d3a1a37c5f3f18d8492b9e31533 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python3 | |
| from sys import stdin | |
| import argparse | |
| """Utility to convert the standard columnar output of | |
| many UNIX commands (ls, ps, who) into a csv format that other | |
| programs can parse. | |
| Reads input on stdin, produces output on stdout. | |
| Can use delimiters other than commas by specifying delimiters via | |
| command-line flags. | |
| Intended usage is something like the following | |
| ➜ py-play ls -alh *.py | head -n 5 | unix_to_csv.py | |
| -rw-rw-r--,1,mschuresko,mschuresko,989,Jul,27,2023,bang_bang_rocket_sled.py | |
| -rw-rw-r--,1,mschuresko,mschuresko,1.2K,Jul,27,2023,bang_bang_thermostat.py | |
| -rw-rw-r--,1,mschuresko,mschuresko,1.2K,Jul,27,2023,bang_bang_treadmill.py | |
| -rw-rw-r--,1,mschuresko,mschuresko,3.6K,Aug,24,2021,crypt_util.py | |
| -rw-rw-r--,1,mschuresko,mschuresko,1.4K,Aug,30,2022,cycle_counter.py | |
| """ | |
| parser = argparse.ArgumentParser( | |
| description="Turns UNIX command output into csv/tsv/etc") | |
| parser.add_argument('-d', '--delimiter', | |
| help="Delimiter for output", | |
| default=',') | |
| args = parser.parse_args() | |
| # First we read stdin. Yes, we read all of it at once. | |
| # Yes, this will be slow for large inputs, and blocking for | |
| # streaming data, such as, for instance, the output of `tail -f` | |
| lines = stdin.readlines() | |
| lmax = max([len(l) for l in lines]) | |
| # We find where column boundaries are by looking for character columns | |
| # that are spaces in every row of input (which is why we need to read | |
| # all of input first) | |
| # We do this by creating a set of all character columns in the input, | |
| # and removing each column that has a non-space character in at least | |
| # one row | |
| spaces = set([i for i in range(lmax)]) | |
| for l in lines: | |
| for i,c in enumerate(l): | |
| if c != ' ' and i in spaces: | |
| spaces.remove(i) | |
| # Now we sort the spaces we found | |
| # We're looking for gaps between spaces, so we add artificial | |
| # leading and trailing spaces at -1 and lmax | |
| spaces = [-1] + sorted(list(spaces)) + [lmax] | |
| # Any gap between columns with spaces are data columns. | |
| # Find those. | |
| column_ranges = [] | |
| for i in range(len(spaces) - 1): | |
| if spaces[i+1] - spaces[i] > 1: | |
| column_ranges.append((spaces[i] + 1, spaces[i+1])) | |
| # Then go through each line, capture the text in that line | |
| # corresponding to each column of column_ranges, strip the text | |
| # in each column, and collect it as a list of strings. | |
| # Collect the results of each line in a list (of lists of strings) | |
| by_cols = [] | |
| for l in lines: | |
| curr_cols = [] | |
| for c_start, c_end in column_ranges: | |
| curr_cols.append(l[:c_end][c_start:].strip()) | |
| by_cols.append(curr_cols) | |
| # Now that each line is separated into columns, | |
| # we can print them out separated by the delimiter | |
| for row in by_cols: | |
| print(args.delimiter.join(row)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, I think this does what you want with SQL querying :). I'm not proud of it but it works on simple
ls -loutput.