@lowell80, forked from solarkraft/syncthing-automerge.py. Last active May 19, 2025.
Monitors a Syncthing-synced directory and tries to merge conflicting files (based on https://www.rafa.ee/articles/resolve-syncthing-conflicts-using-three-way-merge/). Probably adaptable for other directory types, but only tested with Logseq (works for me™️).
# This script automatically handles Syncthing conflicts on text files by applying a
# git three-way merge between the previously synced version and each divergent version.
# It depends on the watchdog package and git.
# For automatic dependency installation when running with `uv run --script deconflicter.py`:
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "watchdog",
# ]
# ///
# This code is MIT Licensed:
# Copyright 2024 solarkraft
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
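# Example usage (hypothetical paths; run from the root of the Syncthing-synced folder):
#
#   cd ~/Sync/notes
#   uv run --script deconflicter.py
#
# or, with the watchdog dependency installed manually:
#
#   pip install watchdog && python deconflicter.py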
import os
import time
import re
import hashlib
import subprocess
from pathlib import Path
from watchdog.observers import Observer as FileSystemObserver
from watchdog.events import FileSystemEventHandler
# Starting here
# TODO: Add support for ignoring certain portions of files (regex match). For example, an
#       Obsidian plugin updates the 'updated' field in the frontmatter (YAML); that field
#       should simply be ignored. (Right now I get two 'updated:' lines with different dates,
#       which causes a YAML syntax error.)
# TODO: Add global file-pattern include/exclude lists. For example, don't try to automatically
#       merge .json files, or anything in the `.obsidian` folder.
# TODO: Fix up ancestor finding (pick the latest backup dated at or before the timestamp
#       embedded in the sync-conflict file name, NOT after it).
# TODO: Decide whether we need special handling for Syncthing sometimes emitting two conflict
#       files at once (one named with local time, one with UTC; a bug/glitch between the
#       versions I'm running?). The hashes of the two files are identical, so there's no need
#       to process both. Maybe a worker thread and a collector thread? Or maybe the current
#       design copes already?
# Improve design
# TODO: Oh yeah, add a CLI.
# TODO: Check for the .stversions directory and the presence of the git command at startup;
#       there's no point in running without them.
# TODO: Allow this script to run in real-time 'watchdog' mode or as a scheduled run-once job.
# TODO: Use pathlib, and make this more OS-agnostic. (Not sure how much I care about making
#       it work on Windows, but I generally try.)
# TODO: Use logging, not print().
# Bigger things...
# TODO: Allow it to use git history as the basis for the common ancestor (when the file in
#       question lives in a git repo, of course). There may be other, smarter merge options
#       to consider too.
# TODO: Allow pulling history from .zfs/snapshot (won't work inside a container).
# TODO: Add better auditing and "undo(ish)" support. For example, store a copy of the
#       unmerged file BEFORE overwriting it (in case the merge goes wrong; I've seen entire
#       blocks of text repeated unexpectedly, possibly because of a poorly chosen ancestor).
#       There should be CLI options for this.
# Is there any value in checking for the `.stfolder` marker at startup to make sure we're
# running from the correct location? (Does it hurt if we don't? Maybe just a warning to the user...)
# Because you can change this in the UI/config:
STVERSIONS_DIR = ".stversions"
MAX_HASH_HISTORY = 100
hash_history = []
def get_hash(path: Path) -> str:
    path = Path(path)
    h = hashlib.new("sha256")
    h.update(path.read_bytes())
    return h.hexdigest()


def get_relative_path(path):
    return os.path.relpath(path)
def merge_files(original, backup, conflict):
    command = ["git", "merge-file", "--union", original, backup, conflict]
    print("Performing three-way merge with git command:")
    print(" ".join(command))
    exitcode = subprocess.call(command, cwd=os.getcwd())
    if exitcode != 0:
        # Should we abort or just try again on the next change? If this is running as a
        # background process, a hard failure could be quite unexpected. The caller should
        # check the return status.
        raise RuntimeError("Git command failed!")
def merge_if_applicable(src_path):
    """Perform a three-way merge on, and then remove, a given possible Syncthing conflict file if:

    - It is an actual conflict file (determined by the naming scheme)
    - The associated canonical file exists (the "real" file path)
    - A backup file in .stversions exists
    """
    global hash_history
    if not os.path.isfile(src_path):
        # print(src_path, "is not a file")
        return
    if f"/{STVERSIONS_DIR}/" in src_path:
        # Ignore any activity in the .stversions folder.
        # I've rarely found sync-conflict files in my .stversions folder:
        #
        #   find ../.stversions \( -name '*.sync-conflict*' -o -name '.syncthing.*' \) | wc -l
        #   5
        return
    candidate_file_path = get_relative_path(src_path)
    match = re.search(
        # "." is converted to "%2F" when a conflict file is opened in Logseq
        r"^(.*?)(?:\.|%2F)sync-conflict-([0-9]{8})-([0-9]{6})-(.{7})\.?([^.]*)$",
        candidate_file_path,
    )
    if match is None:
        # The file is not a Syncthing conflict file
        # print(candidate_file_path, "is not a conflict file")
        return
    if not src_path.endswith(".md"):
        print("Refusing to attempt to merge non-*.md files...")
        # Specifically, .json files don't merge well...
        return
    conflict_file_hash = get_hash(candidate_file_path)
    conflict_file_path = candidate_file_path
    print()  # Make each run easier to recognize
    print(f"Conflict file found: {conflict_file_path} {conflict_file_hash[:12]}")
    # print(match.groups())
    conflict_file_name = match.group(1)
    conflict_file_date = match.group(2)
    conflict_file_time = match.group(3)
    conflict_file_id = match.group(4)
    conflict_file_extension = match.group(5)
    # print(conflict_file_path, conflict_file_date, conflict_file_time, conflict_file_id, conflict_file_extension)
    original_file_path = conflict_file_name + "." + conflict_file_extension
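    # Example (hypothetical file name): "Testseite.sync-conflict-20250504-180143-ABCDEF0.md"
    # parses as name="Testseite", date="20250504", time="180143", device id="ABCDEF0",
    # extension="md", so the canonical file is reconstructed as "Testseite.md".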
    # Check the fix history. Sometimes Syncthing emits the same conflict as two files,
    # one named with local time and one with UTC. I don't know why, but let's just clean
    # it up and avoid applying the same change twice (duplicate content).
    hash_key = (original_file_path, conflict_file_hash)
    if hash_key in hash_history:
        print(f"Deleting conflict file due to fix history. {hash_key}")
        os.remove(os.path.join(os.getcwd(), conflict_file_path))
        return
    # HACK: Give Syncthing some time to move the tmpfile (.syncthing.MyFileName) to its real location
    time.sleep(0.1)
    if not os.path.isfile(original_file_path):
        print("... but original file", original_file_path, "doesn't exist")
        # We may be checking too early, before Syncthing has moved its tempfile
        # (e.g. .syncthing.Testseite.md.tmp) to the real location.
        # print("... what about the Syncthing tempfile?")
        # p = list(os.path.split(original_file_path))
        # tmpfile_name = ".syncthing." + p.pop() + ".tmp"
        # print("name:", tmpfile_name, "path:", p)
        return
print("For original file:", original_file_path)
backup_file_regex_string = (
STVERSIONS_DIR + "/"
+ conflict_file_name
+ r"~([0-9]{8})-([0-9]{6})\."
+ conflict_file_extension
)
backup_file_regex = re.compile(backup_file_regex_string)
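    # Example (hypothetical name): with simple versioning, Syncthing stores backups like
    # ".stversions/Testseite~20250504-175900.md", which this pattern matches.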
    backup_files = []
    for dirpath, _, files in os.walk(os.path.join(os.getcwd(), STVERSIONS_DIR)):
        for file in files:
            candidate_path = str(os.path.join(get_relative_path(dirpath), file))
            # print("Test:", candidate_path)
            match = backup_file_regex.match(candidate_path)
            if match:
                backup_file_date = match.group(1)
                backup_file_time = match.group(2)
                # print("Matched:", candidate_path, backup_file_date, backup_file_time)
                backup_files.append(candidate_path)
    # Hmm. No checking for which file is most recent, or closest to (but not _newer_ than)
    # the sync conflict??? Hacky, but good enough for now: a reverse text sort of the
    # zero-padded timestamps puts the newest backup first.
    backup_files.sort(reverse=True)
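    # For zero-padded "~YYYYMMDD-HHMMSS" names, lexical order equals chronological order
    # (hypothetical names):
    #   sorted(["a~20250503-090000.md", "a~20250504-180000.md"], reverse=True)[0]
    #   -> "a~20250504-180000.md"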
    if len(backup_files) == 0:
        print(
            f"No backup file candidates were found by pattern {backup_file_regex_string}. "
            "There isn't enough data for a three-way merge."
        )
        print("This may be due to custom versioning settings - try simple versioning.")
        # TODO: We could still do a two-way merge of the files here, which would improve
        # compatibility with other versioning schemes.
        return
    # print("Backup files:", backup_files)
    # We want the latest backup file, which is the first in the (reverse-sorted) list
    backup_file = backup_files[0]
    print("Latest backup file:", backup_file)
    merge_files(original_file_path, backup_file, conflict_file_path)
    # TODO: This really _should_ be done on a temp file, for safety and to reduce race conditions
    fixup_obsidian_frontmatter(original_file_path)
    print("Deleting conflict file")
    os.remove(os.path.join(os.getcwd(), conflict_file_path))
    hash_history.insert(0, hash_key)
    if len(hash_history) > MAX_HASH_HISTORY:
        hash_history = hash_history[:MAX_HASH_HISTORY]
    print("Deconfliction done!")
    print()
class FileChangeHandler(FileSystemEventHandler):
    # To support manually "touch"ing a file to get the script to handle it
    @staticmethod
    def on_modified(event):
        # print(f"A file was modified: {event}")
        merge_if_applicable(event.src_path)

    # This is how Syncthing creates the conflict files
    @staticmethod
    def on_moved(event):
        # print("A file was moved, may have been syncthing")
        # print(event)  # Syncthing does some moving-around business
        merge_if_applicable(event.dest_path)
def read_obsidian_md(path, divider="---"):
frontmatter = []
body = []
with open(path, "r") as f:
line = next(f).rstrip()
if line == divider:
print("Looking for frontmatter")
while True:
frontmatter.append(line)
line = next(f).rstrip()
if line == divider:
frontmatter.append(line)
break
for line in f:
body.append(line.rstrip())
return frontmatter, body
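

# Example of the file layout this parser expects (hypothetical content):
#
#   ---
#   created: 2025-05-04T17:46:38-04:00
#   updated: 2025-05-04T18:01:43-04:00
#   ---
#   Note body starts here.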
def fix_frontmatter(frontmatter):
    """
    Identify duplicate keys. Use a simple sort order to keep the "highest" value and discard
    all other values. A general attempt is made to NOT change anything if there's nothing to
    update, but trailing spaces and such may be removed.
    NOTE: This does NOT work for anything other than very simple (single-line) YAML keys.
    """
    simple_yaml_value_re = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.*)$")
    temp_output = []
    known_kv: dict[str, str] = {}
    replace_kv: dict[str, str] = {}
    for line in frontmatter:
        match = simple_yaml_value_re.match(line)
        if match:
            key, value = match.groups()
            if key in known_kv:
                prev_value = known_kv[key]
                new_value = max(prev_value, value)
                # TODO: Don't log during unit tests...
                if new_value == value:
                    print(f"Replacing frontmatter key: {key} = '{new_value}' (was '{prev_value}')")
                else:
                    print(f"Keeping frontmatter key: {key} = '{new_value}' (discarding '{value}')")
                line = None  # Drop the duplicate line; the winning value is applied in the second pass
                replace_kv[key] = new_value
                value = new_value
            known_kv[key] = value
        if line is not None:
            temp_output.append(line)
    output = []
    for line in temp_output:
        match = simple_yaml_value_re.match(line)
        if match:
            key, value = match.groups()
            if key in replace_kv:
                line = f"{key}: {replace_kv[key]}"
        output.append(line)
    return output
# Lazy unit test
assert fix_frontmatter("""\
---
updated: 2025-05-04T18:00:14-04:00
updated: 2025-05-04T18:01:43-04:00
created: 2025-05-04T17:46:38-04:00
---""".splitlines()) == [
    "---",
    "updated: 2025-05-04T18:01:43-04:00",
    "created: 2025-05-04T17:46:38-04:00",
    "---"]
def fixup_obsidian_frontmatter(path: str):
    if not path.endswith(".md"):
        return
    frontmatter, body = read_obsidian_md(path)
    if not frontmatter:
        # Nothing to fix. We're done
        return
    new_frontmatter = fix_frontmatter(frontmatter)
    if frontmatter == new_frontmatter:
        # No fixes needed. Exit now
        return
    print(f"Front matter changes for {path}. Updating")
    with open(path, "w") as f:
        for line in new_frontmatter + body:
            f.write(line)
            f.write("\n")
    print(f"Successfully overwrote {path}")
if __name__ == "__main__":
print("Running Syncthing deconflicter")
# timeout=10 prevents events being lost on macOS
observer = FileSystemObserver(timeout=10)
event_handler = FileChangeHandler()
path = "."
# From quickstart
observer.schedule(event_handler, path, recursive=True)
observer.start()
try:
while observer.is_alive():
observer.join(1)
finally:
observer.stop()
observer.join()
print("Stopped Syncthing deconflicter")