Last active
July 29, 2025 10:19
-
-
Save BjornFJohansson/c340f1040c3f722513901a52a0fc1d5c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pathlib import Path | |
| import urllib | |
| import re, os | |
| from urllib.parse import urlparse | |
| from string import punctuation | |
# I used this script in 2023 to convert a WikidPad wiki into an Obsidian.md wiki.
# The WikidPad wiki has to be in the "original sqlite" format, i.e. a collection of text files.
# The script was run and tweaked, and was not run or tested on more than one wiki.
# If you use this, be sure to run this script on a copy of the files, as the files are modified.
# Give every *.wiki file in the current directory a .md extension instead,
# reporting each rename on stdout.
for wiki_path in Path('.').glob("*.wiki"):
    md_path = wiki_path.with_suffix(".md")
    os.rename(wiki_path, md_path)
    print(f"Renamed: {wiki_path.name} -> {md_path.name}")
# Snapshot of every Markdown page in the current directory.
pages = sorted(Path('.').glob('*.md'))

# Sometimes WikidPad creates pages with a ~ (tilde) in the file name.
# Rename these to "<text before the first tilde>.md", unless a page with
# that name already exists (in which case the tilde page is left untouched).
tildepages = [p for p in pages if "~" in str(p)]
for tildepage in tildepages:
    base, _ = str(tildepage).split("~", maxsplit=1)
    np = Path(f"{base}.md")
    if np.exists():
        print(f"Skipped: {tildepage.name} ({np.name} already exists)")
        continue
    tildepage.rename(np)

# The renames above invalidate the snapshot: refresh it so that later steps
# never try to open a path that no longer exists.
pages = sorted(Path('.').glob('*.md'))
# Some WikidPad file names have unicode and some punctuation percent-quoted.
# Unquote them (decoding as cp1252). A "/" in the unquoted name is replaced
# with a pipe (|) character; this is how subpages are renamed.
for page in pages:
    uq = urllib.parse.unquote(str(page), encoding='cp1252')
    if str(page) == uq:
        continue  # nothing was quoted in this name
    nm = Path(uq.replace("/", "|"))
    if nm.exists():
        # On POSIX a rename silently replaces an existing file; keep both.
        print(f"Skipped: {page.name} ({nm.name} already exists)")
        continue
    page.rename(nm)

# The renames above invalidate the snapshot: refresh it so that later steps
# never try to open a path that no longer exists.
pages = sorted(Path('.').glob('*.md'))
# This is not necessary, but I wanted to remove the title from the page as
# Obsidian shows the filename by default.
# The code below strips the page name from the first line and blanks the line
# if only punctuation and/or whitespace remains (e.g. a leftover "++ ").
# NOTE(review): the original indentation was lost; `i` is assumed to count
# the pages that were rewritten.
i = 0
for page in pages:
    # Drop a leading byte-order mark only when one is actually present,
    # instead of unconditionally discarding the first character.
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    lines = pagetext.splitlines()
    if not lines:
        continue  # empty page: there is no first line to clean up
    firstline, *rest = lines
    newfirstline = firstline.strip("# ").replace(page.stem, "")
    if all(ch in punctuation or ch.isspace() for ch in newfirstline):
        newfirstline = ""
    if firstline != newfirstline:
        newpagetext = newfirstline + "\n" + "\n".join(rest)
        # Write with the same encoding the page was read with.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# 7175 pages
# Remove the first [alias:...] tag of each page and replace it with an
# Obsidian alias in a front-matter block at the top of the page:
# https://help.obsidian.md/Linking+notes+and+files/Aliases
# Several aliases in one tag are separated by ";".
i = 0
# [^\]\n]+ stops at the first closing bracket, so a later "[...]" on the same
# line is not swallowed into the alias list.
regxal = re.compile(r"\[alias:([^\]\n]+)\]")
# NOTE(review): the first page is skipped (pages[1:]) as in the original
# script; confirm whether that is intentional.
for page in pages[1:]:
    # Drop a leading byte-order mark only when one is actually present.
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    matchobj = regxal.search(pagetext)
    if matchobj:
        aliases = [a.strip() for a in matchobj.group(1).split(";") if a.strip()]
        nb = f"---\naliases: {', '.join(aliases)}\n---\n\n"
        newpagetext = nb + pagetext[:matchobj.start()] + pagetext[matchobj.end():]
        # Write with the same encoding the page was read with.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# Replace WikidPad absolute file links with Obsidian Markdown style links.
# For example, both
#   file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md
#   [file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md]
# are replaced by
#   [yeast-colony-pcr.md](<file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md>)
# This could be improved to handle images by prepending a ! for some links.
#
# The URL (group 1) runs up to the next whitespace or "]". Only the optional
# surrounding brackets are consumed, so the whitespace or newline following a
# bare link is preserved, and a link at the very end of the text also matches.
regxfl = re.compile(r"\[?(file:/[^\s\]]+)\]?")
i = 0


def repl_file_links(matchobj):
    """Return the Markdown link for one ``regxfl`` match.

    The link text is the last path component of the URL; if the URL ends
    with a slash (empty last component) the whole URL is used instead.
    """
    url = matchobj.group(1)
    fn = os.path.basename(url) or url
    return f"[{fn}](<{url}>)"
# Rewrite the file links of every page; `i` counts the pages that changed.
for page in pages:
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    newpagetext, r = regxfl.subn(repl_file_links, pagetext)
    if r:
        # Write with the same encoding the page was read with, rather than
        # the platform's locale encoding.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# The code below collects all page names in the wiki and turns every
# bracketed [wikiword] that names an existing page into an Obsidian
# [[wikiword]] link.
# It ended up doing too many links on the pages for my taste; a simple filter
# for link length or a blacklist might help.
try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is optional; fall back to plain iteration.
    def tqdm(iterable):
        return iterable

# Page names are compared literally via set lookup, so names containing regex
# metacharacters (e.g. the "|" used for subpages) are handled correctly, and
# each page is scanned once instead of once per page name.
stems = {p.stem for p in pages}

# A single-bracketed word. The lookarounds skip text that is already a
# [[wikilink]] and the link text of a Markdown [text](target) link.
preg = re.compile(r"(?<!\[)\[([^\[\]\n]+)\](?![\](])")


def _link_known_page(matchobj):
    """Return [[name]] if name is an existing page, else the text unchanged."""
    name = matchobj.group(1)
    return f"[[{name}]]" if name in stems else matchobj.group(0)


for page in tqdm(pages):
    pagetext = page.read_text(encoding='utf-8')
    newpagetext = preg.sub(_link_known_page, pagetext)
    if newpagetext != pagetext:
        # Only touch pages that changed, and keep the encoding they were
        # read with.
        page.write_text(newpagetext, encoding='utf-8')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment