Skip to content

Instantly share code, notes, and snippets.

@BjornFJohansson
Last active July 29, 2025 10:19
Show Gist options
  • Select an option

  • Save BjornFJohansson/c340f1040c3f722513901a52a0fc1d5c to your computer and use it in GitHub Desktop.

Select an option

Save BjornFJohansson/c340f1040c3f722513901a52a0fc1d5c to your computer and use it in GitHub Desktop.
from pathlib import Path
import urllib
import re, os
from urllib.parse import urlparse
from string import punctuation
# I used this script in 2023 to convert a WikidPad wiki into an Obsidian.md wiki.
# The WikidPad wiki has to be in the "original sqlite" format, i.e. a collection of text files.
# The script was run and tweaked, and was not run or tested on more than one wiki.
# If you use it, be sure to run it on a copy of the files, because the files are modified in place.
# This code renames the wiki files (*.wiki -> *.md):
# Give every *.wiki page file in the current directory an .md suffix.
for file in Path('.').glob("*.wiki"):
    new_name = file.with_suffix(".md")
    # Path.rename() can silently replace an existing file on POSIX, so never
    # rename onto a page that is already there (same guard as the tilde pages).
    if new_name.exists():
        print(f"Skipped: {file.name} ({new_name.name} already exists)")
        continue
    file.rename(new_name)
    print(f"Renamed: {file.name} -> {new_name.name}")
# All Markdown pages in the current directory, sorted for a stable order.
pages = sorted(Path('.').glob('*.md'))

# Sometimes wp creates pages with ~ (tilde). Rename each of these to the part
# of its name before the first "~" plus ".md", unless that file already exists.
tildepages = [p for p in pages if "~" in str(p)]
for tildepage in tildepages:
    base, _, _ = str(tildepage).partition("~")
    target = Path(f"{base}.md")
    if not target.exists():
        tildepage.rename(target)

# The renames above invalidate entries in `pages`; rebuild the list so the
# following steps only see files that actually exist.
pages = sorted(Path('.').glob('*.md'))
# Some wp files have unicode and some punctuation percent-quoted in their names.
# Decode those names (cp1252); a decoded "/" marks a subpage and is written
# as a pipe | character, since "/" cannot be part of a file name.
for page in pages:
    if not page.exists():
        # Stale entry: the file was renamed by an earlier step.
        continue
    uq = urllib.parse.unquote(str(page), encoding='cp1252')
    if str(page) == uq:
        continue
    nm = Path(uq.replace("/", "|"))
    if nm.exists():
        # Do not overwrite a page that already carries the decoded name.
        print(f"Skipped: {page.name} ({nm.name} already exists)")
        continue
    page.rename(nm)

# Pick up the new file names for the steps that follow.
pages = sorted(Path('.').glob('*.md'))
# This is not necessary, but I wanted to remove the title from the page as
# Obsidian shows the filename by default.
# The code below strips the page name from the first line and blanks the line
# if only punctuation remains.
i = 0  # number of pages that were rewritten
punctuation_chars = set(punctuation)
for page in pages:
    # Drop a leading byte-order mark only if one is present; slicing off the
    # first character unconditionally would eat real text on BOM-less files.
    text = page.read_text(encoding='utf-8').lstrip("\ufeff")
    lines = text.splitlines()
    if not lines:
        # Empty page: there is no first line to clean up.
        continue
    firstline, *rest = lines
    newfirstline = firstline.strip("# ").replace(page.stem, "")
    if not set(newfirstline) - punctuation_chars:
        newfirstline = ""
    if firstline != newfirstline:
        newpagetext = newfirstline + "\n" + "\n".join(rest)
        # Write back with the same encoding the page was read with.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# 7175 pages
# Remove [alias: ...] tags and replace them with an Obsidian alias front matter block:
# https://help.obsidian.md/Linking+notes+and+files/Aliases
i = 0  # number of pages that were rewritten
# [^\]]+ stops at the first closing bracket, so a later "]" on the same line
# is not swallowed into the alias text.
regxal = re.compile(r"\[alias:([^\]]+)\]")
# NOTE(review): the original iterated pages[1:], silently skipping the first
# page; assumed to be a leftover, so every page is processed now.
for page in pages:
    # Strip a byte-order mark only when present (never a real first character).
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    tags = regxal.findall(pagetext)
    if not tags:
        continue
    # Collect the ";"-separated names from every alias tag on the page.
    aliases = []
    for tag in tags:
        for name in tag.split(";"):
            name = name.strip()
            if name:
                aliases.append(name)
    nb = f"---\naliases: {', '.join(aliases)}\n---\n\n"
    newpagetext = nb + regxal.sub("", pagetext)
    page.write_text(newpagetext, encoding='utf-8')
    i += 1
# Replace wikidpad absolute file links with obsidian md style links:
# for example
# file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md
# [file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md]
# Is replaced by
# [yeast-colony-pcr.md](<file:/home/bjorn/Desktop/mecwiki/yeast-colony-pcr.md>)
# This could be improved to handle images by prepending a ! for some links
# Matches a file:/ URL, optionally wrapped in [ ]. The URL itself (group 1)
# runs up to the next whitespace or "]". Only the brackets are consumed, so
# the whitespace that follows a bare link is preserved and a link at the very
# end of the text is matched as well.
regxfl = re.compile(r"\[?(file:/[^\s\]]+)\]?")
i = 0  # number of pages that were rewritten


def repl_file_links(matchobj):
    """Return a Markdown link for a match produced by ``regxfl``.

    The link label is the last path component of the URL (a trailing "/" is
    ignored so directory links do not get an empty label); the target is the
    unchanged URL wrapped in <> so that it may contain spaces or parentheses.
    """
    url = matchobj.group(1)
    fn = os.path.basename(url.rstrip("/"))
    return f"[{fn}](<{url}>)"
# Rewrite the file links on every page; only pages that contain at least one
# link are written back.
for page in pages:
    pagetext = page.read_text(encoding='utf-8').lstrip("\ufeff")
    newpagetext, r = regxfl.subn(repl_file_links, pagetext)
    if r:
        # Write with the encoding the page was read with, not the locale default.
        page.write_text(newpagetext, encoding='utf-8')
        i += 1
# The code below collects all page names (file stems) in the wiki and turns
# every single-bracket [wikiword] link that names an existing page into an
# Obsidian [[wikiword]] link. Only bracketed links are rewritten; bare
# CamelCase words are left untouched.
# Each page is scanned once and every bracketed word is looked up in a set of
# page names, instead of running one regex per page name over every page.
# It ended up doing too many links on the pages for my taste, a simple filter
# for link length or a blacklist might help.
from tqdm import tqdm

stems = {p.stem for p in pages}
# A single-bracket link: not already part of [[...]] and not the label of a
# Markdown [label](target) link.
regxwl = re.compile(r"(?<!\[)\[([^\[\]\n]+)\](?!\]|\()")


def repl_wiki_links(matchobj):
    """Return [[name]] if the bracketed name is an existing page, else the match unchanged."""
    name = matchobj.group(1)
    if name in stems:
        return f"[[{name}]]"
    return matchobj.group(0)


for page in tqdm(pages):
    pagetext = page.read_text(encoding='utf-8')
    newpagetext = regxwl.sub(repl_wiki_links, pagetext)
    if newpagetext != pagetext:
        # Only touch pages that actually changed; keep the UTF-8 encoding.
        page.write_text(newpagetext, encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment