Skip to content

Instantly share code, notes, and snippets.

@Podbrushkin
Created October 22, 2025 13:04
Show Gist options
  • Select an option

  • Save Podbrushkin/d642d5b4487be7f80146ed0e2d4b1a09 to your computer and use it in GitHub Desktop.

Select an option

Save Podbrushkin/d642d5b4487be7f80146ed0e2d4b1a09 to your computer and use it in GitHub Desktop.
Create a Graphviz tree with URLs and colors from a Neo4j taxonomy
function Get-DotNodeLabel ($arr) {
    <#
    .SYNOPSIS
    Builds a Graphviz HTML-like label (a borderless <table>) from a list of row items.
    .PARAMETER arr
    Row items, rendered top to bottom:
      - [string]       one row with a single cell spanning the whole table;
      - dictionary     one row of link cells; each entry's key is the underlined text and
                       its value is the href (hashtable, [ordered], or any IDictionary);
      - $null          skipped silently;
      - anything else  skipped, with a yellow note written to the host.
    .OUTPUTS
    [string] The table wrapped in an extra pair of angle brackets, i.e. the form DOT
    expects after "label=".
    #>

    # Escape each piece of text individually, before it is placed into markup.
    # Graphviz HTML-like labels are parsed as XML, so a raw '<', '>', '&' or '"'
    # inside cell text or an attribute value makes the whole label invalid.
    $escapeXml = {
        param($text)
        ([string]$text).Replace('&', '&amp;').Replace('<', '&lt;').Replace('>', '&gt;').Replace('"', '&quot;')
    }

    # The widest row decides the colspan used by single-cell rows.
    $maxColumns = 0
    foreach ($item in $arr) {
        $maxColumns = [math]::Max($item.Count, $maxColumns)
    }
    $tdAttrs = "width=""150"" colspan=""$maxColumns"""

    $rows = foreach ($item in $arr) {
        if ($item -is [string]) {
            "<tr><td $tdAttrs>$(& $escapeXml $item)</td></tr>"
        }
        elseif ($item -is [System.Collections.IDictionary]) {
            # A row without cells is not valid in a Graphviz table, so emit nothing.
            if ($item.Count -eq 0) { continue }
            # A lone link is stretched over the full table width like a text row.
            $tdAttrsForSingle = $item.Count -eq 1 ? $tdAttrs : ''
            $urlCells = foreach ($entry in $item.GetEnumerator()) {
                '<td target="_blank" href="{0}" {2}><u>{1}</u></td>' -f (& $escapeXml $entry.Value), (& $escapeXml $entry.Key), $tdAttrsForSingle
            }
            "<tr>$($urlCells -join ' ')</tr>"
        }
        elseif ($null -eq $item) {}
        else {
            Write-Host -ForegroundColor Yellow What is this? $item
        }
    }

    return @"
<<table border="0" cellborder="0" cellpadding="0" cellspacing="0" >
$($rows -join ' ')
</table>>
"@
}
function Get-ColorRgb {
    <#
    .SYNOPSIS
    Derives a repeatable colour from an arbitrary value by hashing its string form.
    .PARAMETER Identity
    Value to colour. Its ToString() result is UTF-8 encoded and hashed with MD5.
    $null yields the literal 'black'.
    .PARAMETER pastelFactor
    Values below 1.0 blend every channel toward 255: channel * factor + 255 * (1 - factor),
    rounded down. The default 1.0 leaves the channels untouched.
    .PARAMETER alpha
    Values below 1.0 switch the result to the 'rgba(r, g, b, alpha)' form.
    .OUTPUTS
    [string] 'black', 'rgba(r, g, b, alpha)', or '#rrggbb' in lowercase hex.
    #>
    param(
        $Identity,
        [double]$pastelFactor = 1.0,
        [double]$alpha = 1.0
    )

    if ($null -eq $Identity) { return 'black' }

    # The hash object is disposable; release it even if hashing throws.
    $md5 = [System.Security.Cryptography.MD5]::Create()
    try {
        $bytes = $md5.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($Identity.ToString()))
    }
    finally {
        $md5.Dispose()
    }

    # Digest bytes 0, 4 and 8 become the red, green and blue channels.
    [int]$r = $bytes[0]
    [int]$g = $bytes[4]
    [int]$b = $bytes[8]

    if ($pastelFactor -lt 1.0) {
        $r = [math]::Floor($r * $pastelFactor + 255 * (1 - $pastelFactor))
        $g = [math]::Floor($g * $pastelFactor + 255 * (1 - $pastelFactor))
        $b = [math]::Floor($b * $pastelFactor + 255 * (1 - $pastelFactor))
    }

    if ($alpha -lt 1.0) {
        return "rgba($r, $g, $b, $alpha)"
    }
    return "#{0:x2}{1:x2}{2:x2}" -f $r, $g, $b
}
function Get-WikidataExternalUrls ($qids, $PropIds, [switch]$OutSparql) {
    <#
    .SYNOPSIS
    Builds (and, unless -OutSparql is given, runs) a SPARQL query that maps Wikidata
    items to external web URLs.
    .PARAMETER qids
    Item ids, either bare ('Q1') or as URIs; everything up to the last '/' is stripped.
    Empty values are dropped and duplicates removed.
    .PARAMETER PropIds
    Selectors, each producing one alternative in a UNION:
      - 'P<digits>'            external-id property; the URL is built from the property's
                               P1630 value with the id substituted for the whole match;
      - a value containing 'wiki' (e.g. 'ru.wikipedia')
                               the page on https://<value>.org/ that is schema:about the item;
      - 'Q<digits>'            the P2699 qualifier on a P1343 statement whose value is that item.
    .PARAMETER OutSparql
    Return the query text instead of executing it.
    .OUTPUTS
    The SPARQL string, or whatever Invoke-Sparql returns for it (that helper is not
    defined in this file).
    #>

    # Skip empty ids: a bare "wd:" token would make the VALUES clause invalid.
    $qids = $qids -replace '.*/' | Where-Object { $_ } | ForEach-Object { "wd:$_" } | Select-Object -Unique

    # Always collect the alternatives in a real array. With a scalar (zero or one
    # match) a later "+=" would concatenate text and the -join below would have
    # nothing to put ' UNION ' between.
    # Ids are matched case-sensitively and strictly so that site names beginning
    # with p/q (e.g. 'pl.wikipedia') are not mistaken for property or item ids.
    $blocksToUnion = @(
        foreach ($propid in $PropIds) {
            if ($propid -cmatch '^P\d+$') {
@"
{
?q wdt:$propid ?${propid}Id.
wd:$propid wdt:P1630 ?${propid}Fmt.
BIND(IRI(REPLACE(?${propid}Id, "(^.*)", ?${propid}Fmt)) AS ?web)
}
"@
            }
        }
        foreach ($site in $PropIds) {
            if ($site -like '*wiki*') {
@"
{
?w schema:about ?q;
schema:isPartOf <https://${site}.org/>.
BIND(?w AS ?web)
}
"@
            }
        }
        foreach ($sourceQid in $PropIds) {
            if ($sourceQid -cmatch '^Q\d+$') {
@"
{
?q p:P1343 ?stmt.
?stmt ps:P1343 wd:$sourceQid .
?stmt pq:P2699 ?web.
}
"@
            }
        }
    )

    $sparql = @"
SELECT ?q ?web WHERE {
VALUES ?q { $qids }
$($blocksToUnion -join ' UNION ')
}
"@

    if ($OutSparql) { return $sparql }
    else { return (Invoke-Sparql $sparql) }
}
# Node rows. The query takes the 400 nodes with the largest `descendants` value, follows
# every HAS_CHILD path (zero or more hops) that leads from a :Taxon node to one of them,
# and returns each distinct node on those paths with the fields used later for labels
# and colours. The text holds no PowerShell variables, so a literal here-string suffices.
# NOTE(review): Invoke-CypherNeo4j is not defined in this file; presumably it runs the
# query and returns one object per result row - confirm.
$cypher = @'
MATCH (n0)
ORDER BY n0.descendants DESC
LIMIT 400
MATCH path = (:Taxon)-[:HAS_CHILD*0..]->(n0)
UNWIND nodes(path) AS n
WITH DISTINCT n
RETURN n.id AS id, n.scientificName AS scientificName
, n.labelRu AS labelRu, n.labelEn AS labelEn
, n.descendants AS descendants
, n.rank AS rank, n.phylum AS phylum
,n.qid AS qid
'@
$nodes = Invoke-CypherNeo4j $cypher

# Edge rows. Same node selection and paths as above, but this time the distinct
# (source id, target id) pairs of the relationships on those paths are returned.
$cypher = @'
MATCH (n)
ORDER BY n.descendants DESC
LIMIT 400
MATCH path = (:Taxon)-[:HAS_CHILD*0..]->(n)
UNWIND relationships(path) AS r
RETURN DISTINCT startNode(r).id AS source, endNode(r).id AS target
'@
$edges = Invoke-CypherNeo4j $cypher
# Catalogue of link sources offered to the user, one per line: a selector
# (P-id, wiki site name, or Q-id) optionally followed by a human-readable hint.
$propIds = @'
P6385 Krugosvet
P11514 bigenc
P2924 old.bigenc
P1417 britannica
P7836 livelib
P7433 fantlab
P1233 isfdb
P12614 azlibru
P1953 Discogs
P1553 music.yandex
P3151 inatur
P846 gbif
P830 eol
ru.wikipedia
commons.wikimedia
ru.wikisource
Q124821483 prodetlit
'@ -split '\r?\n'

# Let the user pick lines in a grid window, then keep only the selector,
# i.e. the text before the first space of each chosen line.
$propIdsToAdd = $propIds |
    Out-GridView -PassThru |
    ForEach-Object { ($_ -split ' ', 2)[0] }
# Look up external URLs for all node qids, group the result rows by bare qid, and
# store each group's URLs in a new `urls` property on the first node with that qid.
Get-WikidataExternalUrls $nodes.qid $propIdsToAdd |
    Group-Object { $_.q -replace '.*/' } |
    ForEach-Object {
        $q = $_.Name
        $urls = $_.Group.web
        $nodes | Where-Object qid -eq $q | Select-Object -First 1 |
            ForEach-Object { Add-Member -InputObject $_ -NotePropertyName urls -NotePropertyValue $urls }
    }

# For every node that received URLs, add `urlsMap`: short link text -> URL.
# The link text is the first two characters of the host name (a leading "www." is ignored).
$nodes | Where-Object urls | ForEach-Object {
    $n = $_
    $map = @{}
    # Repeated result rows would otherwise show up as numbered duplicates.
    foreach ($url in ($n.urls | Select-Object -Unique)) {
        $short = $url -replace '^.*?https?://(www\.)?(..).*', '$2'
        # Different sites can share the same two letters (ru.wikipedia / ru.wikisource).
        # Number the later ones (ru, ru2, ru3, ...) instead of overwriting the earlier link.
        $key = $short
        $suffix = 2
        while ($map.ContainsKey($key)) {
            $key = "$short$suffix"
            $suffix++
        }
        $map[$key] = $url
    }
    Add-Member -InputObject $n -NotePropertyName urlsMap -NotePropertyValue $map
}
# Wraps a value in double quotes for use as a DOT identifier, so ids that are not
# plain alphanumerics or numerals still parse. An embedded quote is backslash-escaped.
$quoteDotId = { param($id) '"' + ([string]$id).Replace('"', '\"') + '"' }

# One DOT node statement per node: an HTML-like label plus a fill colour.
$nodesDot = foreach ($node in $nodes) {
    if ($null -eq $node) { continue }
    $arr = @(
        $node.labelRu
        # A hash literal cannot have a null key; without this guard the assignment
        # would fail and $arr would silently keep the previous node's rows.
        # NOTE(review): $idRoot is not assigned anywhere in the visible script;
        # presumably a URL prefix set in the caller's session - confirm.
        if ($null -ne $node.scientificName) { @{ $node.scientificName = $idRoot + $node.id } }
        $node.rank
        "descendants: " + $node.descendants
        $node.urlsMap
    )
    $label = Get-DotNodeLabel $arr
    # Nodes of the same phylum share a colour; nodes without one stay white.
    $color = $node.phylum ? (Get-ColorRgb $node.phylum -pastelFactor 0.6) : 'white'
    "$(& $quoteDotId $node.id) [label=$label fillcolor=""$color""]"
}

# One DOT edge statement per (source, target) pair. Iterating with foreach emits
# nothing for an empty result instead of a dangling " -> " line.
$edgesDot = foreach ($edge in $edges) {
    if ($null -eq $edge) { continue }
    (& $quoteDotId $edge.source), (& $quoteDotId $edge.target) -join ' -> '
}

$outFile = "delme.svg"
$gv = 'digraph {',
$nodesDot,
$edgesDot,
'}'

# Render to SVG and open the result only when dot actually succeeded, so that a
# missing executable or a syntax error does not open a stale or absent file.
$gv | dot -Gdpi=27 -Grankdir=TB -Nstyle=filled -Nshape=box -Tsvg -o $outFile
$dotSucceeded = $?
if ($dotSucceeded) {
    Start-Process $outFile
}
else {
    Write-Warning "Graphviz 'dot' did not complete successfully; $outFile was not opened."
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment