Created
October 22, 2025 13:04
-
-
Save Podbrushkin/d642d5b4487be7f80146ed0e2d4b1a09 to your computer and use it in GitHub Desktop.
Create a Graphviz tree with URLs and colors from a Neo4j taxonomy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function Get-DotNodeLabel ($arr) {
    <#
    .SYNOPSIS
    Builds a Graphviz HTML-like table label from a list of row descriptions.
    .DESCRIPTION
    Each element of $arr becomes one table row:
      * a string    -> a single cell spanning the whole table;
      * a hashtable -> one underlined link cell per entry (key = text, value = href);
      * $null       -> skipped.
    Anything else is reported on the host and skipped.
    .PARAMETER arr
    The row descriptions, in display order.
    .OUTPUTS
    String: the label wrapped in <...>, ready to be used as a DOT `label=` value.
    #>

    # The widest row decides the colspan used by single-cell rows.
    $maxColumns = 0
    foreach ($item in @($arr)) {
        $maxColumns = [math]::Max($item.Count, $maxColumns)
    }
    $tdAttrs = "width=""150"" colspan=""$maxColumns"""

    $rows = @(
        foreach ($item in @($arr)) {
            if ($item -is [string]) {
                "<tr><td $tdAttrs>$item</td></tr>"
            }
            elseif ($item -is [hashtable]) {
                # A row with a single link is stretched across the table like a text row.
                if ($item.Count -eq 1) { $cellAttrs = $tdAttrs } else { $cellAttrs = '' }
                $urlCells = $item.GetEnumerator() | ForEach-Object {
                    '<td target="_blank" href="{0}" {2}><u>{1}</u></td>' -f $_.Value, $_.Name, $cellAttrs
                }
                "<tr>$urlCells</tr>"
            }
            elseif ($null -eq $item) {
                # Missing values are silently ignored.
            }
            else {
                Write-Host -ForegroundColor Yellow 'What is this?' $item
            }
        }
    )

    # HTML-like labels are parsed as XML, so a bare '&' (common in URL query
    # strings) must be written as the '&amp;' entity.
    return @"
<<table border="0" cellborder="0" cellpadding="0" cellspacing="0" >
$rows
</table>>
"@.Replace('&', '&amp;')
}
function Get-ColorRgb {
    <#
    .SYNOPSIS
    Derives a stable color from the MD5 hash of an arbitrary value.
    .PARAMETER Identity
    Value whose ToString() text is hashed. $null yields 'black'.
    .PARAMETER pastelFactor
    Values below 1.0 blend each channel towards 255; 1.0 leaves the hash-derived channels untouched.
    .PARAMETER alpha
    Values below 1.0 switch the output to the "rgba(r, g, b, alpha)" form.
    .OUTPUTS
    String: 'black', "rgba(r, g, b, alpha)", or "#rrggbb" (lower-case hex).
    #>
    param(
        $Identity,
        [double]$pastelFactor = 1.0,
        [double]$alpha = 1.0
    )

    if ($null -eq $Identity) { return 'black' }

    # The hash object is IDisposable; release it even if hashing throws.
    $md5 = [System.Security.Cryptography.MD5]::Create()
    try {
        $bytes = $md5.ComputeHash([System.Text.Encoding]::UTF8.GetBytes($Identity.ToString()))
    }
    finally {
        $md5.Dispose()
    }

    # Three hash bytes serve as the red, green and blue channels.
    [int]$r = $bytes[0]
    [int]$g = $bytes[4]
    [int]$b = $bytes[8]

    if ($pastelFactor -lt 1.0) {
        $whiteShare = 255 * (1 - $pastelFactor)
        $r = [math]::Floor($r * $pastelFactor + $whiteShare)
        $g = [math]::Floor($g * $pastelFactor + $whiteShare)
        $b = [math]::Floor($b * $pastelFactor + $whiteShare)
    }

    if ($alpha -lt 1.0) {
        return "rgba($r, $g, $b, $alpha)"
    }
    return "#{0:x2}{1:x2}{2:x2}" -f $r, $g, $b
}
function Get-WikidataExternalUrls ($qids, $PropIds, [switch]$OutSparql) {
    <#
    .SYNOPSIS
    Builds (and by default runs) a SPARQL query that maps Wikidata items to external web URLs.
    .DESCRIPTION
    Every entry of -PropIds contributes one group pattern that binds ?web:
      * 'P<digits>'        - the item's value for that property, substituted into the
                             property's wdt:P1630 value via REPLACE;
      * anything '*wiki*'  - the page that is schema:about the item and
                             schema:isPartOf <https://ENTRY.org/>;
      * 'Q<digits>'        - the pq:P2699 qualifier of a P1343 statement whose value is that item.
    The patterns are combined with UNION and restricted to the given items through VALUES.
    .PARAMETER qids
    Item ids or entity URIs; everything up to the last '/' is dropped, empty values are
    ignored and duplicates are removed.
    .PARAMETER PropIds
    Property ids, item ids and wiki site names as described above.
    .PARAMETER OutSparql
    Return the query text instead of passing it to Invoke-Sparql.
    .OUTPUTS
    The SPARQL text (with -OutSparql) or whatever Invoke-Sparql returns for it.
    #>

    # Normalise ids to 'wd:Qnnn'; blank ids would otherwise yield a bare 'wd:' token.
    $itemValues = @($qids) -replace '.*/' |
        Where-Object { $_ } |
        ForEach-Object { "wd:$_" } |
        Select-Object -Unique

    # Always collect into an array: with a scalar here, later additions would be glued
    # together as one string and the UNION keyword would never be inserted.
    # Ids are matched by full shape so that site names such as 'pl.wikipedia' or
    # 'qu.wikipedia' are not mistaken for property or item ids.
    $blocksToUnion = @(
        foreach ($propId in $PropIds) {
            if ($propId -match '^P\d+$') {
@"
{
?q wdt:$propId ?${propId}Id.
wd:$propId wdt:P1630 ?${propId}Fmt.
BIND(IRI(REPLACE(?${propId}Id, "(^.*)", ?${propId}Fmt)) AS ?web)
}
"@
            }
        }
        foreach ($site in $PropIds) {
            if ($site -like '*wiki*') {
@"
{
?w schema:about ?q;
schema:isPartOf <https://${site}.org/>.
BIND(?w AS ?web)
}
"@
            }
        }
        foreach ($sourceItem in $PropIds) {
            if ($sourceItem -match '^Q\d+$') {
@"
{
?q p:P1343 ?stmt.
?stmt ps:P1343 wd:$sourceItem .
?stmt pq:P2699 ?web.
}
"@
            }
        }
    )

    $sparql = @"
SELECT ?q ?web WHERE {
VALUES ?q { $itemValues }
$($blocksToUnion -join ' UNION ')
}
"@

    if ($OutSparql) { return $sparql }
    else { return (Invoke-Sparql $sparql) }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Renders the largest part of a Neo4j taxonomy as an SVG tree with Graphviz.
# Relies on commands defined outside this script: Invoke-CypherNeo4j, Invoke-Sparql
# (through Get-WikidataExternalUrls), Get-DotNodeLabel, Get-ColorRgb and the `dot` executable.

# Nodes: the 400 nodes with the most descendants, plus every node on a
# HAS_CHILD path leading from a :Taxon node to one of them.
$cypher = @"
MATCH (n0)
ORDER BY n0.descendants DESC
LIMIT 400
MATCH path = (:Taxon)-[:HAS_CHILD*0..]->(n0)
UNWIND nodes(path) AS n
WITH DISTINCT n
RETURN n.id AS id, n.scientificName AS scientificName
, n.labelRu AS labelRu, n.labelEn AS labelEn
, n.descendants AS descendants
, n.rank AS rank, n.phylum AS phylum
,n.qid AS qid
"@
$nodes = Invoke-CypherNeo4j $cypher

# Edges: the distinct relationships on those same paths, as source/target id pairs.
$cypher = @"
MATCH (n)
ORDER BY n.descendants DESC
LIMIT 400
MATCH path = (:Taxon)-[:HAS_CHILD*0..]->(n)
UNWIND relationships(path) AS r
RETURN DISTINCT startNode(r).id AS source, endNode(r).id AS target
"@
$edges = Invoke-CypherNeo4j $cypher

# Candidate link sources, one per line: "<id> <human-readable note>".
# Only the first space-separated token is passed on.
$propIds = @'
P6385 Krugosvet
P11514 bigenc
P2924 old.bigenc
P1417 britannica
P7836 livelib
P7433 fantlab
P1233 isfdb
P12614 azlibru
P1953 Discogs
P1553 music.yandex
P3151 inatur
P846 gbif
P830 eol
ru.wikipedia
commons.wikimedia
ru.wikisource
Q124821483 prodetlit
'@ -split '\r?\n'

# Let the user pick the sources interactively.
$propIdsToAdd = $propIds | Out-GridView -PassThru | ForEach-Object { ($_ -split ' ')[0] }

# Attach the found URLs to the first node carrying the matching qid.
Get-WikidataExternalUrls $nodes.qid $propIdsToAdd |
    Group-Object { $_.q -replace '.*/' } |
    ForEach-Object {
        $qid = $_.Name
        $urls = $_.Group.web
        $nodes | Where-Object qid -eq $qid | Select-Object -First 1 | ForEach-Object {
            Add-Member -InputObject $_ -NotePropertyName urls -NotePropertyValue $urls
        }
    }

# Build a "short label -> URL" map per node. The label is the first two characters of
# the host name (after an optional "www."), so different sites can share a label
# (e.g. ru.wikipedia and ru.wikisource). Colliding labels get a numeric suffix instead
# of silently overwriting the earlier URL; repeated URLs are listed once.
$nodes | Where-Object urls | ForEach-Object {
    $node = $_
    $map = @{}
    $seenUrls = [System.Collections.Generic.HashSet[string]]::new()
    foreach ($url in $node.urls) {
        if (-not $seenUrls.Add("$url")) { continue }
        $short = $url -replace '^.*?https?://(www.)?(..).*', '$2'
        $key = $short
        $suffix = 2
        while ($map.ContainsKey($key)) {
            $key = "$short$suffix"
            $suffix++
        }
        $map[$key] = $url
    }
    Add-Member -InputObject $node -NotePropertyName urlsMap -NotePropertyValue $map
}

# One DOT node statement per taxon.
# NOTE(review): $idRoot is not assigned anywhere in this script; presumably it is a URL
# prefix defined in the calling session - confirm. When unset, the href is just the node id.
$nodesDot = $nodes | ForEach-Object {
    $arr = @(
        $_.labelRu
        @{$_.scientificName = $idRoot + $_.id}
        $_.rank
        "descendants: " + $_.descendants
        $_.urlsMap
    )
    $label = Get-DotNodeLabel $arr
    # Nodes of the same phylum share a fill color; nodes without a phylum stay white.
    $color = if ($_.phylum) { Get-ColorRgb $_.phylum -pastelFactor 0.6 } else { 'white' }
    "$($_.id) [label=$label fillcolor=""$color""]"
}

$edgesDot = $edges | ForEach-Object { ($_.source, $_.target) -join ' -> ' }

$outFile = "delme.svg"

# Flatten into one list of lines so that each line reaches dot as its own input record.
$gv = @('digraph {') + @($nodesDot) + @($edgesDot) + @('}')
$gv | dot -Gdpi=27 -Grankdir=TB -Nstyle=filled -Nshape=box -Tsvg -o $outFile

# Open the rendered file with its default application.
Start-Process $outFile
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment