Last active
December 29, 2015 10:39
-
-
Save MagnusEnger/7658143 to your computer and use it in GitHub Desktop.
Converting MARC to RDF, using Catmandu.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/perl | |
| # Reads the MARCXML records in sample_records.marcxml and writes them out | |
| # as RDF in Turtle syntax, with the fixes from marc2rdf.fix applied by the | |
| # exporter. | |
| use Catmandu::Importer::MARC; | |
| use Catmandu::Exporter::RDF; | |
| use Modern::Perl; | |
| # Importer over the sample MARCXML file. | |
| my $marc_in = Catmandu::Importer::MARC->new( type => 'XML', file => 'sample_records.marcxml' ); | |
| # Turtle exporter; the fix file is handed to the exporter, not the importer. | |
| my $rdf_out = Catmandu::Exporter::RDF->new( fix => 'marc2rdf.fix', type => 'Turtle' ); | |
| # Feed every imported record to the exporter, then finalize the output. | |
| $rdf_out->add_many( $marc_in ); | |
| $rdf_out->commit; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # The _id field becomes the subject of the triples: prefix it to form a URI. | |
| prepend( '_id', 'http://example.org/id/' ); | |
| # Copy MARC 100$a and 245$a into Dublin Core creator and title. | |
| marc_map( "100a", "dc:creator" ); | |
| marc_map( "245a", "dc:title" ); | |
| # Remove the 'record' field once the mappings above have read from it. | |
| remove_field( "record" ); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| $ perl catmandu_marc2rdf.pl | |
| <http://example.org/id/960778950> <http://purl.org/dc/elements/1.1/creator> "Wall, Larry" ; | |
| <http://purl.org/dc/elements/1.1/title> "Programming Perl" . | |
| <http://example.org/id/11447981x> <http://purl.org/dc/elements/1.1/creator> "DuCharme, Bob" ; | |
| <http://purl.org/dc/elements/1.1/title> "Learning SPARQL" . |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <collection xmlns="http://www.loc.gov/MARC21/slim"> | |
| <record> | |
| <leader>01039nam a2200397 a 4500</leader> | |
| <controlfield tag="001">960778950</controlfield> | |
| <controlfield tag="008"> r eng </controlfield> | |
| <datafield tag="020" ind1=" " ind2=" "> | |
| <subfield code="a">1565921496</subfield> | |
| <subfield code="b">h.</subfield> | |
| </datafield> | |
| <datafield tag="060" ind1="k" ind2=" "> | |
| <subfield code="a">QA 76</subfield> | |
| </datafield> | |
| <datafield tag="060" ind1="a" ind2=" "> | |
| <subfield code="a">QA 75.1</subfield> | |
| </datafield> | |
| <datafield tag="080" ind1="u" ind2="k"> | |
| <subfield code="a">004.43</subfield> | |
| </datafield> | |
| <datafield tag="080" ind1="u" ind2="b"> | |
| <subfield code="a">681.3.04</subfield> | |
| </datafield> | |
| <datafield tag="080" ind1="u" ind2="x"> | |
| <subfield code="a">681.3.04</subfield> | |
| </datafield> | |
| <datafield tag="080" ind1="y" ind2="f"> | |
| <subfield code="a">681.3.04Perl</subfield> | |
| </datafield> | |
| <datafield tag="080" ind1="a" ind2=" "> | |
| <subfield code="a">681.3.04PERL</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="u" ind2="p"> | |
| <subfield code="a">005.43</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="d" ind2=" "> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="u" ind2="a"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="x" ind2="d"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="u" ind2="f"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="u" ind2="k"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="x" ind2="h"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="u" ind2="x"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="v" ind2="l"> | |
| <subfield code="a">005.133</subfield> | |
| </datafield> | |
| <datafield tag="082" ind1="c" ind2=" "> | |
| <subfield code="a">005.71</subfield> | |
| </datafield> | |
| <datafield tag="100" ind1=" " ind2=" "> | |
| <subfield code="a">Wall, Larry</subfield> | |
| </datafield> | |
| <datafield tag="245" ind1=" " ind2=" "> | |
| <subfield code="a">Programming Perl</subfield> | |
| <subfield code="c">Larry Wall, Tom Christiansen and Randal L. Schwartz ; with Stephen Potter</subfield> | |
| </datafield> | |
| <datafield tag="250" ind1=" " ind2=" "> | |
| <subfield code="a">2nd ed.</subfield> | |
| </datafield> | |
| <datafield tag="260" ind1=" " ind2=" "> | |
| <subfield code="b">O'Reilly</subfield> | |
| <subfield code="c">1996</subfield> | |
| <subfield code="a">Bonn</subfield> | |
| </datafield> | |
| <datafield tag="300" ind1=" " ind2=" "> | |
| <subfield code="a">XXI, 646 s.</subfield> | |
| </datafield> | |
| <datafield tag="500" ind1=" " ind2=" "> | |
| <subfield code="a">"Covers Perl 5" - Omslaget</subfield> | |
| </datafield> | |
| <datafield tag="700" ind1=" " ind2=" "> | |
| <subfield code="a">Christiansen, Tom</subfield> | |
| </datafield> | |
| <datafield tag="700" ind1=" " ind2=" "> | |
| <subfield code="a">Schwartz, Randal L.</subfield> | |
| </datafield> | |
| <datafield tag="942" ind1=" " ind2=" "> | |
| <subfield code="c">BK</subfield> | |
| </datafield> | |
| <datafield tag="999" ind1=" " ind2=" "> | |
| <subfield code="c">1</subfield> | |
| <subfield code="d">1</subfield> | |
| </datafield> | |
| <datafield tag="952" ind1=" " ind2=" "> | |
| <subfield code="w">2011-08-23</subfield> | |
| <subfield code="r">2011-08-23</subfield> | |
| <subfield code="4">0</subfield> | |
| <subfield code="0">0</subfield> | |
| <subfield code="6">005_430000000000000</subfield> | |
| <subfield code="9">1</subfield> | |
| <subfield code="b">BIB</subfield> | |
| <subfield code="1">0</subfield> | |
| <subfield code="o">005.43</subfield> | |
| <subfield code="d">2011-08-23</subfield> | |
| <subfield code="7">0</subfield> | |
| <subfield code="2">ddc</subfield> | |
| <subfield code="y">BK</subfield> | |
| <subfield code="a">BIB</subfield> | |
| </datafield> | |
| </record> | |
| <record> | |
| <leader>00580nam a22001692a 4500</leader> | |
| <controlfield tag="001">11447981x</controlfield> | |
| <controlfield tag="008"> r eng </controlfield> | |
| <datafield tag="020" ind1=" " ind2=" "> | |
| <subfield code="a">9781449306595</subfield> | |
| <subfield code="b">h.</subfield> | |
| </datafield> | |
| <datafield tag="100" ind1=" " ind2=" "> | |
| <subfield code="a">DuCharme, Bob</subfield> | |
| </datafield> | |
| <datafield tag="245" ind1=" " ind2=" "> | |
| <subfield code="a">Learning SPARQL</subfield> | |
| <subfield code="b">querying and updating with SPARQL 1.1</subfield> | |
| <subfield code="c">Bob DuCharme</subfield> | |
| </datafield> | |
| <datafield tag="260" ind1=" " ind2=" "> | |
| <subfield code="a">Beijing</subfield> | |
| <subfield code="b">O'Reilly Media</subfield> | |
| <subfield code="c">2011</subfield> | |
| </datafield> | |
| <datafield tag="300" ind1=" " ind2=" "> | |
| <subfield code="a">xv, 235 s.</subfield> | |
| <subfield code="b">ill.</subfield> | |
| </datafield> | |
| <datafield tag="650" ind1=" " ind2=" "> | |
| <subfield code="a">Semantic web</subfield> | |
| </datafield> | |
| <datafield tag="942" ind1=" " ind2=" "> | |
| <subfield code="c">BK</subfield> | |
| </datafield> | |
| <datafield tag="999" ind1=" " ind2=" "> | |
| <subfield code="c">2</subfield> | |
| <subfield code="d">2</subfield> | |
| </datafield> | |
| <datafield tag="952" ind1=" " ind2=" "> | |
| <subfield code="w">2011-10-26</subfield> | |
| <subfield code="r">2011-10-26</subfield> | |
| <subfield code="4">0</subfield> | |
| <subfield code="0">0</subfield> | |
| <subfield code="9">2</subfield> | |
| <subfield code="b">BIB</subfield> | |
| <subfield code="1">0</subfield> | |
| <subfield code="d">2011-10-26</subfield> | |
| <subfield code="7">0</subfield> | |
| <subfield code="c">STAFF</subfield> | |
| <subfield code="2">ddc</subfield> | |
| <subfield code="y">BK</subfield> | |
| <subfield code="a">BIB</subfield> | |
| </datafield> | |
| <datafield tag="952" ind1=" " ind2=" "> | |
| <subfield code="w">2011-11-22</subfield> | |
| <subfield code="r">2011-11-22</subfield> | |
| <subfield code="4">0</subfield> | |
| <subfield code="0">0</subfield> | |
| <subfield code="9">8</subfield> | |
| <subfield code="b">BIB</subfield> | |
| <subfield code="1">0</subfield> | |
| <subfield code="d">2011-11-22</subfield> | |
| <subfield code="7">0</subfield> | |
| <subfield code="c">HD</subfield> | |
| <subfield code="2">ddc</subfield> | |
| <subfield code="y">EBK</subfield> | |
| <subfield code="a">BIB</subfield> | |
| </datafield> | |
| </record> | |
| </collection> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yup :). The _id contains the subject of your triples. I'm very happy it works for you!