rdfa-parser
install
npm install rdfa-parser
use
// Sample page: three FOAF "Person" resources described with RDFa attributes.
// The fragments are joined with no separator, so the markup is one unbroken line.
let html = [
  '<div vocab="http://xmlns.com/foaf/0.1/">',
  '<div resource="#manu" typeof="Person">',
  '<span property="name">Manu Sporny</span> knows',
  '<a property="knows" href="#alex">Alex</a> and',
  '<a property="knows" href="#brian">Brian</a>.',
  '</div>',
  '<div resource="#alex" typeof="Person">',
  '<span property="name">Alex Milowski</span> wrote the RDFa processor for this page.',
  '</div>',
  '<div resource="#brian" typeof="Person">',
  '<span property="name">Brian Sletten</span> wrote the syntax highlighting for the raw data.',
  '</div>',
  '</div>',
].join('');

// Parse the markup; only the HTML string is passed in this call (no base URL).
let triples = rdfaParser.parseRDFa(html);
triples data structure
{
"subject": "http://localhost/index.html",
"predicates": [
{
"predicate": {
"nominalValue": "http://www.w3.org/ns/rdfa#usesVocabulary"
},
"objects": [
{
"nominalValue": "http://xmlns.com/foaf/0.1/"
}
]
}
]
}
to get turtle triples
// Print every parsed entry through its own toString() method.
Array.from(triples).forEach((triple) => {
  console.log(triple.toString());
});
download single website
// Download a single page and print the triples found in it.
let base = "http://booking.com";
request(base, function (error, response, html) {
  // A failed request delivers no usable body, so report the failure and
  // stop instead of handing an undefined value to the parser.
  if (error) {
    console.error("Could not download " + base + ":", error);
    return;
  }
  // The page URL is passed as the second argument alongside the markup.
  let triples = rdfaParser.parseRDFa(html, base);
  for (let i = 0; i < triples.length; i++) {
    console.log(triples[i].toString());
  }
});
crawl website
// Crawl from a start URL and print the triples of every page the crawler reports.
let start = "http://booking.com";
// NOTE(review): presumably the number of link levels to follow — confirm
// against the crawler implementation.
let depth = 2;
// The callback receives a URL (`base`), which is then downloaded and parsed.
rdfaParser.crawler(start, depth, function (base) {
  request(base, function (error, response, html) {
    // Skip pages that could not be downloaded rather than passing an
    // undefined body to the parser; the crawl itself is not interrupted here.
    if (error) {
      console.error("Could not download " + base + ":", error);
      return;
    }
    let triples = rdfaParser.parseRDFa(html, base);
    for (let i = 0; i < triples.length; i++) {
      console.log(triples[i].toString());
    }
  });
});
more
You can find the whole project, including the web interface, test harness, and triple store, here.