term-vector
A Node.js module that creates a term vector from tokenized text. Use term-vector when implementing a vector space model.
Works with Unicode!
Does ngrams!
const tokens = 'this is really really really cool'

// just make a simple term vector
// [
//   { term: [ 'cool' ], positions: [ 5 ] },
//   { term: [ 'is' ], positions: [ 1 ] },
//   { term: [ 'really' ], positions: [ 2, 3, 4 ] },
//   { term: [ 'this' ], positions: [ 0 ] }
// ]

// make a term vector with ngrams of length 1 and 2
// [
//   { term: [ 'cool' ], positions: [ 5 ] },
//   { term: [ 'is' ], positions: [ 1 ] },
//   { term: [ 'is', 'really' ], positions: [ 1 ] },
//   { term: [ 'really' ], positions: [ 2, 3, 4 ] },
//   { term: [ 'really', 'really' ], positions: [ 2, 3 ] },
//   { term: [ 'really', 'cool' ], positions: [ 4 ] },
//   { term: [ 'this' ], positions: [ 0 ] },
//   { term: [ 'this', 'is' ], positions: [ 0 ] }
// ]