import head from 'lodash.head';
import Annotable from './annotable';
import TokenizerAnnotator from './annotator/tokenize';
import ParserAnnotator from './annotator/parse';
import DependencyParseAnnotator from './annotator/depparse';
import Token from './token';
import Governor from './governor';
/**
* The CoreNLP API JSON structure representing a sentence
* @typedef SentenceJSON
* @property {number} index - 0-based index, as sentences come indexed by StanfordCoreNLP
* @property {Array.<Token>} tokens
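* @example
* // A minimal, illustrative shape (real CoreNLP tokens carry more fields):
* {
*   index: 0,
*   tokens: [{ index: 1, word: 'Hello' }, { index: 2, word: 'world' }],
* }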
*/
/**
* @class
* @classdesc Class representing a Sentence
* @extends Annotable
* @memberof CoreNLP/simple
* @see {@link https://github.com/stanfordnlp/CoreNLP/blob/master/src/edu/stanford/nlp/simple/Sentence.java}
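* @example
* // Minimal usage sketch; the JSON below is an abridged, hypothetical CoreNLP response
* const sent = Sentence.fromJSON({
*   sentences: [{ index: 0, tokens: [{ index: 1, word: 'Hello' }, { index: 2, word: 'world' }] }],
* });
* sent.words(); // => ['Hello', 'world']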
*/
class Sentence extends Annotable {
/**
* Create a Sentence
* @param {string} text
*/
constructor(text) {
super(text);
this._tokens = [];
this._governors = [];
}
/**
* Get a string representation
* @returns {string} sentence
*/
toString() {
return this._text || this._tokens.map(token => token.toString()).join(' ');
}
/**
* Get the index relative to the parent document
* @returns {number} index
*/
index() {
return this._index;
}
/**
* Get a string representation of the parse tree structure
* @returns {string} parse
*/
parse() {
return this._parse;
}
/**
* Get an array of string representations of the sentence words
* @requires {@link TokenizerAnnotator}
* @throws {Error} in case the required annotator was not applied to the sentence
* @returns {Array.<string>} words
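* @example
* // Illustrative output for a tokenized sentence (hypothetical values):
* sent.words(); // => ['The', 'dog', 'barks', '.']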
*/
words() {
if (!this.hasAnnotator(TokenizerAnnotator)) {
throw new Error('Asked for words on Sentence, but there are unmet annotator dependencies.');
}
return this._tokens.map(token => token.word());
}
/**
* Get a string representation of the Nth word of the sentence
* @requires {@link TokenizerAnnotator}
* @throws {Error} in case the required annotator was not applied to the sentence
* @throws {Error} in case the token for the given index does not exist
* @param {number} index - 0-based index as they are arranged naturally
* @returns {string} word
*/
word(index) {
if (!this.hasAnnotator(TokenizerAnnotator)) {
throw new Error('Asked for a word on Sentence, but there are unmet annotator dependencies.');
}
if (!this._tokens[index]) {
throw new Error(`Sentence instance does not contain a token with index ${index}`);
}
return this._tokens[index].word();
}
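/**
* Iterate over the tokens of the sentence
* @example
* // Illustrative sketch; requires the sentence to have been tokenized first
* for (const token of sent) {
*   console.log(token.word());
* }
* @returns {Iterator.<Token>} iterator
*/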
[Symbol.iterator]() {
return this._tokens.values();
}
/**
* Get an array of the part-of-speech tags of the sentence tokens
* @returns {Array.<string>} posTags
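* @example
* // Illustrative Penn Treebank tags for "The dog barks ." (hypothetical values):
* sent.posTags(); // => ['DT', 'NN', 'VBZ', '.']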
*/
posTags() {
return this._tokens.map(token => token.pos());
}
/**
* Get the part-of-speech tag of the Nth token of the sentence
* @throws {Error} in case the token for the given index does not exist
* @param {number} index - 0-based index as they are arranged naturally
* @returns {string} posTag
*/
posTag(index) {
if (!this._tokens[index]) {
throw new Error(`Sentence instance does not contain a token with index ${index}`);
}
return this._tokens[index].pos();
}
/**
* Get an array of the lemmas of the sentence tokens
* @returns {Array.<string>} lemmas
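* @example
* // Illustrative lemmas for "The dogs barked ." (hypothetical values):
* sent.lemmas(); // => ['the', 'dog', 'bark', '.']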
*/
lemmas() {
return this._tokens.map(token => token.lemma());
}
/**
* Get the lemma of the Nth token of the sentence
* @throws {Error} in case the token for the given index does not exist
* @param {number} index - 0-based index as they are arranged naturally
* @returns {string} lemma
*/
lemma(index) {
if (!this._tokens[index]) {
throw new Error(`Sentence instance does not contain a token with index ${index}`);
}
return this._tokens[index].lemma();
}
/**
* Get an array of the NER tags of the sentence tokens
* @returns {Array.<string>} nerTags
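* @example
* // Illustrative NER values (hypothetical); "O" marks tokens outside any named entity
* sent.nerTags(); // => ['PERSON', 'O', 'O', 'LOCATION', 'O']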
*/
nerTags() {
return this._tokens.map(token => token.ner());
}
/**
* Get the NER tag of the Nth token of the sentence
* @throws {Error} in case the token for the given index does not exist
* @param {number} index - 0-based index as they are arranged naturally
* @returns {string} nerTag
*/
nerTag(index) {
if (!this._tokens[index]) {
throw new Error(`Sentence instance does not contain a token with index ${index}`);
}
return this._tokens[index].ner();
}
/**
* Get the list of governors annotated by the dependency parser
* @requires {@link DependencyParseAnnotator}
* @throws {Error} in case the required annotator was not applied to the sentence
* @returns {Array.<Governor>} governors
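* @example
* // Illustrative sketch: dump the dependency relations as plain JSON
* sent.governors().map(governor => governor.toJSON());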
*/
governors() {
if (!this.hasAnnotator(DependencyParseAnnotator)) {
throw new Error('Asked for governors on Sentence, but there are unmet annotator dependencies.');
}
return this._governors;
}
/**
* Get the Nth governor annotated by the dependency parser
* @requires {@link DependencyParseAnnotator}
* @throws {Error} in case the required annotator was not applied to the sentence
* @param {number} index - 0-based index as they are arranged naturally
* @returns {Governor} governor
*/
governor(index) {
if (!this.hasAnnotator(DependencyParseAnnotator)) {
throw new Error('Asked for a governor on Sentence, but there are unmet annotator dependencies.');
}
return this._governors[index];
}
// TODO
// eslint-disable-next-line class-methods-use-this, no-unused-vars
incommingDependencyLabel(index) {
}
// TODO
// eslint-disable-next-line class-methods-use-this
natlogPolarities() {
}
// TODO
// eslint-disable-next-line class-methods-use-this, no-unused-vars
natlogPolarity(index) {
}
// TODO
// eslint-disable-next-line class-methods-use-this
openie() {
}
// TODO
// eslint-disable-next-line class-methods-use-this, no-unused-vars
openieTriples(index) {
}
/**
* Get an array of the tokens of the sentence
* @requires {@link TokenizerAnnotator}
* @throws {Error} in case the required annotator was not applied to the sentence
* @returns {Array.<Token>} tokens
*/
tokens() {
if (!this.hasAnnotator(TokenizerAnnotator)) {
throw new Error('Asked for tokens on Sentence, but there are unmet annotator dependencies.');
}
return this._tokens;
}
/**
* Get the Nth token of the sentence
* @requires {@link TokenizerAnnotator}
* @throws {Error} in case the required annotator was not applied to the sentence
* @param {number} index - 0-based index as they are arranged naturally
* @returns {Token} token
*/
token(index) {
if (!this.hasAnnotator(TokenizerAnnotator)) {
throw new Error('Asked for a token on Sentence, but there are unmet annotator dependencies.');
}
return this._tokens[index];
}
// TODO
// eslint-disable-next-line class-methods-use-this
algorithms() {
}
/**
* Sets the language ISO code (given by the pipeline during the annotation process)
* This is solely to keep track of the language chosen for further analysis
* @param {string} iso - The language ISO code
*/
setLanguageISO(iso) {
super.setLanguageISO(iso);
this._tokens.forEach(token => token.setLanguageISO(iso));
}
/**
* Get a JSON representation of the current sentence
* @description
* The arrow function `data => Sentence.fromJSON(data).toJSON()` is idempotent,
* comparing the results by value (shallow comparison) rather than by reference.
* The returned JSON respects the same structure as expected by {@link Sentence#fromJSON}.
* @returns {SentenceJSON} data
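* @example
* // Illustrative round trip, following the note above (value equality, not reference equality):
* const json = sent.toJSON();
* const copy = Sentence.fromJSON(json, true); // `copy.toJSON()` matches `json` by value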
*/
toJSON() {
let json = {
index: this._index,
tokens: this._tokens.map(token => token.toJSON()),
basicDependencies: this._governors.map(governor => governor.toJSON()),
enhancedDependencies: this._enhancedDependencies,
enhancedPlusPlusDependencies: this._enhancedPlusPlusDependencies,
};
if (this._parse) {
json = { ...json, parse: this._parse };
}
return json;
}
/**
* Update an instance of Sentence with data provided by a JSON
* @param {SentenceJSON} data - The sentence data, as returned by the CoreNLP API service
* @param {boolean} [isSentence] - Indicates whether the given data represents just the sentence
* or a full document with just one sentence inside
* @returns {Sentence} sentence - The current sentence instance
*/
fromJSON(data, isSentence = false) {
const sentence = isSentence ? data : head(data.sentences);
this._index = sentence.index;
if (sentence.tokens) {
this.addAnnotator(TokenizerAnnotator);
this._tokens = sentence.tokens.map(tok => Token.fromJSON(tok));
}
if (sentence.parse) {
this.addAnnotator(ParserAnnotator);
this._parse = sentence.parse;
}
if (sentence.basicDependencies) {
this.addAnnotator(DependencyParseAnnotator);
this._governors = sentence.basicDependencies.map(gov =>
new Governor(gov.dep, this._tokens[gov.dependent - 1], this._tokens[gov.governor - 1]));
// @see relation annotator...
this._basicDependencies = sentence.basicDependencies;
this._enhancedDependencies = sentence.enhancedDependencies;
this._enhancedPlusPlusDependencies = sentence.enhancedPlusPlusDependencies;
}
return this;
}
/**
* Get an instance of Sentence from a given JSON
* @param {SentenceJSON} data - The sentence data, as returned by the CoreNLP API service
* @param {boolean} [isSentence] - Indicates whether the given data represents just the sentence
* or a full document with just one sentence inside
* @returns {Sentence} sentence - A new Sentence instance
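* @example
* // Illustrative sketch: data here is an abridged sentence JSON, hence isSentence = true
* const sent = Sentence.fromJSON({ index: 0, tokens: [{ index: 1, word: 'Hi' }] }, true);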
*/
static fromJSON(data, isSentence = false) {
const instance = new this();
return instance.fromJSON(data, isSentence);
}
}
export default Sentence;