simple/document.js

import Annotable from './annotable';
import WordsToSentenceAnnotator from './annotator/ssplit';
import Sentence from './sentence';
import CorefChain from './coref-chain';
import CorefAnnotator from './annotator/coref';

/**
 * The CoreNLP API JSON structure representing a document
 * @typedef DocumentJSON
 * @property {number} index
 * @property {Array.<Sentence>} sentences
 */

/**
 * @class
 * @classdesc Class representing a Document
 * @extends Annotable
 * @memberof CoreNLP/simple
 */
class Document extends Annotable {
  /**
   * Create a Document
   * @param {string} text
   */
  constructor(text) {
    super(text);
    this._sentences = [];
  }

  /**
   * Get a string representation
   * @return {string} document
   */
  toString() {
    return this._text || this._sentences.map(sent => sent.toString()).join('. ');
  }

  /**
   * Get a list of sentences
   * @returns {Array.<Sentence>} sentences - The document sentences
   */
  sentences() {
    if (!this.hasAnnotator(WordsToSentenceAnnotator)) {
      throw new Error('Asked for sentences on Document, but there are unmet annotator dependencies.');
    }
    return this._sentences;
  }

  /**
   * Get the sentence for a given index
   * @param {number} index - The position of the sentence to get
   * @returns {Sentence} sentence - The document sentences
   */
  sentence(index) {
    return this.sentences()[index];
  }

  /**
   * @todo Missing implementation
   * @see https://stanfordnlp.github.io/CoreNLP/dcoref.html
   * @returns {Array.<CorefChain>}
   */
  corefs() {
    if (!this.hasAnnotator(CorefAnnotator)) {
      throw new Error('Asked for corefs on Document, but there are unmet annotator dependencies.');
    }
    return this._corefs;
  }

  /**
   * Get the coreference for a given index
   * @param {number} index - 0-based index of the coref chain list
   * @see https://stanfordnlp.github.io/CoreNLP/dcoref.html
   * @returns {CorefChain}
   */
  coref(index) {
    return this.corefs()[index];
  }

  /**
   * Sets the language ISO (given by the pipeline during the annotation process)
   * This is solely to keep track of the language chosen for further analysis
   * @return {string} text
   */
  setLanguageISO(iso) {
    super.setLanguageISO(iso);
    this._sentences.forEach(sentence => sentence.setLanguageISO(iso));
  }

  /**
   * Update an instance of Document with data provided by a JSON
   * @param {DocumentJSON} data - The document data, as returned by CoreNLP API service
   * @returns {Document} document - The current document instance
   */
  fromJSON(data) {
    if (data.sentences) {
      this.addAnnotator(WordsToSentenceAnnotator);
      this._sentences = data.sentences.map(sent => Sentence.fromJSON(sent, true));
    }
    if (data.corefs) {
      this.addAnnotator(CorefAnnotator);
      this._corefs = Object.keys(data.corefs)
        .filter(chainIndex => chainIndex !== 'length')
        .map(chainIndex => CorefChain.fromJSON(data.corefs[chainIndex]).fromDocument(this));
    }
    return this;
  }

  toJSON() {
    return {
      text: this._text,
      sentences: this._sentences,
    };
  }

  /**
   * Get an instance of Document from a given JSON
   * @param {DocumentJSON} data - The document data, as returned by CoreNLP API service
   * @returns {Document} document - A new Document instance
   */
  static fromJSON(data) {
    const instance = new this();
    return instance.fromJSON(data);
  }
}

export default Document;