simple/annotator/ssplit.js

import Annotator from '../annotator';
import TokenizerAnnotator from './tokenize';

/**
 * @class
 * @classdesc Class representing a WordsToSentenceAnnotator.
 *            Combines multiple {@link Token}s into sentences
 * @extends Annotator
 * @memberof CoreNLP/simple/annotator
 * @requires tokenize, ssplit
 * @see {@link https://stanfordnlp.github.io/CoreNLP/ssplit.html|WordsToSentenceAnnotator}
 */
class WordsToSentenceAnnotator extends Annotator {
  /**
   * Build the `ssplit` annotator with its default option set.
   *
   * The defaults declared here are merged with `options`; any key supplied by
   * the caller wins over the default of the same name, and unknown keys are
   * passed through unchanged. A fresh {@link TokenizerAnnotator} is handed to
   * the base class as this annotator's single dependency.
   *
   * @param {Object} [options] a key-value map of options, without the annotator prefix
   */
  constructor(options = {}) {
    // Baseline settings; spread first so caller-provided keys override them.
    const defaults = {
      eolonly: false,
      isOneSentence: false,
      newlineIsSentenceBreak: 'never',
      boundaryMultiTokenRegex: null,
      boundaryTokenRegex: '\\.|[!?]+',
      boundariesToDiscard: null,
      htmlBoundariesToDiscard: null,
      tokenPatternsToDiscard: null,
      boundaryFollowersRegex: null,
    };
    const settings = { ...defaults, ...options };

    // Sentence splitting operates on tokens, so the tokenizer is a prerequisite.
    const dependencies = [new TokenizerAnnotator()];

    super('ssplit', settings, dependencies);
  }
}

export default WordsToSentenceAnnotator;