package org.cleartk.syntax.opennlp;

import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.util.Span;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.util.IOUtil;
import org.cleartk.util.ParamUtil;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.TypeCapability;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.factory.initializable.InitializableFactory;
import org.uimafit.util.JCasUtil;

@TypeCapability(outputs = {"org.cleartk.token.type.Sentence"})
/* loaded from: input_file:org/cleartk/syntax/opennlp/SentenceAnnotator.class */
public class SentenceAnnotator extends JCasAnnotator_ImplBase {

    @ConfigurationParameter(mandatory = true, description = "provides the path of the OpenNLP sentence segmenter model file")
    private String sentenceModelPath;

    @ConfigurationParameter(mandatory = false, description = "provides an array of the annotation types that will be processed by this sentence annotator.  If the parameter is not filled, then SentenceAnnotator will process on the contents of jCas.getDocumentText().  It us up to the caller to ensure annotations do not overlap.")
    private String[] windowClassNames;

    @ConfigurationParameter(description = "class type of the sentences that are created by this annotator. If this parameter is not filled, then sentencesof type org.cleartk.type.Sentence will be created.", defaultValue = {"org.cleartk.token.type.Sentence"})
    private String sentenceTypeName;
    Class<? extends Annotation> sentenceClass;
    protected List<Class<? extends Annotation>> windowClasses;
    Constructor<? extends Annotation> sentenceConstructor;
    public static final String multipleNewlinesRegex = "\\s*\\n\\s*\\n\\s*";
    SentenceDetector sentenceDetector;
    Pattern multipleNewlinesPattern;
    Pattern leadingWhitespacePattern;
    Pattern trailingWhitespacePattern;
    public static final String PARAM_SENTENCE_MODEL_PATH = ConfigurationParameterFactory.createConfigurationParameterName(SentenceAnnotator.class, "sentenceModelPath");
    public static final String PARAM_SENTENCE_TYPE_NAME = ConfigurationParameterFactory.createConfigurationParameterName(SentenceAnnotator.class, "sentenceTypeName");
    public static final String PARAM_WINDOW_CLASS_NAMES = ConfigurationParameterFactory.createConfigurationParameterName(SentenceAnnotator.class, "windowClassNames");

    public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(SentenceAnnotator.class, new Object[]{PARAM_SENTENCE_MODEL_PATH, ParamUtil.getParameterValue(PARAM_SENTENCE_MODEL_PATH, "/models/en-sent.bin"), PARAM_WINDOW_CLASS_NAMES, ParamUtil.getParameterValue(PARAM_WINDOW_CLASS_NAMES, (String) null)});
    }

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            this.sentenceClass = InitializableFactory.getClass(this.sentenceTypeName, Annotation.class);
            this.sentenceConstructor = this.sentenceClass.getConstructor(JCas.class, Integer.TYPE, Integer.TYPE);
            if (this.windowClassNames != null && this.windowClassNames.length > 0) {
                this.windowClasses = new ArrayList();
                for (String str : this.windowClassNames) {
                    this.windowClasses.add(InitializableFactory.getClass(str, Annotation.class));
                }
            }
            this.sentenceDetector = new SentenceDetectorME(new SentenceModel(IOUtil.getInputStream(SentenceAnnotator.class, this.sentenceModelPath)));
            this.multipleNewlinesPattern = Pattern.compile(multipleNewlinesRegex, 40);
            this.leadingWhitespacePattern = Pattern.compile("^\\s+");
            this.trailingWhitespacePattern = Pattern.compile("\\s+$");
        } catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        if (this.windowClasses == null) {
            processText(jCas, jCas.getDocumentText(), 0);
            return;
        }
        Iterator<Class<? extends Annotation>> it = this.windowClasses.iterator();
        while (it.hasNext()) {
            Iterator it2 = new ArrayList(JCasUtil.select(jCas, it.next())).iterator();
            while (it2.hasNext()) {
                Annotation annotation = (Annotation) it2.next();
                processText(jCas, annotation.getCoveredText(), annotation.getBegin());
            }
        }
    }

    protected void processText(JCas jCas, String str, int i) throws AnalysisEngineProcessException {
        List<Integer> sentenceOffsets = getSentenceOffsets(str);
        Matcher matcher = this.leadingWhitespacePattern.matcher(str);
        int length = matcher.find() ? 0 + matcher.group().length() : 0;
        try {
            for (Integer num : sentenceOffsets) {
                int intValue = num.intValue();
                String substring = str.substring(length, intValue);
                if (substring.trim().length() > 0) {
                    Matcher matcher2 = this.trailingWhitespacePattern.matcher(substring);
                    if (matcher2.find()) {
                        intValue -= matcher2.group().length();
                    }
                    this.sentenceConstructor.newInstance(jCas, Integer.valueOf(i + length), Integer.valueOf(i + intValue)).addToIndexes();
                }
                length = num.intValue();
            }
            if (length < str.length()) {
                String substring2 = str.substring(length, str.length());
                int length2 = str.length();
                if (substring2.trim().length() > 0) {
                    Matcher matcher3 = this.trailingWhitespacePattern.matcher(substring2);
                    if (matcher3.find()) {
                        length2 -= matcher3.group().length();
                    }
                    this.sentenceConstructor.newInstance(jCas, Integer.valueOf(i + length), Integer.valueOf(i + length2)).addToIndexes();
                }
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    private List<Integer> getSentenceOffsets(String str) {
        Matcher matcher = this.multipleNewlinesPattern.matcher(str);
        ArrayList arrayList = new ArrayList();
        while (matcher.find()) {
            arrayList.add(Integer.valueOf(matcher.end()));
        }
        for (Span span : this.sentenceDetector.sentPosDetect(str)) {
            arrayList.add(Integer.valueOf(span.getStart()));
        }
        Collections.sort(arrayList);
        return arrayList;
    }
}
