package org.semanticdesktop.aperture.extractor.works;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.extractor.util.StringExtractor;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;

/* loaded from: input_file:WEB-INF/lib/aperture-1.2.0.jar:org/semanticdesktop/aperture/extractor/works/WorksExtractor.class */
/**
 * {@link Extractor} implementation that pulls plain text out of a stream using a
 * {@link StringExtractor} subclass tuned with Microsoft Works specific heuristics.
 *
 * <p>When any non-blank text is found it is stored as {@code NIE.plainTextContent} and the
 * resource is typed as {@code NFO.PaginatedTextDocument}.
 */
public class WorksExtractor implements Extractor {

    /**
     * {@link StringExtractor} specialisation holding the per-extraction state used to filter
     * the lines found in the stream. A fresh instance is created for every extraction.
     */
    /* loaded from: input_file:WEB-INF/lib/aperture-1.2.0.jar:org/semanticdesktop/aperture/extractor/works/WorksExtractor$WorksStringExtractor.class */
    private static class WorksStringExtractor extends StringExtractor {
        /** Value of {@link #worksType} for a word-processor document (the initial assumption). */
        private static final int MSWORKS_DOCUMENT = 0;
        /** Value of {@link #worksType} once a spreadsheet marker line has been seen. */
        private static final int MSWORKS_SPREADSHEET = 1;

        /*
         * Marker lines that switch the extractor into spreadsheet mode. These are plain literals
         * on purpose: the decompiled code referenced an Apache POI header/footer constant for
         * the suffix (assumed to have this value - confirm against POI), which made this class
         * depend on an unrelated library.
         */
        private static final String SPREADSHEET_MARKER_SUFFIX = "&T";
        private static final String SPREADSHEET_MARKER_PREFIX = "VT&";

        /** A line starting with this, or a short line containing it, is dropped in spreadsheet mode. */
        private static final char AT_SIGN = '@';
        /** NUL character; accepted as a text character so that it can be detected inside lines. */
        private static final char NUL = '\0';

        /** Set once a NUL-containing line is met after enough good lines; all later lines are dropped. */
        private boolean endOfDocumentReached;
        /** Number of lines accepted (returned non-null) by {@link #postProcessLine(String)} so far. */
        private int okayTrimmedLineCount;
        /** Either {@link #MSWORKS_DOCUMENT} or {@link #MSWORKS_SPREADSHEET}. */
        private int worksType;

        private WorksStringExtractor() {
            this.endOfDocumentReached = false;
            this.okayTrimmedLineCount = 0;
            this.worksType = MSWORKS_DOCUMENT;
        }

        /**
         * Accepts everything the parent accepts plus the NUL character (code point 0).
         *
         * @param i the character code to test
         * @return {@code true} if the parent treats {@code i} as text or {@code i} is 0
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.semanticdesktop.aperture.extractor.util.StringExtractor
        public boolean isTextCharacter(int i) {
            return super.isTextCharacter(i) || i == NUL;
        }

        /**
         * Treats the exact line {@code "gtt"} as a start line in addition to whatever the parent
         * recognises.
         *
         * @param str the candidate line
         * @return {@code true} if {@code str} is {@code "gtt"} or the parent accepts it
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.semanticdesktop.aperture.extractor.util.StringExtractor
        public boolean isStartLine(String str) {
            return "gtt".equals(str) || super.isStartLine(str);
        }

        /**
         * Rejects the two fixed marker lines {@code "microsoft works"} and {@code "msworkswpdoc"};
         * every other line is judged by the parent.
         *
         * @param str the candidate line
         * @return {@code false} for the two marker lines, otherwise the parent's verdict
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.semanticdesktop.aperture.extractor.util.StringExtractor
        public boolean isValidLine(String str) {
            if ("microsoft works".equals(str) || "msworkswpdoc".equals(str)) {
                return false;
            }
            return super.isValidLine(str);
        }

        /**
         * Filters and cleans a single line. Returns {@code null} to drop the line.
         *
         * @param str the raw line
         * @return the line to keep, or {@code null} if it must be discarded
         */
        /* JADX INFO: Access modifiers changed from: protected */
        @Override // org.semanticdesktop.aperture.extractor.util.StringExtractor
        public String postProcessLine(String str) {
            if (this.endOfDocumentReached) {
                return null;
            }

            // Spreadsheet detection is only attempted while fewer than four lines were accepted.
            if (this.okayTrimmedLineCount < 4 && isSpreadsheetMarker(str)) {
                this.worksType = MSWORKS_SPREADSHEET;
                return null;
            }

            String line = super.postProcessLine(str);
            if (line == null) {
                return null;
            }

            if (this.worksType == MSWORKS_SPREADSHEET) {
                if (line.startsWith("@")) {
                    return null;
                }
                // Short lines that contain '@' or have no lower-case letter are dropped.
                if (line.length() < 6 && (line.indexOf(AT_SIGN) >= 0 || isAllUppercase(line))) {
                    return null;
                }
            } else if (line.indexOf(NUL) >= 0) {
                // In document mode a NUL-containing line is always dropped; once more than five
                // lines have been accepted it additionally ends the extraction.
                if (this.okayTrimmedLineCount > 5) {
                    this.endOfDocumentReached = true;
                }
                return null;
            }

            // Keep only lines longer than two characters whose second character is neither
            // whitespace nor NUL. The length test comes first so charAt(1) is always in range.
            if (line.length() <= 2 || Character.isWhitespace(line.charAt(1)) || line.charAt(1) == NUL) {
                return null;
            }

            this.okayTrimmedLineCount++;
            if (this.worksType == MSWORKS_SPREADSHEET) {
                line = line.replace(NUL, ' ');
            }
            return line;
        }

        /** Tells whether {@code str} is one of the lines that switch to spreadsheet mode. */
        private static boolean isSpreadsheetMarker(String str) {
            return (str.length() == 3 && str.endsWith(SPREADSHEET_MARKER_SUFFIX))
                    || str.startsWith(SPREADSHEET_MARKER_PREFIX);
        }

        /**
         * Checks that the string contains no letter that is not upper case. Non-letter
         * characters are ignored, so an empty or letter-free string yields {@code true}.
         */
        private boolean isAllUppercase(String str) {
            for (int index = 0; index < str.length(); index++) {
                char current = str.charAt(index);
                if (Character.isLetter(current) && !Character.isUpperCase(current)) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Extracts the text found in {@code inputStream} and, if it is non-empty after trimming,
     * adds it to {@code rDFContainer} together with the {@code NFO.PaginatedTextDocument} type.
     *
     * <p>The {@code uri}, {@code charset} and {@code str} arguments are not used by this
     * implementation. The stream is not closed here.
     *
     * @param uri          unused
     * @param inputStream  stream to read the document from
     * @param charset      unused
     * @param str          unused
     * @param rDFContainer container receiving the extracted statements
     * @throws ExtractorException wrapping any {@link IOException} raised while reading
     */
    @Override // org.semanticdesktop.aperture.extractor.Extractor
    public void extract(URI uri, InputStream inputStream, Charset charset, String str, RDFContainer rDFContainer) throws ExtractorException {
        try {
            String text = new WorksStringExtractor().extract(inputStream).trim();
            if (text.length() > 0) {
                rDFContainer.add(NIE.plainTextContent, text);
                rDFContainer.add(RDF.type, NFO.PaginatedTextDocument);
            }
        } catch (IOException e) {
            throw new ExtractorException(e);
        }
    }
}
