/*
 * Decompiled with CFR 0.152.
 */
package com.plpdf.util;

import com.plpdf.exceptions.PlpdfException;
import com.plpdf.smmodel.SMDocument;
import com.plpdf.smmodel.SMPage;
import com.plpdf.util.PDFTextStripper;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A {@link PDFTextStripper} that, rather than delivering the extracted text,
 * writes an XML description of where a set of search expressions occur in
 * the text of each page.
 *
 * <p>The output starts with an {@code <XML>/<Body>/<Highlight>} header, then
 * contains one {@code <loc pg=… pos=… len=…>} line per match, and ends with
 * the matching closing tags. {@code pos} and {@code len} are character
 * offsets into the text collected for the page on which the match was found.
 *
 * <p>The constructor sets the line, page and word separators to the empty
 * string, so only the text produced by the stripper itself contributes to
 * those offsets.
 *
 * <p>Instances keep per-run state in fields, so a single instance must not
 * be used for two overlapping runs.
 */
public class PDFHighlighter
extends PDFTextStripper {
    /** Charset used both to write the page text to the buffer and to read it back. */
    private static final String ENCODING = "UTF-16";

    /**
     * Matches an {@code a} followed by one to three digits; each occurrence is
     * replaced by a single {@code .} before searching.
     * NOTE(review): presumably these are placeholder names the stripper emits
     * for bullet/special glyphs - confirm against PDFTextStripper.
     */
    private static final Pattern A_NUMBER_TOKEN = Pattern.compile("a[0-9]{1,3}");

    /** Destination of the XML produced by the run in progress. */
    private Writer highlighterOutput = null;
    /** Search expressions of the run in progress, compiled once per run. */
    private Pattern[] searchPatterns;
    /** Buffer that receives the text of the page currently being stripped. */
    private ByteArrayOutputStream textOS = null;
    /** Writer handed to the stripper; it encodes into {@link #textOS}. */
    private Writer textWriter = null;

    /**
     * Creates a highlighter whose stripper output uses empty line, page and
     * word separators, does not separate by beads and does not suppress
     * duplicate overlapping text.
     *
     * @throws IOException    propagated from the superclass constructor
     * @throws PlpdfException propagated from the superclass constructor
     */
    public PDFHighlighter() throws IOException, PlpdfException {
        super(ENCODING);
        super.setLineSeparator("");
        super.setPageSeparator("");
        super.setWordSeparator("");
        super.setShouldSeparateByBeads(false);
        super.setSuppressDuplicateOverlappingText(false);
    }

    /**
     * Convenience overload for a single search expression.
     *
     * @param pdDocument    document to scan
     * @param highlightWord regular expression to look for (case-insensitive)
     * @param xmlOutput     writer that receives the XML
     * @throws Exception if stripping the document or writing the output fails
     */
    public void generateXMLHighlight(SMDocument pdDocument, String highlightWord, Writer xmlOutput) throws Exception {
        this.generateXMLHighlight(pdDocument, new String[]{highlightWord}, xmlOutput);
    }

    /**
     * Scans the document and writes the XML highlight description to
     * {@code xmlOutput}, which is flushed but not closed.
     *
     * <p>Every entry of {@code sWords} is compiled as a regular expression with
     * {@link Pattern#CASE_INSENSITIVE}. Compilation happens once, before any
     * output is written, so an invalid expression fails the call up front.
     *
     * @param pdDocument document to scan
     * @param sWords     regular expressions to look for; must not be null
     * @param xmlOutput  writer that receives the XML
     * @throws NullPointerException if {@code sWords} or one of its entries is null
     * @throws java.util.regex.PatternSyntaxException if an entry is not a valid regular expression
     * @throws Exception if stripping the document or writing the output fails
     */
    public void generateXMLHighlight(SMDocument pdDocument, String[] sWords, Writer xmlOutput) throws Exception {
        if (sWords == null) {
            throw new NullPointerException("sWords must not be null");
        }
        // Compile once per run instead of once per word per page.
        Pattern[] compiled = new Pattern[sWords.length];
        for (int i = 0; i < sWords.length; i++) {
            compiled[i] = Pattern.compile(sWords[i], Pattern.CASE_INSENSITIVE);
        }
        this.searchPatterns = compiled;
        this.highlighterOutput = xmlOutput;

        this.highlighterOutput.write("<XML>\n<Body units=characters  version=2>\n<Highlight>\n");
        // The stripper writes into an in-memory buffer that endPage() drains page by page.
        this.textOS = new ByteArrayOutputStream();
        this.textWriter = new OutputStreamWriter(this.textOS, ENCODING);
        this.writeText(pdDocument, this.textWriter);
        this.highlighterOutput.write("</Highlight>\n</Body>\n</XML>");
        this.highlighterOutput.flush();
    }

    /**
     * Drains the text buffered since the previous call, searches it with every
     * compiled expression and writes one {@code <loc>} line per match.
     * Presumably invoked by {@link PDFTextStripper} when it finishes a page.
     *
     * @param pdPage the page that has just been processed (not used here)
     * @throws IOException if flushing the text buffer or writing the output fails
     */
    protected void endPage(SMPage pdPage) throws IOException {
        this.textWriter.flush();
        String page = new String(this.textOS.toByteArray(), ENCODING);
        // Start the next page with an empty buffer so offsets stay page-relative.
        this.textOS.reset();

        // Cheap pre-check: skip the regex replacement when no 'a' is present at all.
        if (page.indexOf('a') != -1) {
            page = A_NUMBER_TOKEN.matcher(page).replaceAll(".");
        }

        for (Pattern pattern : this.searchPatterns) {
            Matcher matcher = pattern.matcher(page);
            while (matcher.find()) {
                int begin = matcher.start();
                int end = matcher.end();
                // The page number is written as (current page number - 1).
                this.highlighterOutput.write("    <loc pg=" + (this.getCurrentPageNo() - 1) + " pos=" + begin + " len=" + (end - begin) + ">\n");
            }
        }
    }

    /**
     * Command-line entry point: {@code <pdf file> word1 word2 ...}. Writes the
     * XML highlight description for the given file and words to standard output.
     *
     * @param args the PDF file name followed by at least one search expression
     * @throws Exception if loading, scanning or closing the document fails
     */
    public static void main(String[] args) throws Exception {
        // Validate before doing any work; the return guards against a blocked System.exit.
        if (args.length < 2) {
            PDFHighlighter.usage();
            return;
        }
        String[] highlightStrings = new String[args.length - 1];
        System.arraycopy(args, 1, highlightStrings, 0, highlightStrings.length);

        PDFHighlighter xmlExtractor = new PDFHighlighter();
        SMDocument doc = null;
        try {
            doc = SMDocument.load(args[0]);
            xmlExtractor.generateXMLHighlight(doc, highlightStrings, new OutputStreamWriter(System.out));
        }
        finally {
            if (doc != null) {
                doc.close();
            }
        }
    }

    /** Prints the command-line usage to standard error and exits with status 1. */
    private static void usage() {
        System.err.println("usage: java " + PDFHighlighter.class.getName() + " <pdf file> word1 word2 word3 ...");
        System.exit(1);
    }
}

