Automatic Comparator and Extractor Tool
Compares & extracts the text from given PDF(s)
Extracts the text & images from a given PDF, either as a whole or from a particular page
Compares two given PDFs by text, font style, font size, font name, and image
The configurations below can be updated based on your needs.
Property | Description |
---|---|
ENABLE_FONT_VALIDATION | Enables the font name validation if true. By default false |
ENABLE_FONT_SIZE_VALIDATION | Enables the font size validation if true. By default false |
FETCH_IMAGES | Extracts the images from the PDF if true. By default false |
IMAGE_PATH | Extracts and saves the images at IMAGE_PATH. By default the ./target folder |
<dependency>
<groupId>in.testonics.omni</groupId>
<artifactId>ace</artifactId>
<version>1.0.2</version>
</dependency>
compile 'in.testonics.omni:ace:1.0.2'
// Compares two PDFs and returns the output in JSON format
System.out.println("PDF Font, Size, Type and Text Mismatch validation");
PDFCompare pdfCompare = new PDFCompare();
// Switch on the optional font name, font size and bold/italic checks before comparing
pdfCompare.setEnableFontValidation(true);
pdfCompare.setEnableFontSizeValidation(true);
pdfCompare.setBoldItalicValidation(true);
// NOTE(review): the third argument is presumably the page number to compare -- confirm against the PDFCompare API
JSONObject jsonObject = pdfCompare.compare("PDF3.pdf", "PDF5.pdf", 1);
System.out.println(jsonObject);
/**
 * Example: reads the text of page 1 of "PDF3.pdf" through {@code pdfCompare.getFileText}
 * and prints it to standard output.
 *
 * @throws Exception propagated from the extraction call
 */
public void ExtractText() throws Exception {
    System.out.println("Extracting the text");
    final String pageOneText = pdfCompare.getFileText("PDF3.pdf", 1);
    // Build the message first so the print statement stays a single, simple call.
    final String message = "Extracted Text from page 1 :" + pageOneText;
    System.out.println(message);
}
// Enable image extraction and set the folder where the extracted images are saved
pdfCompare.setFetchImagesFlag(true);
pdfCompare.setImagesPath(".//target//");
// NOTE(review): the second argument is presumably the page number to extract from -- confirm against the PDFCompare API
JSONObject jsonObject = pdfCompare.getImages("PDF-File-Sample_150kB.pdf",1);
// Compare two PDFs using a set of expected patterns loaded from a JSON file.
// Forward slashes are used so the paths resolve on Windows as well as on Unix-like systems;
// with backslashes, new File(pdf1).getName() would return the whole string on non-Windows platforms.
String path = "./src/test/resources/TestData/";
String pathExpectedPatterns = path + "ExpectedPatterns.json";
String pdf1 = path + "PDF3.pdf";
String pdf2 = path + "PDF5.pdf";
// The expected patterns are looked up by the bare file name of the first PDF.
Map<String, List<String>> mapExpectedPatterns = pdfCompare.getExpectedPatterns(pathExpectedPatterns, new File(pdf1).getName());
pdfCompare.setEnableFontValidation(true);
pdfCompare.setEnableFontSizeValidation(true);
pdfCompare.setExpectedPatterns(mapExpectedPatterns);
// NOTE(review): the third argument is presumably the page number to compare -- confirm against the PDFCompare API
JSONObject jsonObject = pdfCompare.compare(pdf1, pdf2, 1);
System.out.println(jsonObject);
This project is licensed under the Apache License 2.0 - see the LICENSE file for details
Release notes can be found in RELEASE_NOTES.
Please follow the Code of Conduct page.