/*
* Copyright (c) 1997-2024 IDRsolutions (https://www.idrsolutions.com)
*/
package org.jpedal.examples.images;
import com.idrsolutions.image.utility.SupportedFormats;
import org.jpedal.PdfDecoderServer;
import org.jpedal.color.ColorSpaces;
import org.jpedal.constants.JPedalSettings;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.fonts.FontMappings;
import org.jpedal.io.ColorSpaceConvertor;
import org.jpedal.utils.LogWriter;
import java.awt.Graphics;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
 * <h2>Image Extraction from PDF files</h2>
 * <br>
 * This class provides a simple Java API to convert the pages of a PDF file into images, and also
 * a static convenience method if you just want to dump all the pages as images from a PDF file
 * or from a directory containing PDF files.<br>
 * <br>
 * See our Support Page for <a href="https://support.idrsolutions.com/jpedal/tutorials/convert-images/convert-pdf-to-bufferedimage"> Examples on Convert PDF pages to Images</a> <br>
 * There is a simpler example <a href="https://javadoc.idrsolutions.com/org/jpedal/examples/images/ConvertPagesToImages.html"> (org.jpedal.examples.images.ConvertPagesToImages)</a> for producing images of pages if the extra features are not needed.<br>
 * <br>
 */
public final class ConvertPagesToHiResImages extends BaseImageExtraction {

    /**
     * Command-line entry point.
     * <br>
     * Expects at least two arguments: the input file/directory and the output directory.
     * An optional third argument selects the image format; "png" is used when only two are given.
     * With fewer than two arguments (or a null array) a message is printed and nothing is converted.
     *
     * @param args input location, output directory and (optionally) image format
     */
    @SuppressWarnings("unused")
    public static void main(final String[] args) {
        if (args != null && args.length > 1) {
            try {
                if (args.length == 2) {
                    writeAllPagesAsHiResImagesToDir(args[0], args[1], "png");
                } else {
                    writeAllPagesAsHiResImagesToDir(args[0], args[1], args[2]);
                }
            } catch (final PdfException ex) {
                //keep the original exception as the cause so its stack trace is not lost
                throw new RuntimeException(ex.getMessage(), ex);
            }
        } else if (args == null) {
            System.out.println("null arguments entered");
        } else {
            System.out.println("wrong arguments entered");
            final StringBuilder arguments = new StringBuilder();
            for (final String arg : args) {
                arguments.append(arg).append('\n');
            }
            System.out.println("you entered:\n" + arguments + "as the arguments");
        }
    }

    /**
     * static method to write out all pages in a PDF file or directory of PDF files as images
     * Not for use with other image conversion methods in multi-threaded environments.
     * This method utilises some variables that may impact image conversion taking place on other threads.
     * <br>
     * Uses a default set of options (max scaling of 2, page size of 2000x1600 with the
     * page size taking priority) and delegates to the four argument version.
     *
     * @param inputDir  directory of files to convert
     * @param outputDir directory of output
     * @param format    format of images
     * @throws org.jpedal.exception.PdfException PdfException
     */
    public static void writeAllPagesAsHiResImagesToDir(final String inputDir, final String outputDir, final String format) throws PdfException {
        /*
         * this process is very flexible so we create a Map and pass in values to select what sort
         * of results we want. There is a choice between methods used and image size. Larger images use more
         * memory and are slower but look better
         */
        final Map<Integer, Object> mapValues = new HashMap<>();

        /* USEFUL OPTIONS*/
        //do not scale above this figure
        mapValues.put(JPedalSettings.EXTRACT_AT_BEST_QUALITY_MAXSCALING, 2);

        //alternatively specify a page size (aspect ratio preserved so will do best fit)
        //set a page size (JPedal will put best fit to this)
        mapValues.put(JPedalSettings.EXTRACT_AT_PAGE_SIZE, new String[]{"2000", "1600"});

        //which takes priority (default is false)
        mapValues.put(JPedalSettings.PAGE_SIZE_OVERRIDES_IMAGE, Boolean.TRUE);

        writeAllPagesAsHiResImagesToDir(inputDir, outputDir, format, mapValues);
    }

    /**
     * static method to write out all pages in a PDF file or directory of PDF files as images
     * Not for use with other image conversion methods in multi-threaded environments.
     * This method utilises some variables that may impact image conversion taking place on other threads.
     *
     * @param inputDir  directory of files to convert
     * @param outputDir directory of output
     * @param format    format of images
     * @param mapValues Map of KeyPair values from <a href="https://files.idrsolutions.com/maven/site/jpedal/apidocs/org/jpedal/constants/JPedalSettings.html">JPedalSettings</a> <br>
     * @throws org.jpedal.exception.PdfException PdfException
     * @throws RuntimeException                  if there is no encoder support for the requested format
     */
    public static void writeAllPagesAsHiResImagesToDir(final String inputDir, final String outputDir, final String format, final Map<Integer, Object> mapValues) throws PdfException {
        if (!SupportedFormats.hasEncoderSupportForImageFormat(format)) {
            throw new RuntimeException("Unknown image format - " + format);
        }

        //static call, so these settings are shared (see thread-safety warning above)
        PdfDecoderServer.modifyJPedalParameters(mapValues);

        final ConvertPagesToHiResImages convert = new ConvertPagesToHiResImages(inputDir);
        try {
            convert.setup(format, outputDir);
            convert.processFiles(inputDir);
        } finally {
            //always release the PDF, even if conversion failed part way through
            convert.closePDFfile();
        }
    }

    /**
     * Sets up a ConvertPagesToHiResImages instance to open a PDF File
     *
     * @param fileName full path to a single PDF file
     */
    public ConvertPagesToHiResImages(final String fileName) {
        super(fileName);
        init();
    }

    /**
     * Sets up a ConvertPagesToHiResImages instance to open a PDF file contained as a BLOB within a byte[] stream
     *
     * @param byteArray pdf file data
     */
    public ConvertPagesToHiResImages(final byte[] byteArray) {
        super(byteArray);
        init();
    }

    /**
     * Opens the PDF and writes out its pages as images.
     * <br>
     * If the System property org.jpedal.separation is set, each page is written once per
     * render mode (five variations when the value is "all", otherwise a single pass using
     * render mode 7 with a "_" suffix). If it is not set, each page is written once with no suffix.
     *
     * @param pdfFile reference to pdf file data (not read directly in this method)
     * @throws PdfException if problem with parsing PDF file
     */
    @Override
    void decodeFile(final String pdfFile) throws PdfException {
        if (!openPDFFile()) {
            return;
        }

        /*
         * allow output to multiple images with different values on each
         *
         * Note we REMOVE shapes as it is a new feature and we do not want to break existing functions
         */
        final String separation = System.getProperty("org.jpedal.separation");
        if (separation == null) { //just get the page
            extractPagesAsImages(output_dir, imageType, "", false);
            return;
        }

        //values are held in groups of 3: render mode, file name suffix, transparency flag
        Object[] sepValues = {7, "", Boolean.FALSE}; //default of normal
        if ("all".equals(separation)) {
            sepValues = new Object[]{PdfDecoderServer.RENDERIMAGES, "image_and_shapes", Boolean.FALSE,
                    PdfDecoderServer.RENDERIMAGES + PdfDecoderServer.REMOVE_RENDERSHAPES, "image_without_shapes", Boolean.FALSE,
                    PdfDecoderServer.RENDERTEXT, "text_and_shapes", Boolean.TRUE,
                    7, "all", Boolean.FALSE,
                    PdfDecoderServer.RENDERTEXT + PdfDecoderServer.REMOVE_RENDERSHAPES, "text_without_shapes", Boolean.TRUE
            };
        }

        for (int seps = 0; seps < sepValues.length; seps += 3) {
            decode_pdf.setRenderMode((Integer) sepValues[seps]);
            //boolean makes last transparent so we can see white text
            extractPagesAsImages(output_dir, imageType, "_" + sepValues[seps + 1], (Boolean) sepValues[seps + 2]);
        }
    }

    /**
     * actual conversion of each PDF page into an image
     * <br>
     * Pages for which no image is returned are logged and skipped. Failures when writing
     * an individual image are logged and do not stop the remaining pages being processed.
     *
     * @param outputPath    location for output of image
     * @param fileType      image output format
     * @param prefix        text added to the file name after the page number
     * @param isTransparent flag to show if image is transparent
     * @throws PdfException if problem with extracting images from PDF file
     */
    private void extractPagesAsImages(final String outputPath, final String fileType, final String prefix, final boolean isTransparent) throws PdfException {
        //create a directory if it doesn't exist
        final File outputFolder = new File(outputPath);
        if (!outputFolder.exists() && !outputFolder.mkdirs()) {
            LogWriter.writeLog("Unable to create output directory " + outputPath);
        }

        //page range
        final int start = 1;
        final int end = getPageCount();

        /*
         * set of JVM flags which allow user control on process
         */
        //////////////////TIFF OPTIONS/////////////////////////////////////////
        final boolean isSingleOutputFile = "true".equalsIgnoreCase(System.getProperty("org.jpedal.multipage_tiff"));
        final boolean compressTiffs = "true".equalsIgnoreCase(System.getProperty("org.jpedal.compress_tiff"));
        setJPEGCompression();
        ///////////////////////////////////////////////////////////////////////

        //accept either spelling in any case so the sRGB conversion is never missed for JPEG
        final boolean isJpeg = "jpg".equalsIgnoreCase(fileType) || "jpeg".equalsIgnoreCase(fileType);

        for (int pageNo = start; pageNo <= end; pageNo++) {
            /*
             * If you are using decoder.getPageAsHiRes() after passing additional parameters into JPedal using the static method
             * PdfDecoder.modifyJPedalParameters(), then getPageAsHiRes() won't necessarily be thread safe. If you want to use
             * getPageAsHiRes() and pass in additional parameters, in a thread safe manner, please use the method
             * getPageAsHiRes(int pageIndex, Map params) or getPageAsHiRes(int pageIndex, Map params, boolean isTransparent) and
             * pass the additional parameters in directly to the getPageAsHiRes() method without calling PdfDecoder.modifyJPedalParameters()
             * first.
             *
             * Please see org/jpedal/examples/images/ConvertPagesToImages.java.html for more details on how to use HiRes image conversion
             */
            BufferedImage imageToSave = getPageAsHiResImage(pageNo, isTransparent, null);
            decode_pdf.flushObjectValues(true);

            //nothing to convert or save, so avoid dereferencing a null image below
            if (imageToSave == null) {
                LogWriter.writeLog("No image returned for page " + pageNo);
                continue;
            }

            //image needs to be sRGB for JPEG
            if (isJpeg) {
                imageToSave = ColorSpaceConvertor.convertToRGB(imageToSave);
            }

            final String outputFileName;
            if (isSingleOutputFile) {
                outputFileName = outputPath + "allPages" + prefix + '.' + fileType;
            } else {
                /*
                 * create a name with leading zeros so that, if there are more than 9 pages, files appear in correct order
                 */
                outputFileName = outputPath + "page" + getPageName(end, pageNo) + prefix + '.' + fileType;
            }

            //if just gray we can reduce memory usage by converting image to Grayscale
            if (imageToSave != null && usesOnlyGrayColorspaces()) {
                imageToSave = toGrayscale(imageToSave);
            }

            //we save the image out here
            if (imageToSave != null) {
                try {
                    saveImage(fileType, isSingleOutputFile, compressTiffs, pageNo, imageToSave, outputFileName);
                } catch (final Exception e) {
                    //best effort - log and carry on with the remaining pages
                    LogWriter.writeLog("Unable to write out image " + e.getMessage());
                }
                imageToSave.flush();
            }
        }
    }

    /**
     * see what Colorspaces are reported by the decoder and whether all of them are gray
     * (only DeviceGray and CalGray are treated as gray at present)
     *
     * @return true if colorspace information is available and every entry is a gray colorspace,
     * false if any other colorspace is reported or the decoder returns null (not sure)
     */
    private boolean usesOnlyGrayColorspaces() {
        final Iterator<Integer> colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES);
        if (colorspacesUsed == null) {
            return false;
        }
        //assume true and disprove
        while (colorspacesUsed.hasNext()) {
            final int nextID = colorspacesUsed.next();
            if (nextID != ColorSpaces.DeviceGray && nextID != ColorSpaces.CalGray) {
                return false;
            }
        }
        return true;
    }

    /**
     * draw onto GRAY image to reduce colour depth (converts ARGB to gray)
     *
     * @param source image to convert
     * @return new TYPE_BYTE_GRAY image of the same size containing the source image
     */
    private static BufferedImage toGrayscale(final BufferedImage source) {
        final BufferedImage gray = new BufferedImage(source.getWidth(), source.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
        final Graphics g = gray.getGraphics();
        try {
            g.drawImage(source, 0, 0, null);
        } finally {
            //release the graphics context rather than waiting for garbage collection
            g.dispose();
        }
        return gray;
    }

    /**
     * pad the page number with leading zeros so it has as many digits as the last page number
     *
     * @param end    last page number (sets the width)
     * @param pageNo page number to format
     * @return page number as text, left padded with '0'
     */
    private static StringBuilder getPageName(final int end, final int pageNo) {
        final StringBuilder pageAsString = new StringBuilder(String.valueOf(pageNo));
        final int width = String.valueOf(end).length();
        while (pageAsString.length() < width) {
            pageAsString.insert(0, '0');
        }
        return pageAsString;
    }

    /**
     * Get a single page as an image by calling getPageAsHiRes on the decoder
     *
     * @param page          page number to convert
     * @param isTransparent flag to show if image is transparent
     * @param options       values passed straight through to the decoder (null is passed by this class)
     * @return image of the page as returned by the decoder
     * @throws PdfException if problem with converting the page
     */
    public BufferedImage getPageAsHiResImage(final int page, final boolean isTransparent, final Map<Integer, Object> options) throws PdfException {
        return decode_pdf.getPageAsHiRes(page, options, isTransparent);
    }

    /**
     * store the image format and output directory (adding a trailing separator if missing)
     *
     * @param format    format of images
     * @param outputDir directory of output
     */
    private void setup(final String format, String outputDir) {
        //check output dir has separator
        if (!outputDir.endsWith(separator)) {
            outputDir += separator;
        }
        imageType = format;
        output_dir = outputDir;
    }

    /**
     * set font replacements and the extraction type before running the parent initialisation
     */
    @Override
    void init() {
        //mappings for non-embedded fonts to use
        FontMappings.setFontReplacements();
        type = ExtractTypes.RASTERIZED_PAGE;
        super.init();
    }
}
|