/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2015 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 
 *
 * ---------------
 * ConvertPagesToImages.java
 * ---------------
 */

package org.jpedal.examples.images;

import com.idrsolutions.image.tiff.TiffEncoder;
import org.jpedal.*;
import org.jpedal.color.ColorSpaces;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.fonts.FontMappings;
import org.jpedal.objects.PdfFileInformation;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Iterator;

/**
 <h2>Image Extraction from PDF files</h2>
 *
 * This class provides a simple Java API to extract pages as images from a PDF file and also
 * a static convenience method if you just want to dump all the pages as images from a PDF file
 * or directory containing PDF files<p>
 *
 <h3>Example 1 - access API methods</h3>
 <pre><code>ConvertPagesToImages extract=new ConvertPagesToImages("C:/pdfs/mypdf.pdf");
 * //extract.setPassword("password");
 * if (extract.openPDFFile()) {
 *     int pageCount=extract.getPageCount();
 *     for (int page=1; page&lt;=pageCount; page++) {
 *
 *        BufferedImage image=extract.getPageAsImage(page, isBackgroundTransparent);
 *     }
 * }
 *
 * extract.closePDFfile();</code></pre>
 *
 <h3>Example 2 - convenience static method</h3>
 * Extract all pages as images<p> 
 *
 <pre><code>ConvertPagesToImages.writeAllPagesAsImagesToDir("pdfs", "images" , "png", 1.33f);</code></pre>
 
 <h3>Example 3 - Access directly from the Jar</h3>
 * ConvertPagesToImages can be run directly from the jar using the command below, and will extract all pages from a PDF file
 * (or from every PDF file in a directory) as images to a defined output directory:<p>
 *
 <code>java -cp libraries_needed org/jpedal/examples/images/ConvertPagesToImages inputValues</code><p>
 *
 * Where inputValues is 4 values:
 <ul>
 <li>First value:  The PDF filename (including the path if needed) or a directory containing PDF files. If it contains spaces it must be enclosed by double quotes (ie "C:/Path with spaces/").</li>
 <li>Second value: The location to write out images extracted from the PDF file or files. If it contains spaces it must be enclosed by double quotes (ie "C:/Path with spaces/").</li>
 <li>Third value:  This indicates the required output image type (default is png if nothing specified). Options are tiff, png, jpg.</li>
 <li>Fourth value: Scaling of the page (1.33f gives the same size as the page appears in Acrobat at 100%).</li>
 </ul>
 *
 * There is another example (org.jpedal.examples.images.ConvertPagesToHiResImages) for producing higher res images of pages (but likely to be slower).
 <p><a href="http://www.idrsolutions.com/how-to-convert-pdf-files-to-image">Click here for a list of code examples to convert images</a></p>
 */
public class ConvertPagesToImages extends BaseImageExtraction{
    
    /**
     * Whether pages are rendered with a transparent background. Forwarded to the page
     * extraction call when no separation property is set.
     */
    boolean isTransparent;
    
    /**
     * Scaling handed to the decoder's extraction mode.
     * Use 96 dpi as the default so pages are the correct size (72 would be smaller).
     */
    private float pageScaling =1.33f;
    
    /**
     * Creator values that identify OCR-produced PDFs. When the file's "Creator" field equals
     * one of these, the decoder is switched to image-only rendering.
     */
    private final String[] ocr = {"TeleForm"};
    
    /**
     * Test-only switch - please do not use. When set, output is limited to the first 10 pages,
     * font replacements are not registered, and a file name without a path keeps the default name.
     */
    public static boolean isTest;
    
    //
    
    /**
     * JPEG compression read from the system property org.jpedal.compression_jpeg.
     * Only used if between 0 and 1; starts at -1 until a value is supplied.
     */
    private float JPEGcompression=-1f;
    
    /**
     * convenience static method to convert PDF file or directory of files
     *
     */
    public static void writeAllPagesAsImagesToDir(String inputDir, String outDir, String format,float pageScalingthrows PdfException{
      
        ConvertPagesToImages convert=new ConvertPagesToImages(inputDir);
        
        convert.setup(format, outDir, pageScaling);
        
        convert.processFiles(inputDir);
        
        convert.closePDFfile();
    }
    
    void setup(String format, String outDir, float pageScalingthrows PdfException, RuntimeException {
        
        //check output dir has separator
        if (!outDir.endsWith(separator)) {
            outDir += separator;
        }
        
        this.imageType = format;
        this.output_dir=outDir;
        this.pageScaling = pageScaling;
        
        //check output dir has separator
        if (!user_dir.endsWith(separator)) {
            user_dir += separator;
        }
    }

    @Override
    void processFiles(String inputDirthrows PdfException, RuntimeException {
        
        //check file exists
        final File pdf_file = new File(inputDir);
        //if file exists, open and get number of pages
        if (!pdf_file.exists()) {
            throw new RuntimeException("File " + pdf_file + " not found");
        }
       
        //check output dir has separator
        if (!user_dir.endsWith(separator)) {
            user_dir += separator;
        }
        /**
         * if file name ends pdf, do the file otherwise
         * do every pdf file in the directory. We already know file or
         * directory exists so no need to check that, but we do need to
         * check its a directory
         */
        if (inputDir.toLowerCase().endsWith(".pdf")) {
            
            try {
                decodeFile(inputDir);
            catch (final Exception e) {
                //
                System.err.println("8.Exception " + e + " in pdf code in " + inputDir);
            }
        else {
            /**
             * get list of files and check directory
             */
            
            String[] files = null;
            final File inputFiles;
            /**make sure name ends with a deliminator for correct path later*/
            if (!inputDir.endsWith(separator)) {
                inputDir += separator;
            }
            try {
                inputFiles = new File(inputDir);
                if (!inputFiles.isDirectory()) {
                    System.err.println(inputDir + " is not a directory. Exiting program");
                else {
                    files = inputFiles.list();
                }
            }catch (final Exception ee) {
                throw new PdfException("Exception trying to access file " + ee.getMessage());
                
            }
            if (files!=null) {
                /**now work through all pdf files*/
                for (final String file : files) {
                    if (file.toLowerCase().endsWith(".pdf")) {
                        
                        try {
                            decodeFile(inputDir + file);
                        catch (final Exception e1) {
                            //
                            System.err.println("8.Exception " + e1 + " in pdf code in " + inputDir);
                        }
                    }
                }
            }
        }
    }

    /** Sets up an ConvertPagesToImages instance to open a PDF File
     @param fileName full path to a single PDF file
     */
    public ConvertPagesToImagesString fileName )
    {
          super(fileName);  
          
          init();
    }
    
    /** Sets up an ConvertPagesToImages instance to open  a PDF file contained as a BLOB within a byte[] stream
     *
     @param byteArray
     */
    public ConvertPagesToImages(final byte[] byteArray )
    {
        super(byteArray);
        
        init();
        
    }
    
    
    /**
     * routine to decode a file
     */
    @Override
    void decodeFile(final String file_namethrows PdfException {
        
        /**get just the name of the file without
         * the path to use as a sub-directory
         */
        
        String name = "demo"//set a default just in case
        
        int pointer = file_name.lastIndexOf(separator);
        
        if(pointer==-1) {
            pointer = file_name.lastIndexOf('/');
        }
        
        if (pointer != -1){
            name = file_name.substring(pointer + 1, file_name.length() 4);
        }else if((!ConvertPagesToImages.isTest)&&(file_name.toLowerCase().endsWith(".pdf"))){
            name=file_name.substring(0,file_name.length()-4);
        }
        
        //fix for odd files on Linux created when you view pages
        if(name.startsWith(".")) {
            return;
        }
        
        //create output dir for images
        if(output_dir==null) {
            output_dir = user_dir + "thumbnails" + separator ;
        }       
            
            //true as we are rendering page
            decode_pdf.setExtractionMode(0, pageScaling);
            //don't bother to extract text and images
        
            fileName=file_name;
        /**
         * extract data from pdf (if allowed).
         */
        if(openPDFFile()){
           
            //create a directory if it doesn't exist
            final File output_path = new Fileoutput_dir );
            if(!output_path.exists()) {
                output_path.mkdirs();
            }
            
                         /**
             * allow output to multiple images with different values on each
             *
             * Note we REMOVE shapes as it is a new feature and we do not want to break existing functions
             */
            final String separation=System.getProperty("org.jpedal.separation");
            if(separation!=null){
                
                Object[] sepValues= {7,"",Boolean.FALSE}//default of normal
                if(separation.equals("all")){
                    sepValues=new Object[]{PdfDecoderServer.RENDERIMAGES,"image_and_shapes",Boolean.FALSE,
                        PdfDecoderServer.RENDERIMAGES + PdfDecoderServer.REMOVE_RENDERSHAPES,"image_without_shapes",Boolean.FALSE,
                        PdfDecoderServer.RENDERTEXT,"text_and_shapes",Boolean.TRUE,
                        7,"all",Boolean.FALSE,
                        PdfDecoderServer.RENDERTEXT + PdfDecoderServer.REMOVE_RENDERSHAPES,"text_without_shapes",Boolean.TRUE
                    };
                }
                
                final int sepCount =sepValues.length;
                for(int seps=0;seps<sepCount;seps += 3){
                    
                    decode_pdf.setRenderMode((IntegersepValues[seps]);
                    extractPagesAsImages(file_name, output_dir, name+ '_' +sepValues[seps+1](BooleansepValues[seps + 2])//boolean makes last transparent so we can see white text
                    
                }
                
            }else {
                //just get the page
                extractPagesAsImages(file_name, output_dir, name, isTransparent);
            }
        }
        
        closePDFfile();
        
    }
    
    private void extractPagesAsImages(final String file_name, final String output_dir, final String name, final boolean isTransparent) {
        
        //create a directory if it doesn't exist
        final File output_path = new File(output_dir);
        if (!output_path.exists()) {
            output_path.mkdirs();
        }
        
        final String multiPageFlag=System.getProperty("org.jpedal.multipage_tiff");
        final boolean isSingleOutputFile=multiPageFlag!=null && multiPageFlag.equalsIgnoreCase("true");
        
        //allow user to specify value
        final String rawJPEGComp=System.getProperty("org.jpedal.compression_jpeg");
        if(rawJPEGComp!=null){
            try{
                JPEGcompression=Float.parseFloat(rawJPEGComp);
            }catch(final Exception e){
                e.printStackTrace();
            }
            if(JPEGcompression<|| JPEGcompression>1) {
                throw new RuntimeException("Invalid value for JPEG compression - must be between 0 and 1");
            }
            
        }
        
        final String tiffFlag=System.getProperty("org.jpedal.compress_tiff");
        final String jpgFlag=System.getProperty("org.jpedal.jpeg_dpi");
        final boolean compressTiffs = tiffFlag!=null && tiffFlag.equalsIgnoreCase("true");
        
        //page range
        final int start = 1;
        int end =getPageCount();

        //limit to 1st ten pages in testing
        if((end>10)&&(isTest)) {
            end=10;
        }
    
        /**
         * extract data from pdf and then write out the pages as images
         */
        try {
            
            
            //
                
                for (int page = start; page < end + 1; page++) {
                    getPage(output_dir, name, isTransparent, isSingleOutputFile,rawJPEGComp, jpgFlag, compressTiffs, start, end, page);
                }
                
                //
        catch (final Exception e) {
            
            throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name);
        }
    }
    
    private void getPage(final String output_dir, final String name, final boolean isTransparent,
            final boolean isSingleOutputFile, final String rawJPEGComp, final String jpgFlag,
            final boolean compressTiffs, final int start, final int end, final int page
            throws PdfException, IOException {
        //read pages
            
            /**
             * create a name with zeros for if more than 9 pages appears in correct order
             */
            String pageAsString=String.valueOf(page);
            final String maxPageSize=String.valueOf(end);
            final int padding=maxPageSize.length()-pageAsString.length();
            for(int ii=0;ii<padding;ii++) {
                pageAsString='0'+pageAsString;
            }
            
            final String image_name;
            if(isSingleOutputFile) {
                image_name =name;
            else {
                image_name =name+"_page_" + pageAsString;
            }
            
            /**
             * get PRODUCER and if OCR disable text printing
             */
            final PdfFileInformation currentFileInformation=decode_pdf.getFileInformationData();
            
            final String[] values=currentFileInformation.getFieldValues();
            final String[] fields=PdfFileInformation.getFieldNames();
            
            for(int i=0;i<fields.length;i++){
                
                if(fields[i].equals("Creator")){
                    
                    for (final String anOcr : ocr) {
                        
                        if (values[i].equals(anOcr)) {
                            
                            decode_pdf.setRenderMode(PdfDecoderServer.RENDERIMAGES);
                            
                        }
                    }
                }
            }
            
            /**
             * get the current page as a BufferedImage
             */
            BufferedImage image_to_save=getPageAsImage(page,isTransparent);
            
            if(isTransparent) {
             
                
                //java adds odd tint if you save this as JPEG which does not have transparency
                // so put as RGB on white background
                // (or save as PNG or TIFF which has transparency)
                // or just call decode_pdf.getPageAsImage(page)
                if(image_to_save!=null && imageType.toLowerCase().startsWith("jp")){
                    
                    final BufferedImage rawVersion=image_to_save;
                    
                    final int w=rawVersion.getWidth();
                    final int h=rawVersion.getHeight();
                    //blank canvas
                    image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB);
                    
                    //
                    final Graphics2D g2 = image_to_save.createGraphics();
                    //white background
                    g2.setPaint(Color.WHITE);
                    g2.fillRect(0,0,w,h);
                    //paint on image
                    g2.drawImage(rawVersion, 00,null);
                }
            }
            
            
            //if just gray we can reduce memory usage by converting image to Grayscale
            
            /**
             * see what Colorspaces used and reduce image if appropriate
             * (only does Gray at present)
             *
             * null if JPedal unsure
             */
            final Iterator colorspacesUsed=decode_pdf.getPageInfo(PageInfo.COLORSPACES);
            
            int nextID;
            boolean isGrayOnly=colorspacesUsed!=null//assume true and disprove
            while(colorspacesUsed!=null && colorspacesUsed.hasNext()){
                nextID= (Integer) (colorspacesUsed.next());
                
                if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray) {
                    isGrayOnly=false;
                }
            }
            
            //draw onto GRAY image to reduce colour depth
            //(converts ARGB to gray)
            if(isGrayOnly){
                final BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
                image_to_save2.getGraphics().drawImage(image_to_save,0,0,null);
                image_to_save = image_to_save2;
            }
            
            
            //
            
            if (image_to_save != null) {
                
                //shrink the page to 50% with graphics2D transformation
                //- add your own parameters as needed
                //you may want to replace null with a hints object if you
                //want to fine tune quality.
                
                /** example 1 biliniear scaling
                 * AffineTransform scale = new AffineTransform();
                 * scale.scale(.5, .5); //50% as a decimal
                 * AffineTransformOp scalingOp =new AffineTransformOp(scale, null);
                 * image_to_save =scalingOp.filter(image_to_save, null);
                 
                 */
                
                /** example 2 bicubic scaling - better quality but slower
                 * to preserve aspect ratio set newWidth or newHeight to -1*/
                
                /**allow user to specify maximum dimension for thumbnail*/
                final String maxDimensionAsString = System.getProperty("maxDimension");
                int maxDimension = -1;
                
                if(maxDimensionAsString != null) {
                    maxDimension = Integer.parseInt(maxDimensionAsString);
                }
                
                if(maxDimension != -1){
                    int newWidth=image_to_save.getWidth();
                    int newHeight=image_to_save.getHeight();
                    
                    final Image scaledImage;
                    if(maxDimension != -&& (newWidth > maxDimension || newHeight > maxDimension)){
                        if(newWidth > newHeight){
                            newWidth = maxDimension;
                            scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
                        else {
                            newHeight = maxDimension;
                            scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);
                        }
                    else {
                        scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
                    }
                    
                    if(imageType.toLowerCase().startsWith("jp")) {
                        image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null, BufferedImage.TYPE_INT_RGB);
                    else {
                        image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null, BufferedImage.TYPE_INT_ARGB);
                    }
                    
                    final Graphics2D g2 = image_to_save.createGraphics();
                    
                    g2.drawImage(scaledImage, 00,null);
                }
                
                final String imageFormat = System.getProperty("org.jpedal.imageType");
                if(imageFormat!=null){
                    if(isNumber(imageFormat)){
                        final int iFormat = Integer.parseInt(imageFormat);
                        if(iFormat>-&& iFormat<14){
                            final BufferedImage tempImage = new BufferedImage(image_to_save.getWidth(), image_to_save.getHeight(), iFormat);
                            final Graphics2D g = tempImage.createGraphics();
                            g.drawImage(image_to_save, null, null);
                            
                            image_to_save = tempImage;
                        }else{
                            System.err.println("Image Type is not valid. Value should be a digit between 0 - 13 based on the BufferedImage TYPE variables.");
                        }
                    }else{
                        System.err.println("Image Type provided is not an Integer. Value should be a digit between 0 - 13 based on the BufferedImage TYPE variables.");
                    }
                }
                
                if(imageType.toLowerCase().startsWith("tif")){
                    
                    String outputFileName;
                    final boolean isFirstPage = page == start;
                    
                    TiffEncoder tiffEncoder = new TiffEncoder();
                    tiffEncoder.setCompressed(compressTiffs);
                    
                    if(isSingleOutputFile){
                        outputFileName=output_dir + image_name+".tif";
                        File file = new File(outputFileName);
                        if(isFirstPage && file.exists()){
                            file.delete();
                            file.createNewFile();
                        }
                        tiffEncoder.append(image_to_save, outputFileName);
                        
                    }else{
                        outputFileName=output_dir + pageAsString + image_name+".tif";
                        File file = new File(outputFileName);
                        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
                        tiffEncoder.write(image_to_save,bos);
                        bos.flush();
                        bos.close();
                    }
                    
                    
                else {
                    
                    //save image
                    decode_pdf.getObjectStore().saveStoredImage(output_dir + image_name,
                            image_to_save,
                            true,
                            false,
                            imageType);
                }
            }
            
            //flush images in case we do more than 1 page so only contains
            //images from current page
            decode_pdf.flushObjectValues(true);
            //flush any text data read
            
        }
    }
    //////////////////////////////////////////////////////////////////////////
    /**
     * main routine which checks for any files passed and runs the demo
     */
    public static void main(final String[] args) {
        
        //long start=System.currentTimeMillis();
        
        System.out.println("Simple demo to extract images from a page");
        
        //check values first and exit with info if too many
        final int count=args.length;
        final boolean failed = count>|| count==0;
        if(failed){
            
            if(count>0){
                System.out.println("wrong arguments entered");
                
                StringBuilder arguments=new StringBuilder();
                for (final String arg : args) {
                    arguments.append(arg).append('\n');
                }
                System.out.println("you entered:\n"+ arguments +"as the arguments");
            }
        }else{
            
            try {
                writeAllPagesAsImagesToDir(args[0], args[1],"png"1.33f);
                
                //System.out.println("Took="+(System.currentTimeMillis()-start)/1000);
            catch (PdfException ex) {
                throw new RuntimeException(ex.getMessage());
            }
        }
    }
    
    /**test to see if string or number*/
    private static boolean isNumber(final String value) {
        
        //assume true and see if proved wrong
        boolean isNumber=true;
        
        final int charCount=value.length();
        for(int i=0;i<charCount;i++){
            final char c=value.charAt(i);
            if((c<'0')|(c>'9')){
                isNumber=false;
                i=charCount;
            }
        }
        
        return isNumber;
    }
    
    @Override
    void init() {
        
        /**
         * font mappings
         */
        if(!isTest){
            
            //mappings for non-embedded fonts to use
            FontMappings.setFontReplacements();
            
        }
            
        type=ExtractTypes.RASTERIZED_PAGE;
        
        super.init();
        
        decode_pdf.setExtractionMode(0, pageScaling);
    }
    
    public BufferedImage getPageAsImage(int page, boolean isTransparentthrows PdfException {
        
        if(!isTransparent) {
            return decode_pdf.getPageAsImage(page);
        else//use this if you want a transparent image
            return decode_pdf.getPageAsTransparentImage(page);
            
        }
    }
}