Uploaded image for project: 'PDFBox'
  1. PDFBox
  2. PDFBOX-1240

Extract Images from PDF file using PDFBox in C#

    XMLWordPrintableJSON

Details

    • New Feature
    • Status: Closed
    • Minor
    • Resolution: Not A Problem
    • 0.7.3
    • None
    • Utilities
    • None

    Description

      public class ExtractImage
      {
      private int imageCounter = 1;
      private static string PASSWORD = "-password";
      private static string PREFIX = "-prefix";

      public ExtractImage()
      {

      }

      public void extractImage(string []args)
      {
      if(args.Length<1||args.Length>4)

      { usage(); }

      else
      {
      string pdfFile = "";
      string password = "";
      string prefix = null;
      for (int i = 0; i < args.Length; i++)
      {
      if(args[i]==PASSWORD)
      {
      i++;
      if (i >= args.Length)

      { usage(); }
      password = args[i];
      }
      else if (args[i]==PREFIX)
      {
      i++;
      if(i>=args.Length)
      { usage(); }

      prefix = args[i];
      }
      else
      {
      if(pdfFile==null)

      { pdfFile = args[i]; }

      }
      }
      if(pdfFile==null)

      { usage(); }

      else
      {
      if(prefix==null && pdfFile.Length>4)

      { prefix = pdfFile.Substring(0, pdfFile.Length - 4); }

      PDDocument document=null;
      try
      {
      document = PDDocument.load(pdfFile);
      if( document.isEncrypted() )
      {

      StandardDecryptionMaterial spm = new StandardDecryptionMaterial(password);
      document.openProtection(spm);
      AccessPermission ap = document.getCurrentAccessPermission();

      if( ! ap.canExtractContent() )

      { Console.WriteLine("Error: You do not have permission to extract images." ); }

      }

      List pages = document.getDocumentCatalog().getAllPages();
      java.util.Iterator iter = pages.iterator();
      while( iter.hasNext() )
      {
      PDPage page = (PDPage)iter.next();
      PDResources resources = page.getResources();
      java.util.Map images = resources.getImages();
      if( images != null )
      {
      Iterator imageIter = images.keySet().iterator();
      while( imageIter.hasNext() )

      { string key = (String )imageIter.next(); PDXObjectImage image = (PDXObjectImage)images.get( key ); string name = getUniqueFileName( key, image.getSuffix() ); Console.WriteLine( "Writing image:" + name ); image.write2file( name ); }

      }
      }
      }
      catch (Exception)

      { throw; }

      }
      }
      }

      private string getUniqueFileName(string prefix, string suffix)
      {
      string uniqueName = null;
      java.io.File f = null;
      f = null;
      while (f == null || f.exists())

      { uniqueName = prefix + "-" + imageCounter; f =new File(uniqueName + "." + suffix); imageCounter++; }

      return uniqueName;
      }

      private void usage()

      { Console.Error.WriteLine("Usage: java org.pdfbox.ExtractImages [OPTIONS] <PDF file>\n" + " -password <password> Password to decrypt document\n" + " -prefix <image-prefix> Image prefix(default to pdf name)\n" + " <PDF file> The PDF document to use\n"); Environment.Exit(1); }

      }

      Reference: http://svn.apache.org/repos/asf/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java
      I tested but i have a problem.It ran if(args.Length<1||args.Length>4) and stopped in usage().It loop.Please help me.Many thanks.

      { usage(); }

      Attachments

        Activity

          People

            lehmi Andreas Lehmkühler
            cuongpuyol Pham Minh Cuong
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Time Tracking

                Estimated:
                Original Estimate - 168h
                168h
                Remaining:
                Remaining Estimate - 168h
                168h
                Logged:
                Time Spent - Not Specified
                Not Specified