PDFBox
  1. PDFBox
  2. PDFBOX-1240

Extract Images from PDF file using PDFBox in C#

    Details

    • Type: New Feature New Feature
    • Status: Closed
    • Priority: Minor Minor
    • Resolution: Not a Problem
    • Affects Version/s: 0.7.3
    • Fix Version/s: None
    • Component/s: Utilities
    • Labels:
      None

      Description

      /// <summary>
      /// Extracts the images referenced by each page of a PDF document and writes them to disk.
      /// Mirrors the Java PDFBox ExtractImages utility and uses the Java PDFBox API
      /// (java.util / java.io types) from C#.
      /// </summary>
      public class ExtractImage
      {
          // Running counter used to build unique output file names across all pages.
          private int imageCounter = 1;
          private static string PASSWORD = "-password";
          private static string PREFIX = "-prefix";

          public ExtractImage()
          {
          }

          /// <summary>
          /// Parses the command-line style arguments and extracts all images of the given PDF.
          /// </summary>
          /// <param name="args">
          /// One to four entries: an optional "-password" followed by its value, an optional
          /// "-prefix" followed by its value, and the PDF file path. The first argument that is
          /// not an option (or an option value) is taken as the PDF file; later ones are ignored.
          /// </param>
          /// <remarks>
          /// On invalid arguments the usage text is printed and the process exits with code 1.
          /// Exceptions raised by PDFBox while loading, decrypting or writing propagate to the caller;
          /// the document is closed in all cases.
          /// </remarks>
          public void extractImage(string[] args)
          {
              if (args == null || args.Length < 1 || args.Length > 4)
              {
                  usage();
                  return;
              }

              // Must start as null: the checks below use null to mean "no PDF file seen yet".
              // (Initialising this to "" made those checks always false, so the file name
              // was never picked up from args.)
              string pdfFile = null;
              string password = "";
              string prefix = null;

              for (int i = 0; i < args.Length; i++)
              {
                  if (args[i] == PASSWORD)
                  {
                      i++;
                      if (i >= args.Length)
                      {
                          usage();
                          return; // usage() exits the process; the return guards the index below regardless
                      }
                      password = args[i];
                  }
                  else if (args[i] == PREFIX)
                  {
                      i++;
                      if (i >= args.Length)
                      {
                          usage();
                          return;
                      }
                      prefix = args[i];
                  }
                  else if (pdfFile == null)
                  {
                      pdfFile = args[i];
                  }
              }

              if (pdfFile == null)
              {
                  usage();
                  return;
              }

              // Default prefix: the file name minus its last four characters
              // (assumes a ".pdf"-style extension — same rule as the original code).
              if (prefix == null && pdfFile.Length > 4)
              {
                  prefix = pdfFile.Substring(0, pdfFile.Length - 4);
              }

              PDDocument document = null;
              try
              {
                  document = PDDocument.load(pdfFile);
                  if (document.isEncrypted())
                  {
                      StandardDecryptionMaterial spm = new StandardDecryptionMaterial(password);
                      document.openProtection(spm);
                      AccessPermission ap = document.getCurrentAccessPermission();

                      if (!ap.canExtractContent())
                      {
                          // Stop here: previously the error was printed but extraction went ahead anyway.
                          Console.WriteLine("Error: You do not have permission to extract images." );
                          return;
                      }
                  }

                  java.util.List pages = document.getDocumentCatalog().getAllPages();
                  java.util.Iterator iter = pages.iterator();
                  while (iter.hasNext())
                  {
                      PDPage page = (PDPage)iter.next();
                      PDResources resources = page.getResources();
                      if (resources == null)
                      {
                          // NOTE(review): guards against a page without its own resources entry —
                          // confirm against the PDFBox version in use.
                          continue;
                      }

                      java.util.Map images = resources.getImages();
                      if (images == null)
                      {
                          continue;
                      }

                      java.util.Iterator imageIter = images.keySet().iterator();
                      while (imageIter.hasNext())
                      {
                          string key = (string)imageIter.next();
                          PDXObjectImage image = (PDXObjectImage)images.get(key);

                          // Honour the -prefix option / default prefix; it was parsed but never used before.
                          string baseName = prefix == null ? key : prefix + "-" + key;
                          string name = getUniqueFileName(baseName, image.getSuffix());
                          Console.WriteLine( "Writing image:" + name );
                          image.write2file(name);
                      }
                  }
              }
              finally
              {
                  // Always release the document, including on early return or exception.
                  if (document != null)
                  {
                      document.close();
                  }
              }
          }

          /// <summary>
          /// Builds a file name of the form "prefix-N" for which "prefix-N.suffix" does not yet exist,
          /// advancing the shared image counter on every attempt.
          /// </summary>
          /// <param name="prefix">Leading part of the file name.</param>
          /// <param name="suffix">File extension (without the dot) used for the existence check.</param>
          /// <returns>
          /// The unique name WITHOUT the suffix; the caller passes it to write2file as-is
          /// (presumably write2file appends the suffix itself — confirm against PDXObjectImage).
          /// </returns>
          private string getUniqueFileName(string prefix, string suffix)
          {
              string uniqueName;
              java.io.File f;
              do
              {
                  uniqueName = prefix + "-" + imageCounter;
                  f = new java.io.File(uniqueName + "." + suffix);
                  imageCounter++;
              }
              while (f.exists());

              return uniqueName;
          }

          /// <summary>
          /// Prints the command-line usage to standard error and terminates the process with exit code 1.
          /// </summary>
          private void usage()
          {
              Console.Error.WriteLine("Usage: java org.pdfbox.ExtractImages [OPTIONS] <PDF file>\n" + " -password <password> Password to decrypt document\n" + " -prefix <image-prefix> Image prefix(default to pdf name)\n" + " <PDF file> The PDF document to use\n");
              Environment.Exit(1);
          }
      }

      Reference: http://svn.apache.org/repos/asf/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java
      I tested it, but I have a problem. Execution reached if(args.Length<1||args.Length>4) and stopped in usage(). It loops. Please help me. Many thanks.

      { usage(); }

        Activity

        Hide
        Andreas Lehmkühler added a comment -

        Please update to a newer version of PDFBox, 0.7.3 is really outdated. There should be an executable of ExtractImages so that you don't have to reimplement it.

        Show
        Andreas Lehmkühler added a comment - Please update to a newer version of PDFBox, 0.7.3 is really outdated. There should be an executable of ExtractImages so that you don't have to reimplement it.
        Hide
        Pham Minh Cuong added a comment -

        Yeah...I updated and extract images....Thanks Andreas Lehmkühler very much !

        Show
        Pham Minh Cuong added a comment - Yeah...I updated and extract images....Thanks Andreas Lehmkühler very much !
        Hide
        Andreas Lehmkühler added a comment -

        Thanks for coming back!

        Set to closed

        Show
        Andreas Lehmkühler added a comment - Thanks for coming back! Set to closed

          People

          • Assignee:
            Andreas Lehmkühler
            Reporter:
            Pham Minh Cuong
          • Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

            • Created:
              Updated:
              Resolved:

              Time Tracking

              Estimated:
              Original Estimate - 168h
              168h
              Remaining:
              Remaining Estimate - 168h
              168h
              Logged:
              Time Spent - Not Specified
              Not Specified

                Development