Convert PDF to Image


引入pom

<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.6</version>
</dependency>

代码

  // `document` is assumed to be an already-loaded PDDocument (see the full example in this file).
  // Renders page index 0 at 250 DPI as RGB and writes it as a JPEG.
  // The File overload of ImageIO.write opens and closes the output stream itself,
  // so no FileOutputStream is left unclosed.
  ImageIO.write(new PDFRenderer(document).renderImageWithDPI(0, 250, ImageType.RGB), "jpg", new File("xxx.jpg"));

总体测试代码

  import java.io.File;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import java.util.HashSet;
  import java.util.Set;
  
  import javax.imageio.ImageIO;
  
  import org.apache.pdfbox.pdmodel.PDDocument;
  import org.apache.pdfbox.rendering.ImageType;
  import org.apache.pdfbox.rendering.PDFRenderer;
  import org.apache.pdfbox.text.PDFTextStripper;
  import org.apache.pdfbox.text.TextPosition;
  
  /**
   * Demo that reports the fonts used by a PDF and renders its first page to a JPEG.
   *
   * <p>The class extends {@link PDFTextStripper} and overrides
   * {@link #processTextPosition(TextPosition)} to print the font and layout metrics of each
   * {@link TextPosition} it receives and to collect the font names in {@link #set}.
   */
  public class PrintTextLocations extends PDFTextStripper {

      /**
       * Names of every font seen by {@link #processTextPosition(TextPosition)}.
       * The field is static, so it is shared by all instances and is never cleared.
       */
      public static Set<String> set = new HashSet<>();

      /**
       * Creates a stripper with sort-by-position enabled.
       *
       * @throws IOException declared by this constructor; presumably propagated from the
       *     superclass constructor
       */
      public PrintTextLocations() throws IOException {
          super.setSortByPosition(true);
      }

      /**
       * Loads {@code xxx.pdf}, prints per-glyph font information and the set of font names,
       * then renders page index 0 at 250 DPI to {@code xxx.jpg}.
       *
       * @param args unused
       * @throws Exception if the PDF cannot be loaded or processed, or the image cannot be written
       */
      public static void main(String[] args) throws Exception {
          File input = new File("xxx.pdf");

          // try-with-resources closes the document even when text extraction or rendering fails.
          try (PDDocument document = PDDocument.load(input)) {
              PrintTextLocations printer = new PrintTextLocations();
              // The extracted text is discarded; the call is made for its side effects
              // (the printing and font collection done in processTextPosition).
              printer.getText(document);
              System.out.println(set.toString());

              // Writing to a File lets ImageIO open and close the stream itself; the original
              // inline FileOutputStream was never closed.
              File output = new File("xxx.jpg");
              boolean written = ImageIO.write(
                      new PDFRenderer(document).renderImageWithDPI(0, 250, ImageType.RGB), "jpg", output);
              if (!written) {
                  // ImageIO.write returns false when no writer supports the requested format.
                  throw new IOException("No ImageIO writer available for format jpg");
              }
          }
      }

      /**
       * Records the font name of {@code text}, prints its font and position metrics, and then
       * delegates to the superclass implementation.
       *
       * @param text the text position being processed
       */
      @Override
      protected void processTextPosition(TextPosition text) {
          String fontName = text.getFont().getName();
          set.add(fontName);
          System.out.println(fontName + " String[" + text.getXDirAdj() + "," + text.getYDirAdj() + " fs=" + text.getFontSize()
                  + " xscale=" + text.getXScale() + " height=" + text.getHeightDir() + " space=" + text.getWidthOfSpace()
                  + " width=" + text.getWidthDirAdj() + "]" + text.toString());
          super.processTextPosition(text);
      }
  }

说明

  1. 程序会逐个打印文字所使用的字体名称、位置与尺寸信息，以及对应的文字。
  2. 文本处理完成后，会打印出该 PDF 中出现过的所有字体名称。
  3. 将 PDF 的第一页（页码索引 0）以 250 DPI 渲染并保存为 JPG 图片。
  4. 如果图片中的文字显示不出来，请查看日志中的 WARN 信息，并安装对应的字体。

STORY