admin 管理员组

文章数量: 887021

问题及解决

在之前的文章中提到了如何在手机上显示office文件,这里说明pdf文件如何在手机上展示。问题背景是:公司企业号会定时从OA系统获取信息,并解析内网文章中的图片、文件,展示在微信中。后台发现程序报错,原因是文件的在线预览只处理了office文件,没有处理pdf文件,导致在安卓设备上打开pdf文件时程序异常。现在使用pdf2html工具(pdf2htmlEX)进行转换,流程如下:

(苹果设备无需特殊处理,直接通过文件流设置头部信息application/msword、application/msexcel、application/pdf 内置浏览器能够识别)

相关链接:

微信或手机浏览器在线显示office文件:微信或手机浏览器在线显示office文件(已测试ios、android)_牟云飞的博客-CSDN博客

pdf2html工具使用:java实现PDF转HTML - 耀仔 - 博客园

效果图

pdf2html工具转码出来的html很清晰,但如果直接放在手机端,高度、宽度过大,样式也会异常,因此需要先对转码后的html进行格式化再展示。后面的示例代码只做了简单的处理,还需要针对实际情况进一步优化。

(其实可以更简单,转码后对html文件进行处理,使用nginx反向代理转码目录,详细页面通过iframe直接连接nginx代理后的url)

示例代码

-------------------从OA获取文件-----------------

/**
	 * Fetches a file from the OA server and delivers it to the mobile client.
	 * <p>
	 * iPhone clients, and any file whose extension contains {@code txt}, get
	 * the raw bytes streamed back with a matching content type (the existing
	 * notes say the iOS browser can open them itself, while the WeChat Android
	 * browser cannot). Every other request gets an HTML rendition: the file is
	 * downloaded once, converted to HTML, cached on disk, and the cleaned HTML
	 * is exposed as the request attribute {@code htmlcontext}.
	 *
	 * @return {@code null} when the response has been written directly (raw
	 *         streaming, or a request without {@code fileUrl}); otherwise the
	 *         result name {@code "lookfile"}
	 */
	public String getFileFromOa(){

		HttpServletRequest req = ServletActionContext.getRequest();
		// The User-Agent header carries the device type; it may be missing.
		String userAgent = req.getHeader("User-Agent");
		if (userAgent == null) {
			userAgent = "";
		}

		// Location of the file, relative to the OA base URL.
		String fileUrl = req.getParameter("fileUrl");
		if (fileUrl == null || fileUrl.length() == 0) {
			// Nothing to fetch; previously this ended in a NullPointerException.
			try {
				ServletActionContext.getResponse().sendError(HttpServletResponse.SC_BAD_REQUEST);
			} catch (IOException e) {
				e.printStackTrace();
			}
			return null;
		}

		// Strings are immutable: the earlier code called replaceAll() and threw the
		// result away, so an encoded plus sign was never converted. The converted
		// value is used for the OA request and for the local file name; the raw
		// fileUrl is still used for the JS-SDK signature below, as before.
		// NOTE(review): the iPhone branch also used to call
		// replaceAll("%20", "\\+") and discard the result. It is left disabled
		// because turning spaces into '+' in a URL path looks unintended - confirm.
		String oaFileUrl = fileUrl.replace("%2B", "+");

		String fileTypeTemp = oaFileUrl.substring(oaFileUrl.lastIndexOf(".") + 1);
		System.out.println("-----------------   "+fileTypeTemp);

		// iPhone, or a txt file: let the browser open the raw file itself.
		if (userAgent.indexOf("iPhone") != -1 || fileTypeTemp.indexOf("txt") != -1) {
			streamOaFileToResponse(req, oaFileUrl);
			return null;
		}

		// Any other device: convert the document to HTML on the server.

		// OAuth2: resolve the user behind the authorization code.
		String code = req.getParameter("code");
		MessageUtil msgUtil = new MessageUtil();
		// The returned user id is not used here; the call is kept because its
		// side effects cannot be determined from this method.
		msgUtil.getUserIdByCode(code);
		// Build the WeChat JS-SDK signature for this page.
		String jsapi_ticket = msgUtil.getJsapiTicketFromWx();
		String url = MessageUtil.webUrl+"/wx/oaNewsMobileAction.do?action=getFileFromOa&fileUrl="+fileUrl;
		System.out.println(url);
		Map<String, String> ret = MessageUtil.sign(jsapi_ticket, url);

		req.setAttribute("str1", ret.get("signature"));
		req.setAttribute("time", ret.get("timestamp"));
		req.setAttribute("nonceStr", ret.get("nonceStr"));

		String strURL = MessageUtil.oaUrl + oaFileUrl;
		String name = oaFileUrl.substring(oaFileUrl.lastIndexOf("/") + 1);
		// The name comes from the request and ends up in a file path and on the
		// converter command line, so refuse anything that is not a plain file name.
		if (name.length() == 0 || name.indexOf('\\') != -1 || ".".equals(name) || "..".equals(name)) {
			System.out.println("Rejected OA file name: " + name);
			return "lookfile";
		}

		// Store the OA file locally, convert it to HTML, then hand the HTML to the page.
		try {
			HttpServletResponse response = ServletActionContext.getResponse();
			req.setCharacterEncoding("UTF-8");
			response.setCharacterEncoding("UTF-8");

			// Base directory is derived from the web application's real path.
			// The backslash handling means this only works on Windows.
			String path = req.getRealPath("");
			path = path.substring(0, path.lastIndexOf("\\")+1);
			File htmlFile = new File(path +  "OaFileToHtml\\"+name+".html");
			boolean isPdf = fileTypeTemp.indexOf("pdf") != -1;

			if (!htmlFile.exists()) {
				// Not converted yet: make sure the download folder exists.
				File oaFiles = new File(path + "OaFile");
				if (!oaFiles.exists()) {
					oaFiles.mkdirs();
				}
				// The OA server is contacted only here; previously a connection was
				// opened (and never closed) even when the cached HTML was served.
				File oafile = new File(oaFiles, name);
				downloadOaFile(strURL, oafile);

				// Directory that receives the generated HTML.
				String htmlFilePath = path + "OaFileToHtml";
				if (isPdf) {
					String htmlcontext = Pdf2htmlEXUtil.pdf2html_oa("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe",oafile.getPath(),htmlFilePath,oafile.getName());
					req.setAttribute("htmlcontext", htmlcontext);
				} else {
					// Office documents use the office converter.
					String htmlcontext = ConvertFileToHtml.toHtmlString(oafile, htmlFilePath);
					req.setAttribute("htmlcontext", htmlcontext);
				}
			} else {
				// A converted document already exists: reuse it.
				String htmlStr = readOaHtmlFile(htmlFile);
				if (isPdf) {
					req.setAttribute("htmlcontext", Pdf2htmlEXUtil.clearFormat(htmlStr,""));
				} else {
					req.setAttribute("htmlcontext", ConvertFileToHtml.clearFormat(htmlStr, ""));
				}
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return "lookfile";
	}

	/**
	 * Streams the OA file straight into the servlet response, setting a
	 * download file name and a content type derived from the file extension.
	 * I/O failures are printed and otherwise ignored, as before.
	 */
	private static void streamOaFileToResponse(HttpServletRequest req, String oaFileUrl) {
		String strURL = MessageUtil.oaUrl + oaFileUrl;
		String fileType = strURL.substring(strURL.lastIndexOf(".") + 1);
		InputStream in = null;
		OutputStream out = null;
		try {
			HttpURLConnection httpConn = (HttpURLConnection) new URL(strURL).openConnection();
			in = httpConn.getInputStream();

			HttpServletResponse response = ServletActionContext.getResponse();
			req.setCharacterEncoding("UTF-8");
			response.setCharacterEncoding("UTF-8");
			String name = oaFileUrl.substring(oaFileUrl.lastIndexOf("/") + 1);

			// NOTE(review): getBytes() uses the platform charset here - confirm
			// this is what the target browsers expect for non-ASCII names.
			response.setHeader("Content-Disposition",
					"attachment;filename=" + new String(name.getBytes(), "iso-8859-1"));
			if ("doc".equals(fileType) || "docx".equals(fileType)) {
				response.setContentType("application/msword");
			} else if ("xls".equals(fileType) || "xlsx".equals(fileType)) {
				response.setContentType("application/msexcel");
			} else {
				response.setContentType("application/"+fileType);
			}

			out = response.getOutputStream();
			copyOaStream(in, out);
			out.flush();
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close on every path so a failed transfer does not leak the streams.
			closeOaStreamQuietly(out);
			closeOaStreamQuietly(in);
		}
	}

	/** Downloads the resource at {@code strURL} into {@code target}. */
	private static void downloadOaFile(String strURL, File target) throws IOException {
		HttpURLConnection httpConn = (HttpURLConnection) new URL(strURL).openConnection();
		InputStream in = null;
		OutputStream out = null;
		try {
			in = httpConn.getInputStream();
			out = new FileOutputStream(target);
			copyOaStream(in, out);
			out.flush();
		} finally {
			closeOaStreamQuietly(out);
			closeOaStreamQuietly(in);
		}
	}

	/** Copies every byte from {@code in} to {@code out}; closes neither stream. */
	private static void copyOaStream(InputStream in, OutputStream out) throws IOException {
		byte[] bytes = new byte[1024];
		int cnt;
		while ((cnt = in.read(bytes, 0, bytes.length)) != -1) {
			out.write(bytes, 0, cnt);
		}
	}

	/**
	 * Reads a cached HTML file into one string with the line breaks dropped.
	 * Read failures are printed and yield whatever was read so far.
	 */
	private static String readOaHtmlFile(File htmlFile) {
		StringBuilder htmlSb = new StringBuilder();
		BufferedReader br = null;
		try {
			br = new BufferedReader(new InputStreamReader(new FileInputStream(htmlFile),Charset.forName("gb2312")));
			// Loop until end of stream; ready() only says whether a read would block.
			String line;
			while ((line = br.readLine()) != null) {
				htmlSb.append(line);
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeOaStreamQuietly(br);
		}
		return htmlSb.toString();
	}

	/** Closes {@code resource} if it is non-null, printing any close failure. */
	private static void closeOaStreamQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

-------------------pdf2html将pdf转成html文件-----------------

package com.wx.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class Pdf2htmlEXUtil {
    /**
     * 调用pdf2htmlEX将pdf文件转换为html文件
     * 
     * @param exeFilePath
     *            pdf2htmlEX.exe文件路径
     * @param pdfFile
     *            pdf文件绝对路径
     * @param [destDir] 生成的html文件存放路径
     * @param htmlName
     *            生成的html文件名称
     * @return
     */
    public static boolean pdf2html(String exeFilePath, String pdfFile,
            String destDir, String htmlFileName) {
        if (!(exeFilePath != null && !"".equals(exeFilePath) && pdfFile != null
                && !"".equals(pdfFile) && htmlFileName != null && !""
                    .equals(htmlFileName))) {
            System.out.println("传递的参数有误!");
            return false;
        }
        Runtime rt = Runtime.getRuntime();
        StringBuilder command = new StringBuilder();
        command.append(exeFilePath).append(" ");
        if (destDir != null && !"".equals(destDir.trim()))// 生成文件存放位置,需要替换文件路径中的空格
            command.append("--dest-dir ").append(destDir.replace(" ", "\" \""))
                    .append(" ");
        command.append("--optimize-text 1 ");// 尽量减少用于文本的HTML元素的数目 (default: 0)
        command.append("--zoom 1.4 ");
        command.append("--process-outline 0 ");// html中显示链接:0——false,1——true
        command.append("--font-format woff ");// 嵌入html中的字体后缀(default ttf)
                                                // ttf,otf,woff,svg
        command.append(pdfFile.replace(" ", "\" \"")).append(" ");// 需要替换文件路径中的空格
        if (htmlFileName != null && !"".equals(htmlFileName.trim())) {
            command.append(htmlFileName);
            if (htmlFileName.indexOf(".html") == -1)
                command.append(".html");
        }
        try {
            System.out.println("Command:" + command.toString());
            Process p = rt.exec(command.toString());
            StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(),
                    "ERROR");
            // 开启屏幕标准错误流
            errorGobbler.start();
            StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(),
                    "STDOUT");
            // 开启屏幕标准输出流
            outGobbler.start();
            int w = p.waitFor();
            int v = p.exitValue();
            if (w == 0 && v == 0) {
                return true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    public static boolean pdf2html_linux(String pdfFile, String destDir,
            String htmlFileName) {
        if (!(pdfFile != null && !"".equals(pdfFile) && htmlFileName != null && !""
                .equals(htmlFileName))) {
            System.out.println("传递的参数有误!");
            return false;
        }
        Runtime rt = Runtime.getRuntime();
        StringBuilder command = new StringBuilder();
        command.append("pdf2htmlEX").append(" ");
        if (destDir != null && !"".equals(destDir.trim()))// 生成文件存放位置,需要替换文件路径中的空格
            command.append("--dest-dir ").append(destDir.replace(" ", "\" \""))
                    .append(" ");
        command.append("--optimize-text 1 ");// 尽量减少用于文本的HTML元素的数目 (default: 0)
        command.append("--process-outline 0 ");// html中显示链接:0——false,1——true
        command.append("--font-format woff ");// 嵌入html中的字体后缀(default ttf)
                                                // ttf,otf,woff,svg
        command.append(pdfFile.replace(" ", "\" \"")).append(" ");// 需要替换文件路径中的空格
        if (htmlFileName != null && !"".equals(htmlFileName.trim())) {
            command.append(htmlFileName);
            if (htmlFileName.indexOf(".html") == -1)
                command.append(".html");
        }
        try {
            System.out.println("Command:" + command.toString());
            Process p = rt.exec(command.toString());
            StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(),
                    "ERROR");
            // 开启屏幕标准错误流
            errorGobbler.start();
            StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(),
                    "STDOUT");
            // 开启屏幕标准输出流
            outGobbler.start();
            int w = p.waitFor();
            int v = p.exitValue();
            if (w == 0 && v == 0) {
                return true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
    
    
    
    //转换OA的pdf文件
    public static String pdf2html_oa(String exeFilePath, String pdfFile,String destDir, String htmlFileName) {
    	boolean flag =pdf2html(exeFilePath,pdfFile,destDir,htmlFileName);
    	//
    	if(true == flag){
    		String htmlFile = destDir+File.separator+htmlFileName;
	    	// 获取html文件流
			StringBuffer htmlSb = new StringBuffer();
			try {
				BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(htmlFile),Charset.forName("gb2312")));
				while (br.ready()) {
					htmlSb.append(br.readLine());
				}
				br.close();
				// 删除临时文件
				//htmlFile.delete();
			} catch (FileNotFoundException e) {
				e.printStackTrace();
			} catch (IOException e) {
				e.printStackTrace();
			}
			// HTML文件字符串
			String htmlStr = htmlSb.toString();
			//System.out.println("htmlStr=" + htmlStr);
			// 返回经过清洁的html文本
			return Pdf2htmlEXUtil.clearFormat(htmlStr,"");
    	}
    	else{
    		return "";
    	}
    }
    
	/**
	 * 
	 * 清除pdf中一些不需要的html标记
	 * 
	 * 
	 * 
	 * @param htmlStr
	 * 
	 *            带有复杂html标记的html语句
	 * 
	 * @return 去除了不需要html标记的语句
	 */

	public static String clearFormat(String htmlStr, String docImgPath) {

		
//		htmlStr = htmlStr.replaceFirst("<BODY", "<DIV style='width:100%' ").replaceAll("</BODY>", "</DIV>");
//		htmlStr = htmlStr.replaceFirst("<body", "<div style='width:100%' ").replaceAll("</body>", "</div>");
//		htmlStr = htmlStr.replaceFirst("<img", "<img style='width:100%' ").replaceAll("</img>", "</img>");
//		htmlStr = htmlStr.replaceAll("<!DOCTYPE html>", "").replaceAll("<html>", "").replaceAll("</html>", "");
//		htmlStr = htmlStr.replaceAll("<head>", "").replaceAll("</head>", "");
//		htmlStr = htmlStr.replaceAll("<meta[\\s\\S]*>","");
//		htmlStr = htmlStr.replaceAll("<title[^)]*>","");
		
		// 获取body内容的正则
		String bodyReg = "<body .*</body>";
		Pattern bodyPattern = Patternpile(bodyReg);
		Matcher bodyMatcher = bodyPattern.matcher(htmlStr);
		if (bodyMatcher.find()) {
			// 获取BODY内容,并转化BODY标签为DIV
			htmlStr = bodyMatcher.group().replaceFirst("<body", "<DIV").replaceAll("</body>", "</DIV>");
		}
		htmlStr = htmlStr.replaceAll("<img", "<img style='width:100vw;height:100vh' ");
//
//		// 把<P></P>转换成</div></div>保留样式
//		// content = content.replaceAll("(<P)([^>]*>.*?)(<\\/P>)",
//		// "<div$2</div>");
//		// 把<P></P>转换成</div></div>并删除样式
//		htmlStr = htmlStr.replaceAll("(<P)([^>]*)(>.*?)(<\\/P>)", "<p$3</p>");
//		// 删除不需要的标签
//		htmlStr = htmlStr.replaceAll("<[/]?(font|FONT|span|SPAN|xml|XML|del|DEL|ins|INS|meta|META|[ovwxpOVWXP]:\\w+)[^>]*?>","");
//		// 删除不需要的属性
//		htmlStr = htmlStr.replaceAll("<([^>]*)(?:lang|LANG|class|CLASS|style|STYLE|size|SIZE|face|FACE|[ovwxpOVWXP]:\\w+)=(?:'[^']*'|\"\"[^\"\"]*\"\"|[^>]+)([^>]*)>","<$1$2>");
//		//处理图片height
//		//htmlStr = htmlStr.replaceAll("(<img[^>]*?)\\s+width\\s*=\\s*\\S+","$1"); 
//		htmlStr = htmlStr.replaceAll("(<img[^>]*?)\\s+height\\s*=\\s*\\S+","$1"); 
//		htmlStr = htmlStr.replaceAll("(<IMG[^>]*?)\\s+HEIGHT\\s*=\\s*\\S+","$1"); 
		return htmlStr;

	}
    
    public static void main(String[] args) {
    	//测试转换工具
        pdf2html("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe","G:\\20181024.pdf","D:\\pdf2htmlEX-v1.0\\HTML","my.html");
    
    	//测试转换OA文件
        pdf2html_oa("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe","G:\\20181024.pdf","D:\\pdf2htmlEX-v1.0\\HTML","my.html");
    }
}

-------------------转码线程,可以不用线程,也可以同步转换-----------------

package com.wx.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
/**
 * Thread that reads an input stream line by line until it ends, echoing each
 * line to standard output prefixed with a label and, when a redirect stream
 * was supplied, copying the line to that stream as well.
 *
 * @author muyunfei
 *
 * <p>Modification History:</p>
 * <p>Date       Author      Description</p>
 * <p>------------------------------------------------------------------</p>
 * <p>Oct 25, 2018           Mu Yunfei       		 created</p>
 */
public class StreamGobbler extends Thread  {
    InputStream is;
    String type;
    OutputStream os;

    /**
     * Creates a gobbler that only echoes to standard output.
     *
     * @param is   stream to drain
     * @param type label printed in front of every line
     */
    public StreamGobbler(InputStream is, String type) {
        this(is, type, null);
    }

    /**
     * Creates a gobbler that also copies every line to {@code redirect}.
     *
     * @param is       stream to drain
     * @param type     label printed in front of every line
     * @param redirect stream receiving a copy of every line; may be {@code null}
     */
    StreamGobbler(InputStream is, String type, OutputStream redirect) {
        this.is = is;
        this.type = type;
        this.os = redirect;
    }

    /** Drains the stream; I/O failures are printed and end the loop. */
    @Override
    public void run() {
        PrintWriter copy = null;
        InputStreamReader decoder = null;
        BufferedReader lines = null;
        try {
            copy = (os == null) ? null : new PrintWriter(os);
            decoder = new InputStreamReader(is);
            lines = new BufferedReader(decoder);
            for (String text = lines.readLine(); text != null; text = lines.readLine()) {
                if (copy != null) {
                    copy.println(text);
                }
                System.out.println(type + ">" + text);
            }
            if (copy != null) {
                copy.flush();
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            release(copy, lines, decoder);
        }
    }

    /**
     * Closes the writer, then the buffered reader, then the stream reader.
     * The first close failure is printed and stops the remaining closes.
     */
    private static void release(PrintWriter copy, BufferedReader lines,
            InputStreamReader decoder) {
        try {
            if (copy != null) {
                copy.close();
            }
            if (lines != null) {
                lines.close();
            }
            if (decoder != null) {
                decoder.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

本文标签: 在线 浏览器 文件 手机 pdf