始终没有搞懂Java中的字符编码问题

本站的动态页面静态化,采用的做法是用程序访问网页,然后把返回的内容写入静态文件。由于本站采用UTF-8编码,所以静态文件也要以UTF-8格式保存。

在我本机的环境下(redhat linux9.0 + jdk1.4.2 + tomcat5 )和现在的空间环境windows2000 advanced + jdk1.5 + Resin3.0.18下,使用下面的代码可以正确保存为UTF-8静态文件:


/*

 * Created on 2006-12-1

 */

package com.learndiary.website.util;

import javax.servlet.*;

import javax.servlet.http.*;

import java.io.*;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import com.learndiary.website.model.UserInfo;

public class ToHtml extends HttpServlet {

private static Log __log = LogFactory.getFactory().getInstance("com.learndiary.website.util.ToHtml");

public void service(

HttpServletRequest request,

HttpServletResponse response)

throws ServletException, IOException {

String url = "";

String name = "";

ServletContext sc = getServletContext();

String fileName = request.getParameter("fileName");

String artID = request.getParameter("artID");

String pageNum = request.getParameter("pageNum");

String webPath = null;

        if (fileName.equals("main")){

url = "/main.do";

webPath = "/index.htm";

__log.info("main url is: " + url +" ; static html file name is: " + webPath);

}else if(fileName.equals("goal")){

url = "/disGoalContentAction.do?goalID=" + artID; // 你要生成的页面的文件名。

webPath = "/goals/" + artID + ".htm";

__log.info("goal url is: " + url +" ; static html file name is: " + webPath);

}else if(fileName.equals("diary")){

    url = "/disDiaryContentAction.do?goalID=" + artID; // 你要生成的页面的文件名。

    webPath = "/diaries/" + artID + ".htm";

    __log.info("diary url is: " + url +" ; static html file name is: " + webPath);

    }else if(fileName.equals("sitemap")){

    url = "/mapGenerateAction.do?artID=" + artID + "&pageNum=" + pageNum ; // 你要生成的页面的文件名。

    int articleID = Integer.parseInt(artID);

    if (articleID == 0){

        webPath = "/sitemaps/goals-" + pageNum + ".htm";

    }else if (articleID > 0){

        webPath = "/sitemaps/goal" + artID + "-" + pageNum + ".htm";

    }

    __log.info("sitemap url is: " + url +" ; static html file name is: " + webPath);

    }

        name = request.getRealPath(webPath);

       

        __log.info("real name is: " + name);

       

RequestDispatcher rd = sc.getRequestDispatcher(url);

final ByteArrayOutputStream os = new ByteArrayOutputStream();

final ServletOutputStream stream = new ServletOutputStream() {

public void write(byte[] data, int offset, int length) {

os.write(data, offset, length);

}

public void write(int b) throws IOException {

os.write(b);

}

};

final PrintWriter pw = new PrintWriter(new OutputStreamWriter(os));

HttpServletResponse rep = new HttpServletResponseWrapper(response) {

public ServletOutputStream getOutputStream() {

return stream;

}

public PrintWriter getWriter() {

return pw;

}

};

rep.setContentType("text/html; charset=UTF-8");

rd.include(request, rep);

pw.flush();

FileOutputStream fos = new FileOutputStream(name);



byte[] ob= os.toByteArray();

//String string =new String(ob, "GB2312");//在本站如果输入繁体字等这样不能显示繁体字(应该是只能正确处理GB2312的内容)

String string =new String(ob);//这样才能正确处理繁体字

byte[] ob2 = string.getBytes("UTF-8");

fos.write(ob2);

        //os.writeTo(fos);

fos.close();

String jspName = webPath.replaceAll(".htm", ".jsp");

if (!HtmlsManager.isExist(jspName, request)){ //write another same name's jsp static file that will include html file

FileOutputStream jspFos = new FileOutputStream(name.replaceAll(".htm", ".jsp"));

String content = "<%@ include file=\"/common/jsp301.jsp\" %>".concat("<jsp:include page=\"" + webPath + "\" />");

byte[] jspOb = content.getBytes("UTF-8");

jspFos.write(jspOb);

jspFos.close();

__log.info("wrote static file name in ToHtml.java is: " + jspName + ", and content is: " + content);

}

__log.info("wrote static file name in ToHtml.java is: " + name);



}

}

而在原来的空间环境(windows2000 advanced + jdk1.5 + Resin2)下,上面代码中把页面内容和对应的jsp文件写入磁盘的那一部分(原文以红色标出)必须换成下面的代码才行,否则生成的文件是乱码:


// Variant of the file-writing section: the captured bytes are copied to disk
// verbatim, without decoding and re-encoding them.
FileOutputStream fos = new FileOutputStream(name);
try {
    os.writeTo(fos);
} finally {
    // Close even when the write fails so the file handle is not leaked.
    fos.close();
}

// Swap only the trailing ".htm": replaceAll(".htm", ".jsp") treats '.' as a regex
// wildcard and would rewrite every match anywhere in the path.
String jspName = (webPath.endsWith(".htm") ? webPath.substring(0, webPath.length() - 4) : webPath) + ".jsp";

if (!HtmlsManager.isExist(jspName, request)) {
    // Write a same-named JSP that simply includes the static HTML file.
    String jspFile = (name.endsWith(".htm") ? name.substring(0, name.length() - 4) : name) + ".jsp";
    String content = "<%@ include file=\"/common/jsp301.jsp\" %>".concat("<jsp:include page=\"" + webPath + "\" />");

    // Name the charset explicitly instead of relying on the platform default.
    byte[] jspOb = content.getBytes("UTF-8");

    FileOutputStream jspFos = new FileOutputStream(jspFile);
    try {
        jspFos.write(jspOb);
    } finally {
        jspFos.close();
    }

    __log.info("wrote static file name in ToHtml.java is: " + jspName + ", and content is: " + content);
}

__log.info("wrote static file name in ToHtml.java is: " + name);

由于我对上面的文件操作和字符编码只是一知半解,并不清楚为什么要这样做,只知道这样做的结果是正确的。所以,就算结果正确,上面的代码也很有可能有问题。像这些基础的技术确实应该真正理解和掌握。

这里有一个JAVA字符编码讲得比较透彻的文章,以后可以照着学一下。

    上篇:http://www.pconline.com.cn/pcedu/empolder/gj/java/0404/366404.html

    下篇:http://www.pconline.com.cn/pcedu/empolder/gj/java/0405/368760.html