Java 怎么实现一种从十进制表示获取Unicode字符的方法(HTML转义码)

发布时间:2019年8月2日 23:15:40 | 标题:Java 怎么实现一种从十进制表示获取Unicode字符的方法(HTML转义码) | 已关闭评论 | 370 | 1033字 | 阅读3分26秒

1.简介

Java 怎么实现一种从十进制表示获取Unicode字符的方法(HTML转义码)?下面的示例把字符串按 UTF-8 编码后逐字节扫描,将其中每个非 ASCII 字符替换为十进制的 HTML 数字字符引用(例如“中”对应 &#20013;),ASCII 字符保持不变。请看下文。

2.示例代码

/**
 * 版权所有 编程十万个怎么办(www.tah1986.com)
 */



import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

public class Main {
    /**
     * Demo entry point: prints the escaped form of a sample string.
     * The sample is pure ASCII, so it is printed unchanged.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) throws Exception {
        String render = "tah1986.com";
        System.out.println(getDecimalChar(render));
    }

    /**
     * Replaces every non-ASCII character of {@code render} with its decimal
     * HTML numeric character reference ({@code &#NNN;}); ASCII characters are
     * copied through unchanged.
     *
     * <p>The string is encoded to UTF-8 and scanned byte by byte, so a
     * supplementary character (surrogate pair) yields a single reference
     * carrying its full code point.
     *
     * @param render the text to escape; must not be {@code null}
     * @return the escaped text
     * @throws NullPointerException if {@code render} is {@code null}
     */
    static public String getDecimalChar(String render) {
        // StandardCharsets.UTF_8 cannot fail, unlike the charset-name overload.
        byte[] utf8 = render.getBytes(StandardCharsets.UTF_8);
        StringBuilder out = new StringBuilder(utf8.length);

        int i = 0;
        while (i < utf8.length) {
            byte lead = utf8[i];
            if ((lead & 0x80) != 0) {
                // Start of a multi-byte sequence: emit one reference and skip
                // over the whole sequence.
                out.append("&#").append(getUCSfromUTF8(utf8, i)).append(';');
                i += getBytesforUTF8(lead);
            } else {
                // ASCII byte: its value is the character itself, so no
                // charset-dependent decoding is needed.
                out.append((char) lead);
                i++;
            }
        }
        return out.toString();
    }

    /**
     * Decodes the code point of the UTF-8 sequence starting at
     * {@code utf8[offset]}.
     *
     * <p>No validation is performed: the byte at {@code offset} is assumed to
     * be a lead (or ASCII) byte and the following bytes are assumed to be
     * continuation bytes.
     *
     * @param utf8   buffer holding UTF-8 encoded bytes
     * @param offset index of the first byte of the sequence
     * @return the decoded code point
     * @throws ArrayIndexOutOfBoundsException if the sequence runs past the
     *         end of {@code utf8}
     */
    static public int getUCSfromUTF8(byte[] utf8, int offset) {
        int numByte = getBytesforUTF8(utf8[offset]);
        if (numByte == 1) {
            // ASCII: high bit is clear, so the byte value is the code point.
            return utf8[offset];
        }

        // An n-byte lead byte is n one-bits, a zero, then (7 - n) payload
        // bits, so the payload mask is 0x7f >> n (0x1f, 0x0f, 0x07, ...).
        int ret = utf8[offset] & (0x7f >> numByte);
        for (int i = 1; i < numByte; ++i) {
            // Each continuation byte contributes its low 6 bits.
            ret = (ret << 6) | (utf8[offset + i] & 0x3f);
        }
        return ret;
    }

    /**
     * Returns the length in bytes of the UTF-8 sequence introduced by
     * {@code b}.
     *
     * <p>Returns 1 for an ASCII byte (high bit clear). Otherwise the number
     * of consecutive one-bits is counted starting from bit 5, beginning at 2
     * and capped at 7; the byte is assumed to be a lead byte, so the result
     * is not meaningful for a continuation byte ({@code 10xxxxxx}).
     *
     * @param b the first byte of a UTF-8 sequence
     * @return the sequence length in bytes
     */
    static public int getBytesforUTF8(byte b) {
        if ((b & 0x80) == 0) {
            return 1;
        }
        int count = 2;
        // Every additional one-bit below the top two adds a byte to the sequence.
        for (int mask = 0x20; count < 7 && (b & mask) != 0; mask >>= 1) {
            count++;
        }
        return count;
    }
}

 
继续阅读