kekobin / blog

blog

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

js 转码与解码emoji

kekobin opened this issue · comments

简介

javascript正常的英文编码是utf-8的,mysql默认存的也是这种编码,而emoji表情是utf-16的,这就导致了db存储emoji会有问题,所以最好的方式是,把emoji先转成utf-8的这种实体编码,存到数据库里,要使用的时候,从db拿出来,再解码成utf-16的形式。

直接上code:

export default  {
	// 表情转码
    utf16toEntities(str) {
      const patt = /[\ud800-\udbff][\udc00-\udfff]/g; // 检测utf16字符正则
      str = str.replace(patt, (char) => {
        let H;
        let L;
        let code;
        let s;

        if (char.length === 2) {
          H = char.charCodeAt(0); // 取出高位
          L = char.charCodeAt(1); // 取出低位
          code = (H - 0xD800) * 0x400 + 0x10000 + L - 0xDC00; // 转换算法
          s = `&#${code};`;
        } else {
          s = char;
        }

        return s;
      });

      return str;
    },
    // 表情解码
    entitiestoUtf16(strObj) {
      const patt = /&#\d+;/g;
      const arr = strObj.match(patt) || [];

      let H;
      let L;
      let code;

      for (let i = 0; i < arr.length; i += 1) {
        code = arr[i];
        code = code.replace('&#', '').replace(';', '');
        // 高位
        H = Math.floor((code - 0x10000) / 0x400) + 0xD800;
        // 低位
        L = ((code - 0x10000) % 0x400) + 0xDC00;
        code = `&#${code};`;
        const s = String.fromCharCode(H, L);
        strObj = strObj.replace(code, s);
      }
      return strObj;
    }
}

使用实例:

const s = 'test emoji 👇👉👈🙌';            
const dbSaveStr = utf16toEntities(s);
// 结果是: 'test emoji &#128071;&#128073;&#128072;&#128588;' 这样子的实体编码字符串存到db就没问题了

// 要使用时,想数据库中拿到上面的存储记录
cosnt dbOutStr = 'test emoji &#128071;&#128073;&#128072;&#128588;' ;
// 然后将其中的emoji转码成表情使用           
const ret = entitiestoUtf16(dbOutStr )    
// 得到: 'test emoji 👇👉👈🙌';      

将emoji转成UTF-16

这是更通用的方式,因为上面那种只是在web端能显示,如果是要存到db,给c++获取数据再客户端app展示,就行不通了。必须转成unicode-16才行。

// http://www.2ality.com/2013/09/javascript-unicode.html
function toUTF16(codePoint) {
  var TEN_BITS = parseInt('1111111111', 2);
  function u(codeUnit) {
    return '\\u'+codeUnit.toString(16).toUpperCase();
  }

  if (codePoint <= 0xFFFF) {
    return u(codePoint);
  }
  codePoint -= 0x10000;

  // Shift right to get to most significant 10 bits
  var leadSurrogate = 0xD800 + (codePoint >> 10);

  // Mask to get least significant 10 bits
  var tailSurrogate = 0xDC00 + (codePoint & TEN_BITS);

  return u(leadSurrogate) + u(tailSurrogate);
}

// using codePointAt, it's easy to go from emoji
// to decimal and back.
// Emoji to decimal representation
"😀".codePointAt(0)
>128512

// Decimal to emoji
String.fromCodePoint(128512)
>"😀"

// going from emoji to hexadecimal is a little
// bit trickier. To convert from decimal to hexadecimal,
// we can use toUTF16.
// Decimal to hexadecimal
toUTF16(128512)
> "\uD83D\uDE00"

// Hexadecimal to emoji
"\uD83D\uDE00"
> "😀"

来源

判断字符串是否为emoji字符

const emojiReg = /(?:[\u2700-\u27bf]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?(?:\u200d(?:[^\ud800-\udfff]|(?:\ud83c[\udde6-\uddff]){2}|[\ud800-\udbff][\udc00-\udfff])[\ufe0e\ufe0f]?(?:[\u0300-\u036f\ufe20-\ufe23\u20d0-\u20f0]|\ud83c[\udffb-\udfff])?)*/;

参考

unicode在线转码

老哥 我用你的 实例有问题
看看我的

//把utf16的emoji表情字符进行转码成八进制的字符
export function utf16toEntities(str) {
    var patt = /[\ud800-\udbff][\udc00-\udfff]/g; // 检测utf16字符正则  
    return str.replace(patt, function(char) {
        var H, L, code;
        if (char.length === 2) {
            H = char.charCodeAt(0); // 取出高位  
            L = char.charCodeAt(1); // 取出低位  
            code = (H - 0xD800) * 0x400 + 0x10000 + L - 0xDC00; // 转换算法  
            return "&#" + code + ";";
        } else {
            return char;
        }
    });
}

//将编码后的八进制的emoji表情重新解码成十六进制的表情字符
export function entitiesToUtf16(str) {
    return str.replace(/&#(\d+);/g, function(match, dec) {
        let H = Math.floor((dec - 0x10000) / 0x400) + 0xD800;
        let L = Math.floor(dec - 0x10000) % 0x400 + 0xDC00;
        return String.fromCharCode(H, L);
    });
}


var a = utf16toEntities("🤓2333😎");
console.log("a: ", a);


var b = entitiesToUtf16(a)
console.log("b: ",b)

var c = entitiesToUtf16("🤓2333😎");
console.log("c: ", c);