#4 Web 分词 API

2023-04-21

读了《How to split JavaScript strings into sentences, words or graphemes with "Intl.Segmenter"》一文后了解到,现在 Web 平台已经原生支持分词了:

// Segment one Chinese sentence pair at sentence, word and grapheme
// granularity, printing the list of pieces produced by each pass.
const text = `我爱北京天安门,天安门上太阳升。伟大领袖毛主席,指引我们向前进。`;
const granularities = ["sentence", "word", "grapheme"];
for (const granularity of granularities) {
  const zhSegmenter = new Intl.Segmenter("zh", { granularity });
  // segment() returns an iterable Segments object; every item it yields
  // carries the matched substring in its `segment` property.
  const pieces = [...zhSegmenter.segment(text)].map(({ segment }) => segment);
  console.log(pieces);
}
// ['我爱北京天安门,天安门上太阳升。', '伟大领袖毛主席,指引我们向前进。']
// ['我', '爱', '北京', '天安门', ',', '天安门', '上', '太阳', '升', '。', '伟大', '领袖', '毛主席', ',', '指引', '我们', '向', '前进', '。']
// ['我', '爱', '北', '京', '天', '安', '门', ',', '天', '安', '门', '上', '太', '阳', '升', '。', '伟', '大', '领', '袖', '毛', '主', '席', ',', '指', '引', '我', '们', '向', '前', '进', '。']

#3 JavaScript Unicode 问题

2019-06-20
/**
 * Base64-encode a string via its UTF-8 byte representation.
 *
 * btoa() expects a "binary string" in which every character stands for one
 * byte, so the text is first turned into such a string: encodeURIComponent()
 * percent-encodes the UTF-8 bytes of every non-ASCII character, and each
 * `%XX` escape is then replaced by the single character with that byte value.
 *
 * @param {string} str - Text to encode; may contain any Unicode characters.
 * @returns {string} Base64 encoding of the UTF-8 bytes of `str`.
 * @throws {URIError} If `str` contains a lone surrogate (raised by
 *   encodeURIComponent).
 */
function base64EncodeUnicode(str) {
  // Declared locally: assigning to an undeclared name would leak a global in
  // sloppy mode and throw a ReferenceError in strict mode / ES modules.
  const utf8Bytes = encodeURIComponent(str).replace(
    /%([0-9A-F]{2})/g,
    (match, hex) => String.fromCharCode(Number.parseInt(hex, 16)),
  );

  return btoa(utf8Bytes);
}
# Python one-liner (not JavaScript). Presumably `x` holds the UTF-8 bytes of a
# single character outside Latin-1, so that raw_unicode_escape renders it as
# "\uXXXX" and the [2:] slice drops the leading "\u" -- TODO confirm.
# NOTE(review): strip('0') removes zeros from BOTH ends (e.g. '4e00' -> '4e');
# lstrip('0') is probably what was intended -- verify before relying on this.
x.decode('utf-8').encode('raw_unicode_escape').decode('ascii')[2:].strip('0')

#2 JS: split 方法

2017-02-12
// Without a limit, split() returns every piece.
"ni wo ta".split(" ");
// [ 'ni', 'wo', 'ta' ]

// The optional second argument caps how many pieces are returned. Pieces
// beyond the cap are dropped; they are NOT merged into the last element.
"ni wo ta".split(" ", 1);
// [ 'ni' ]
"ni wo ta".split(" ", 2);
// [ 'ni', 'wo' ]
"ni wo ta".split(" ", 3);
// [ 'ni', 'wo', 'ta' ]
// A limit larger than the number of pieces changes nothing.
"ni wo ta".split(" ", 4);
// [ 'ni', 'wo', 'ta' ]

// When the separator never occurs, the whole string comes back as the only
// piece, whatever the limit is.
"ni wo ta".split(":");
// [ 'ni wo ta' ]
"ni wo ta".split(":", 1);
// [ 'ni wo ta' ]
"ni wo ta".split(":", 2);
// [ 'ni wo ta' ]

如果要一刀将字符串切两半:

/**
 * Cut `text` in two at the first occurrence of `separator`.
 *
 * @param {string} text - The string to cut.
 * @param {string} separator - Non-empty delimiter to look for.
 * @returns {[string, string] | null} The trimmed text before and after the
 *   first separator, or null when `text` does not contain the separator.
 */
function splitOnce(text, separator) {
  // With a limit of 1, split() hands back only the piece before the first
  // separator -- or the whole string when the separator never occurs.
  const head = text.split(separator, 1)[0];
  if (head === text) {
    return null;
  }
  const tail = text.slice(head.length + separator.length);
  return [head.trim(), tail.trim()];
}

// `var` rather than `const`: a later snippet in these notes declares `line` again.
var line = "a : b : c";
var halves = splitOnce(line, ":");
if (halves !== null) {
  console.log(halves); // [ 'a', 'b : c' ]
}
// Same cut, but the first ":" is located with indexOf() rather than split().
// `var` is kept on purpose: `line`, `a` and `b` are already declared with
// `var` by the preceding snippet, which `const`/`let` would clash with.
var line = "a : b : c";
var index = line.indexOf(":");
if (index !== -1) {
  // Everything before the separator, and everything after it, both trimmed.
  var [a, b] = [line.slice(0, index), line.slice(index + 1)].map((half) =>
    half.trim(),
  );
  console.log([a, b]);
}

参考资料与拓展阅读

#1 转载:jQuery 插件开发全解析

2015-11-09

jQuery 插件的开发包括两种:
一种是类级别的插件开发,即给 jQuery 添加新的全局函数,相当于给 jQuery 类本身添加方法(jQuery 的全局函数就是属于 jQuery 命名空间的函数)。
另一种是对象级别的插件开发,即给 jQuery 对象添加方法。下面就两种函数的开发做详细的说明。