first commit
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
[submodule "vendor/big-list-of-naughty-strings"]
|
||||
path = vendor/big-list-of-naughty-strings
|
||||
url = https://github.com/minimaxir/big-list-of-naughty-strings.git
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
language: node_js
|
||||
node_js:
|
||||
- "6"
|
||||
- "7"
|
||||
- "8"
|
||||
- "9"
|
||||
- "10"
|
||||
- "11"
|
||||
- "node"
|
||||
+2
@@ -0,0 +1,2 @@
|
||||
Carl Xiong <xiongc05@gmail.com>
|
||||
Parsha Pourkhomami <parshap@gmail.com>
|
||||
+7
@@ -0,0 +1,7 @@
|
||||
Copyright 2023 Carl Xiong & Parsha Pourkhomami
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
Version 2, December 2004
|
||||
|
||||
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document, and changing it is allowed as long
|
||||
as the name is changed.
|
||||
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
# utf8-byte-length [](http://travis-ci.org/parshap/utf8-byte-length)
|
||||
|
||||
Get the utf8 byte length of a string, taking into account multi-byte
|
||||
characters and surrogate pairs.
|
||||
|
||||
By default, this module defers to `Buffer.byteLength`. A browser
|
||||
implementation is also provided that doesn't use `Buffer.byteLength`
|
||||
minimize build size.
|
||||
|
||||
## Example
|
||||
|
||||
```js
|
||||
var getLength = require("utf8-byte-length")
|
||||
console.log(truncate("a☃", 2)) // a = 1 byte, ☃ = 3 bytes
|
||||
// -> 4
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
### `var getLength = require("utf8-byte-length")`
|
||||
|
||||
*When using browserify or webpack*, this automatically resolves to an
|
||||
implementation that does not use `Buffer.byteLength`.
|
||||
|
||||
### `getLength(string)`
|
||||
|
||||
Returns the byte length of `string`. Throws an error if `string` is not
|
||||
a string.
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
'use strict';
|
||||
|
||||
function isHighSurrogate(codePoint) {
|
||||
return codePoint >= 0xd800 && codePoint <= 0xdbff;
|
||||
}
|
||||
|
||||
function isLowSurrogate(codePoint) {
|
||||
return codePoint >= 0xdc00 && codePoint <= 0xdfff;
|
||||
}
|
||||
|
||||
// Truncate string by size in bytes
|
||||
module.exports = function getByteLength(string) {
|
||||
if (typeof string !== "string") {
|
||||
throw new Error("Input must be string");
|
||||
}
|
||||
|
||||
var charLength = string.length;
|
||||
var byteLength = 0;
|
||||
var codePoint = null;
|
||||
var prevCodePoint = null;
|
||||
for (var i = 0; i < charLength; i++) {
|
||||
codePoint = string.charCodeAt(i);
|
||||
// handle 4-byte non-BMP chars
|
||||
// low surrogate
|
||||
if (isLowSurrogate(codePoint)) {
|
||||
// when parsing previous hi-surrogate, 3 is added to byteLength
|
||||
if (prevCodePoint != null && isHighSurrogate(prevCodePoint)) {
|
||||
byteLength += 1;
|
||||
}
|
||||
else {
|
||||
byteLength += 3;
|
||||
}
|
||||
}
|
||||
else if (codePoint <= 0x7f ) {
|
||||
byteLength += 1;
|
||||
}
|
||||
else if (codePoint >= 0x80 && codePoint <= 0x7ff) {
|
||||
byteLength += 2;
|
||||
}
|
||||
else if (codePoint >= 0x800 && codePoint <= 0xffff) {
|
||||
byteLength += 3;
|
||||
}
|
||||
prevCodePoint = codePoint;
|
||||
}
|
||||
|
||||
return byteLength;
|
||||
};
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
"use strict";
|
||||
|
||||
module.exports = function getByteLength(string) {
|
||||
if (typeof string !== "string") {
|
||||
throw new Error("Input must be string");
|
||||
}
|
||||
return Buffer.byteLength(string, "utf8");
|
||||
};
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "utf8-byte-length",
|
||||
"version": "1.0.5",
|
||||
"description": "Get utf8 byte length of string",
|
||||
"main": "index.js",
|
||||
"browser": "browser.js",
|
||||
"scripts": {
|
||||
"test": "tape test.js"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/parshap/utf8-byte-length.git"
|
||||
},
|
||||
"keywords": [
|
||||
"utf8"
|
||||
],
|
||||
"author": "Carl Xiong <xiongc05@gmail.com>",
|
||||
"license": "(WTFPL OR MIT)",
|
||||
"bugs": {
|
||||
"url": "https://github.com/parshap/utf8-byte-length/issues"
|
||||
},
|
||||
"homepage": "https://github.com/parshap/utf8-byte-length#readme",
|
||||
"devDependencies": {
|
||||
"tape": "^4.2.2"
|
||||
}
|
||||
}
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
"use strict";
|
||||
|
||||
var test = require("tape");
|
||||
var getLength = require("./index");
|
||||
var browserGetLength = require("./browser");
|
||||
|
||||
function repeat(string, times) {
|
||||
return new Array(times + 1).join(string);
|
||||
}
|
||||
|
||||
// Test writing files to the fs
|
||||
//
|
||||
|
||||
try {
|
||||
var blns = require("./vendor/big-list-of-naughty-strings/blns.json");
|
||||
}
|
||||
catch (err) {
|
||||
console.error("Error: Cannot load file './vendor/big-list-of-naughty-strings/blns.json'");
|
||||
console.error();
|
||||
console.error("Make sure you've initialized git submodules by running");
|
||||
console.error();
|
||||
console.error(" git submodule update --init");
|
||||
console.error();
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
|
||||
// 8-byte, 4-character string
|
||||
var THUMB = "👍🏽";
|
||||
|
||||
// Tests run against both implementations
|
||||
[getLength, browserGetLength].forEach(function(getLength) {
|
||||
// Strings with known lengths
|
||||
[
|
||||
["", 0],
|
||||
["a", 1],
|
||||
["☃", 3],
|
||||
["a☃", 4],
|
||||
[repeat("a", 250) + '\uD800\uDC00', 254],
|
||||
[repeat("a", 251) + '\uD800\uDC00', 255],
|
||||
[repeat("a", 252) + '\uD800\uDC00', 256],
|
||||
[THUMB, 8],
|
||||
[THUMB[0], 3],
|
||||
[THUMB[1], 3],
|
||||
[THUMB[2], 3],
|
||||
[THUMB[3], 3],
|
||||
[THUMB.slice(0, 2), 4],
|
||||
[THUMB.slice(2, 4), 4],
|
||||
[THUMB.slice(1, 3), 6],
|
||||
].forEach(function(desc) {
|
||||
var string = desc[0];
|
||||
var length = desc[1];
|
||||
test(JSON.stringify(string) + "=" + length, function(t) {
|
||||
t.equal(getLength(string), length);
|
||||
t.end();
|
||||
});
|
||||
});
|
||||
|
||||
// Make sure result matches Buffer.byteLength for various strings
|
||||
blns.forEach(function(str) {
|
||||
test(JSON.stringify(str), function(t) {
|
||||
t.equal(getLength(str), Buffer.byteLength(str));
|
||||
t.end();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user