/* eslint-disable no-bitwise */
/**
 * TextEncoder Polyfill Implementation
 *
 * React Native compatible UTF-8 text encoder
 * Based on the Encoding Standard: https://encoding.spec.whatwg.org/
 *
 * @module text-encoding@1.0.0
 */
import type { ITextEncoder, TextEncoderEncodeIntoResult } from './interfaces';

/**
 * UTF-8 encoder operating on JavaScript (UTF-16) strings.
 *
 * Valid surrogate pairs are combined into a single supplementary code point
 * and emitted as a 4-byte sequence. Any unpaired surrogate (a high surrogate
 * not followed by a low one, or a low surrogate on its own) is emitted as
 * U+FFFD REPLACEMENT CHARACTER (bytes EF BF BD).
 */
export class TextEncoderPolyfill implements ITextEncoder {
  readonly encoding = 'utf-8';

  /**
   * Encode a string to UTF-8 bytes.
   *
   * @param input - String to encode; defaults to the empty string.
   * @returns A new `Uint8Array` holding the UTF-8 encoding of `input`.
   */
  encode(input: string = ''): Uint8Array {
    const bytes: number[] = [];
    let index = 0;
    while (index < input.length) {
      const codePoint = TextEncoderPolyfill.codePointAt(input, index);
      // Writing at `bytes.length` appends to the plain array.
      TextEncoderPolyfill.writeUtf8(codePoint, bytes, bytes.length);
      index += TextEncoderPolyfill.unitsConsumed(codePoint);
    }
    return new Uint8Array(bytes);
  }

  /**
   * Encode as much of `source` as fits into `destination`, starting at
   * index 0 of `destination`.
   *
   * A code point is only written when its whole UTF-8 sequence fits, so the
   * output never ends in a truncated sequence.
   *
   * @param source - String to encode.
   * @param destination - Buffer receiving the UTF-8 bytes.
   * @returns `read`: number of UTF-16 code units consumed from `source`
   *   (a surrogate pair counts as 2); `written`: number of bytes stored in
   *   `destination`.
   */
  encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult {
    const capacity = destination.length;
    let read = 0;
    let written = 0;

    while (read < source.length) {
      const codePoint = TextEncoderPolyfill.codePointAt(source, read);

      // Stop before a sequence that would not fit completely.
      if (written + TextEncoderPolyfill.utf8Length(codePoint) > capacity) {
        break;
      }

      written = TextEncoderPolyfill.writeUtf8(codePoint, destination, written);
      // A combined surrogate pair consumes both of its code units, so the
      // low surrogate is never revisited as a "lone" surrogate.
      read += TextEncoderPolyfill.unitsConsumed(codePoint);
    }

    return { read, written };
  }

  /**
   * Decode the code point that starts at `index` in `text`.
   * Returns U+FFFD for any unpaired surrogate. The result is at most
   * 0x10FFFF, and is >= 0x10000 only when a valid surrogate pair was combined.
   */
  private static codePointAt(text: string, index: number): number {
    const unit = text.charCodeAt(index);
    if (unit < 0xd800 || unit > 0xdfff) {
      return unit; // Not a surrogate: the code unit is the code point.
    }
    if (unit <= 0xdbff && index + 1 < text.length) {
      // High surrogate: valid only when immediately followed by a low one.
      const low = text.charCodeAt(index + 1);
      if (low >= 0xdc00 && low <= 0xdfff) {
        return 0x10000 + ((unit - 0xd800) << 10) + (low - 0xdc00);
      }
    }
    return 0xfffd; // Lone high or low surrogate.
  }

  /** Number of UTF-16 code units that produced `codePoint` (2 for a surrogate pair, else 1). */
  private static unitsConsumed(codePoint: number): number {
    return codePoint >= 0x10000 ? 2 : 1;
  }

  /** Number of bytes in the UTF-8 encoding of `codePoint`. */
  private static utf8Length(codePoint: number): number {
    if (codePoint < 0x80) {
      return 1;
    }
    if (codePoint < 0x800) {
      return 2;
    }
    if (codePoint < 0x10000) {
      return 3;
    }
    return 4;
  }

  /**
   * Store the UTF-8 encoding of `codePoint` into `target` beginning at
   * `offset`, and return the offset just past the last byte stored.
   */
  private static writeUtf8(
    codePoint: number,
    target: number[] | Uint8Array,
    offset: number,
  ): number {
    let pos = offset;
    if (codePoint < 0x80) {
      // 1-byte sequence (ASCII)
      target[pos++] = codePoint;
    } else if (codePoint < 0x800) {
      // 2-byte sequence
      target[pos++] = 0xc0 | (codePoint >> 6);
      target[pos++] = 0x80 | (codePoint & 0x3f);
    } else if (codePoint < 0x10000) {
      // 3-byte sequence
      target[pos++] = 0xe0 | (codePoint >> 12);
      target[pos++] = 0x80 | ((codePoint >> 6) & 0x3f);
      target[pos++] = 0x80 | (codePoint & 0x3f);
    } else {
      // 4-byte sequence
      target[pos++] = 0xf0 | (codePoint >> 18);
      target[pos++] = 0x80 | ((codePoint >> 12) & 0x3f);
      target[pos++] = 0x80 | ((codePoint >> 6) & 0x3f);
      target[pos++] = 0x80 | (codePoint & 0x3f);
    }
    return pos;
  }
}