/* eslint-disable no-bitwise */
/**
 * TextDecoder Polyfill Implementation
 *
 * React Native compatible UTF-8 text decoder
 * Based on the Encoding Standard: https://encoding.spec.whatwg.org/
 *
 * @module text-encoding@1.0.0
 */

import type {
  ITextDecoder,
  TextDecodeOptions,
  TextDecoderOptions,
} from './interfaces';

/**
 * Labels that the Encoding Standard maps to UTF-8.
 * Compared against the caller's label after trimming and lower-casing.
 */
const UTF8_LABELS: readonly string[] = [
  'unicode-1-1-utf-8',
  'unicode11utf8',
  'unicode20utf8',
  'utf-8',
  'utf8',
  'x-unicode20utf8',
];

/** U+FFFD, emitted in place of every ill-formed byte sequence in non-fatal mode. */
const REPLACEMENT_CODE_POINT = 0xfffd;

/** U+FEFF, dropped from the start of a stream unless `ignoreBOM` is set. */
const BOM_CODE_POINT = 0xfeff;

/**
 * Returns a byte view over `input` without copying where possible.
 * Unsupported or missing input yields an empty view, so `decode()` can be
 * called with no argument to flush a stream.
 */
function toByteView(input: unknown): Uint8Array {
  if (input instanceof Uint8Array) {
    return input;
  }
  if (input instanceof ArrayBuffer) {
    return new Uint8Array(input);
  }
  if (ArrayBuffer.isView(input)) {
    // Any other typed array or DataView: honour its offset and length.
    return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
  }
  if (Array.isArray(input)) {
    // Not part of the typed signature, but tolerated for plain JS callers.
    return Uint8Array.from(input as number[]);
  }
  return new Uint8Array(0);
}

/**
 * UTF-8 only implementation of the `TextDecoder` interface.
 *
 * Ill-formed input is handled the way the Encoding Standard's UTF-8 decoder
 * prescribes: each maximal ill-formed prefix becomes a single U+FFFD and the
 * byte that broke the sequence is decoded again as the start of a new one.
 */
export class TextDecoderPolyfill implements ITextDecoder {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;

  /** Bytes of a still-valid but unfinished sequence carried over between streaming calls. */
  private pendingBytes: number[] = [];

  /** Whether the first code point of the current stream has already been produced. */
  private bomSeen = false;

  /**
   * @param label - Encoding label; only UTF-8 labels are accepted (case-insensitive, surrounding whitespace ignored).
   * @param options - `fatal` makes ill-formed input throw; `ignoreBOM` keeps a leading U+FEFF in the output.
   * @throws RangeError if `label` does not name UTF-8.
   */
  constructor(label: string = 'utf-8', options: TextDecoderOptions = {}) {
    // Only support UTF-8 for now
    if (!UTF8_LABELS.includes(label.trim().toLowerCase())) {
      throw new RangeError(`Unsupported encoding: ${label}. Only UTF-8 is supported.`);
    }

    this.encoding = 'utf-8';
    this.fatal = Boolean(options.fatal);
    this.ignoreBOM = Boolean(options.ignoreBOM);
  }

  /**
   * Decode UTF-8 bytes to a string.
   *
   * @param input - Bytes to decode; omit (or pass `null`) to flush a stream.
   * @param options - With `stream: true`, an unfinished trailing sequence is
   *   buffered and completed by the next call instead of being reported.
   * @returns The decoded text. In non-fatal mode ill-formed bytes appear as U+FFFD.
   * @throws TypeError in fatal mode when the input is not well-formed UTF-8.
   */
  decode(input?: ArrayBufferView | ArrayBuffer | null, options: TextDecodeOptions = {}): string {
    const streaming = Boolean(options.stream);
    let bytes = toByteView(input);

    // Streaming: put the carried-over prefix in front of the new chunk.
    if (this.pendingBytes.length > 0) {
      const carried = this.pendingBytes;
      const merged = new Uint8Array(carried.length + bytes.length);
      merged.set(carried, 0);
      merged.set(bytes, carried.length);
      bytes = merged;
      this.pendingBytes = [];
    }

    const length = bytes.length;
    // Only the very first code point of a stream may be a BOM to strip.
    let checkBOM = !this.ignoreBOM && !this.bomSeen;
    let result = '';
    let i = 0;

    while (i < length) {
      const lead = bytes[i];
      let codePoint = lead;
      let trailing = 0; // continuation bytes expected after the lead byte
      // Allowed range for the NEXT continuation byte. Narrowing it for certain
      // lead bytes rejects overlong forms, surrogates and values > U+10FFFF
      // at the first offending byte.
      let lower = 0x80;
      let upper = 0xbf;
      let failure: string | undefined;

      if (lead >= 0x80) {
        if (lead >= 0xc2 && lead <= 0xdf) {
          // 2-byte sequence (0xC0/0xC1 could only encode overlong forms)
          trailing = 1;
          codePoint = lead & 0x1f;
        } else if (lead >= 0xe0 && lead <= 0xef) {
          // 3-byte sequence
          trailing = 2;
          codePoint = lead & 0x0f;
          if (lead === 0xe0) {
            lower = 0xa0; // below this would be overlong
          } else if (lead === 0xed) {
            upper = 0x9f; // above this would be a UTF-16 surrogate
          }
        } else if (lead >= 0xf0 && lead <= 0xf4) {
          // 4-byte sequence (0xF5+ would exceed U+10FFFF)
          trailing = 3;
          codePoint = lead & 0x07;
          if (lead === 0xf0) {
            lower = 0x90; // below this would be overlong
          } else if (lead === 0xf4) {
            upper = 0x8f; // above this would exceed U+10FFFF
          }
        } else {
          failure = `Invalid UTF-8 sequence start byte: 0x${lead.toString(16)}`;
        }
      }

      // Number of bytes this iteration uses up, starting with the lead byte.
      let consumed = 1;
      let deferred = false;

      while (consumed <= trailing) {
        if (i + consumed >= length) {
          // Input ended inside a sequence that is valid so far.
          if (streaming) {
            this.pendingBytes = Array.from(bytes.subarray(i));
            deferred = true;
          } else {
            failure = 'Incomplete UTF-8 sequence at end of input';
          }
          break;
        }

        const next = bytes[i + consumed];
        if (next < lower || next > upper) {
          // Leave `next` unconsumed so it is decoded again as a lead byte.
          failure = 'Invalid UTF-8 continuation byte';
          break;
        }

        codePoint = (codePoint << 6) | (next & 0x3f);
        lower = 0x80;
        upper = 0xbf;
        consumed++;
      }

      if (deferred) {
        break;
      }

      if (failure !== undefined) {
        if (this.fatal) {
          // Leave the decoder in a clean state for any later call.
          this.pendingBytes = [];
          this.bomSeen = false;
          throw new TypeError(failure);
        }
        codePoint = REPLACEMENT_CODE_POINT;
      }

      i += consumed;

      if (checkBOM) {
        checkBOM = false;
        this.bomSeen = true;
        if (codePoint === BOM_CODE_POINT) {
          continue; // Skip UTF-8 BOM
        }
      }

      if (codePoint < 0x10000) {
        // BMP character
        result += String.fromCharCode(codePoint);
      } else {
        // Supplementary character - use surrogate pair
        const offset = codePoint - 0x10000;
        result += String.fromCharCode(0xd800 + (offset >> 10), 0xdc00 + (offset & 0x3ff));
      }
    }

    if (!streaming) {
      // The stream is finished; the next call starts a fresh one.
      this.bomSeen = false;
    }

    return result;
  }
}