From 3342f7e40bcc1d53631aec387789f13698df3f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chris=20Da=C3=9Fler?= Date: Fri, 29 Aug 2025 14:54:44 +0200 Subject: [PATCH] Initial commit: Text encoding component with UTF-8 polyfills MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .gitignore | 237 +++++++++++++++++++++++++++++++++++++++++ README.md | 159 +++++++++++++++++++++++++++ TextDecoderPolyfill.ts | 187 ++++++++++++++++++++++++++++++++ TextEncoderPolyfill.ts | 180 +++++++++++++++++++++++++++++++ TextEncodingFactory.ts | 132 +++++++++++++++++++++++ TextEncodingService.ts | 89 ++++++++++++++++ index.d.ts | 220 ++++++++++++++++++++++++++++++++++++++ index.ts | 46 ++++++++ interfaces.ts | 158 +++++++++++++++++++++++++++ package.json | 22 ++++ 10 files changed, 1430 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 TextDecoderPolyfill.ts create mode 100644 TextEncoderPolyfill.ts create mode 100644 TextEncodingFactory.ts create mode 100644 TextEncodingService.ts create mode 100644 index.d.ts create mode 100644 index.ts create mode 100644 interfaces.ts create mode 100644 package.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53e7358 --- /dev/null +++ b/.gitignore @@ -0,0 +1,237 @@ +# Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,node +# Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux,visualstudiocode,node + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble 
+.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Node ### +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# 
Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +### Node Patch ### +# Serverless Webpack directories +.webpack/ + +# Optional stylelint cache + +# SvelteKit build / generate output +.svelte-kit + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,node \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..40e3c38 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +# Text Encoding Component + +UTF-8 text encoding/decoding utilities with automatic polyfill support for React Native. 
+ +## Features + +- **Standard Compliance**: Compatible with the standard TextEncoder/TextDecoder Web API +- **React Native Support**: Automatic polyfills for environments without native support +- **UTF-8 Only**: Focused implementation supporting only UTF-8 encoding for reliability +- **Performance**: Uses native implementations when available, falls back to efficient polyfills +- **TypeScript**: Full TypeScript support with comprehensive type definitions + +## Installation + +This package is designed to be loaded via IOR (Interoperable Object Reference) from Gitea: + +```typescript +import { textEncoding } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; +``` + +## Usage + +### Simple Text Encoding/Decoding + +The easiest way to use this component is through the default service: + +```typescript +import { textEncoding } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +// Encode string to bytes +const encoded = textEncoding.encode('Hello, 世界! 🌍'); +console.log(encoded); // Uint8Array + +// Decode bytes to string +const decoded = textEncoding.decode(encoded); +console.log(decoded); // "Hello, 世界! 
🌍" +``` + +### Factory Functions + +For more control, use the factory functions: + +```typescript +import { createTextEncoder, createTextDecoder } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +const encoder = createTextEncoder(); +const decoder = createTextDecoder(); + +const bytes = encoder.encode('Hello World'); +const text = decoder.decode(bytes); +``` + +### Advanced Usage + +Create decoder instances with options: + +```typescript +import { createTextDecoder } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +// Throw on invalid sequences instead of using replacement character +const fatalDecoder = createTextDecoder('utf-8', { fatal: true }); + +// Ignore byte order mark +const ignoreBomDecoder = createTextDecoder('utf-8', { ignoreBOM: true }); +``` + +### Direct Polyfill Usage + +Access polyfill classes directly for advanced use cases: + +```typescript +import { TextEncoderPolyfill, TextDecoderPolyfill } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +const encoder = new TextEncoderPolyfill(); +const decoder = new TextDecoderPolyfill('utf-8', { fatal: false }); +``` + +## API Reference + +### textEncoding (Default Service) + +The main service instance with convenient methods: + +- `encode(text: string): Uint8Array` - Encode string to UTF-8 bytes +- `decode(bytes: Uint8Array | ArrayBuffer | number[]): string` - Decode bytes to string +- `stringToUtf8(text: string): Uint8Array` - Alias for encode() +- `utf8ToString(bytes: Uint8Array | number[]): string` - Alias for decode() + +### Factory Functions + +- `createTextEncoder(): ITextEncoder` - Create encoder instance +- `createTextDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder` - Create decoder instance +- `installTextEncodingPolyfills(): void` - Install global polyfills + +### Interfaces + +#### ITextEncoder + +- `encoding: string` - Always 'utf-8' +- `encode(input?: string): Uint8Array` - Encode 
string to bytes +- `encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult` - Encode into existing array + +#### ITextDecoder + +- `encoding: string` - Always 'utf-8' +- `fatal: boolean` - Whether to throw on invalid sequences +- `ignoreBOM: boolean` - Whether to ignore byte order mark +- `decode(input?: ArrayBufferView | ArrayBuffer, options?: TextDecodeOptions): string` - Decode bytes to string + +## Error Handling + +The component handles various error conditions gracefully: + +```typescript +import { textEncoding } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +// Invalid UTF-8 sequences are replaced with � (U+FFFD) by default +const invalidBytes = new Uint8Array([0xFF, 0xFE, 0xFD]); +const result = textEncoding.decode(invalidBytes); +console.log(result); // "���" + +// Use fatal mode to throw on errors +import { createTextDecoder } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; +const fatalDecoder = createTextDecoder('utf-8', { fatal: true }); +try { + fatalDecoder.decode(invalidBytes); +} catch (error) { + console.error('Invalid UTF-8 sequence:', error.message); +} +``` + +## Platform Support + +- **React Native**: Full support with automatic polyfills +- **Node.js**: Uses native TextEncoder/TextDecoder when available +- **Browsers**: Uses native implementations in modern browsers +- **Automatic Fallback**: Seamlessly falls back to polyfills when native support is unavailable + +## Performance Notes + +- Native implementations are preferred when available for optimal performance +- Polyfills are optimized for correctness and reasonable performance +- UTF-8 validation is performed to ensure data integrity +- Surrogate pair handling for proper Unicode support + +## Unicode Support + +This implementation fully supports the Unicode standard: + +- All valid Unicode code points (U+0000 to U+10FFFF) +- Proper surrogate pair handling for characters above U+FFFF +- UTF-8 
validation with proper error handling
+- BOM (Byte Order Mark) support with optional ignoring
+
+## Version Information
+
+- Version: 1.0.0
+- Component Name: text-encoding
+- IOR: `ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0`
\ No newline at end of file
diff --git a/TextDecoderPolyfill.ts b/TextDecoderPolyfill.ts
new file mode 100644
index 0000000..8a979c9
--- /dev/null
+++ b/TextDecoderPolyfill.ts
@@ -0,0 +1,222 @@
+/* eslint-disable no-bitwise */
+/**
+ * TextDecoder Polyfill Implementation
+ *
+ * React Native compatible UTF-8 text decoder
+ * Based on the Encoding Standard: https://encoding.spec.whatwg.org/
+ *
+ * @module text-encoding@1.0.0
+ */
+
+import type {
+  ITextDecoder,
+  TextDecodeOptions,
+  TextDecoderOptions,
+} from './interfaces';
+
+/** UTF-8 encoding of the byte order mark U+FEFF. */
+const UTF8_BOM = [0xef, 0xbb, 0xbf];
+
+/**
+ * UTF-8-only stand-in for the platform `TextDecoder`.
+ *
+ * Each malformed sequence becomes a single U+FFFD, or a `TypeError` when
+ * `fatal` is set; the byte that broke the sequence is then decoded again as
+ * the possible start of a new one, so no well-formed input is swallowed.
+ */
+export class TextDecoderPolyfill implements ITextDecoder {
+  readonly encoding: string;
+  readonly fatal: boolean;
+  readonly ignoreBOM: boolean;
+
+  /** Bytes of an unfinished sequence held back by a `stream: true` call. */
+  private pendingBytes: number[] = [];
+
+  /** Whether the start of the current stream was already checked for a BOM. */
+  private bomChecked = false;
+
+  /**
+   * @param label Encoding label; only `utf-8` / `utf8` is accepted (any case, surrounding whitespace ignored).
+   * @param options `fatal` makes malformed input throw; `ignoreBOM` keeps a leading BOM in the output.
+   * @throws RangeError when `label` names any other encoding.
+   */
+  constructor(label: string = 'utf-8', options: TextDecoderOptions = {}) {
+    // Only support UTF-8 for now
+    const normalized = label.trim().toLowerCase();
+    if (normalized !== 'utf-8' && normalized !== 'utf8') {
+      throw new RangeError(`Unsupported encoding: ${label}. Only UTF-8 is supported.`);
+    }
+
+    this.encoding = 'utf-8';
+    this.fatal = options.fatal || false;
+    this.ignoreBOM = options.ignoreBOM || false;
+  }
+
+  /**
+   * Decode UTF-8 bytes to a string.
+   *
+   * @param input Bytes to decode; when omitted or `null`, only bytes held back by an earlier streaming call are decoded.
+   * @param options Pass `stream: true` when more chunks follow, so a sequence cut off at the end of `input` is kept for the next call.
+   * @returns The decoded text.
+   * @throws TypeError in `fatal` mode when the input is not well-formed UTF-8.
+   */
+  decode(input?: ArrayBufferView | ArrayBuffer | null, options: TextDecodeOptions = {}): string {
+    let bytes: number[] = [];
+
+    // Convert input to byte array
+    if (input) {
+      if (input instanceof ArrayBuffer) {
+        bytes = Array.from(new Uint8Array(input));
+      } else if (input instanceof Uint8Array) {
+        bytes = Array.from(input);
+      } else if ('buffer' in input && input.buffer instanceof ArrayBuffer) {
+        const view = input as ArrayBufferView;
+        bytes = Array.from(new Uint8Array(view.buffer, view.byteOffset || 0, view.byteLength));
+      } else if (Array.isArray(input)) {
+        bytes = input as number[];
+      }
+    }
+
+    // Handle streaming mode - prepend pending bytes
+    if (this.pendingBytes.length > 0) {
+      bytes = [...this.pendingBytes, ...bytes];
+      this.pendingBytes = [];
+    }
+
+    let result = '';
+    let i = 0;
+
+    // A BOM only counts at the very start of a stream, so later chunks that
+    // happen to begin with EF BB BF must keep their U+FEFF.
+    if (!this.bomChecked) {
+      const seen = Math.min(bytes.length, UTF8_BOM.length);
+      let matches = true;
+      for (let k = 0; k < seen; k++) {
+        if (bytes[k] !== UTF8_BOM[k]) {
+          matches = false;
+          break;
+        }
+      }
+
+      // A proper prefix of the BOM in a streaming call cannot be decided yet; the
+      // loop below parks those bytes in pendingBytes and the next call re-checks.
+      const undecided = matches && seen < UTF8_BOM.length && Boolean(options.stream);
+      if (!undecided) {
+        if (matches && seen === UTF8_BOM.length && !this.ignoreBOM) {
+          i = UTF8_BOM.length; // Skip UTF-8 BOM
+        }
+        this.bomChecked = true;
+      }
+    }
+
+    // A non-streaming call ends the stream, so the next call starts a new one.
+    if (!options.stream) {
+      this.bomChecked = false;
+    }
+
+    while (i < bytes.length) {
+      const byte1 = bytes[i];
+
+      // Determine the number of bytes in this character
+      let bytesNeeded: number;
+      let codePoint: number;
+
+      // Allowed range for the second byte. It is narrowed for the lead bytes that
+      // could otherwise start an overlong form, a surrogate or a value > U+10FFFF.
+      let lower = 0x80;
+      let upper = 0xbf;
+
+      if (byte1 >= 0x00 && byte1 <= 0x7f) {
+        // 1-byte sequence (0xxxxxxx) - ASCII
+        codePoint = byte1;
+        bytesNeeded = 1;
+      } else if (byte1 >= 0xc2 && byte1 <= 0xdf) {
+        // 2-byte sequence (110xxxxx 10xxxxxx); C0 and C1 could only be overlong
+        bytesNeeded = 2;
+        codePoint = byte1 & 0x1f;
+      } else if (byte1 >= 0xe0 && byte1 <= 0xef) {
+        // 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
+        bytesNeeded = 3;
+        codePoint = byte1 & 0x0f;
+        if (byte1 === 0xe0) {
+          lower = 0xa0; // anything lower would be an overlong form of < U+0800
+        } else if (byte1 === 0xed) {
+          upper = 0x9f; // anything higher would be a surrogate (U+D800..U+DFFF)
+        }
+      } else if (byte1 >= 0xf0 && byte1 <= 0xf4) {
+        // 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+        bytesNeeded = 4;
+        codePoint = byte1 & 0x07;
+        if (byte1 === 0xf0) {
+          lower = 0x90; // anything lower would be an overlong form of < U+10000
+        } else if (byte1 === 0xf4) {
+          upper = 0x8f; // anything higher would exceed U+10FFFF
+        }
+      } else {
+        // Invalid start byte
+        if (this.fatal) {
+          throw new TypeError(`Invalid UTF-8 sequence start byte: 0x${byte1.toString(16)}`);
+        }
+        result += '\uFFFD'; // Replacement character
+        i++;
+        continue;
+      }
+
+      // Consume the continuation bytes that are present and in range
+      const available = Math.min(bytesNeeded, bytes.length - i);
+      let consumed = 1;
+      while (consumed < available) {
+        const byte = bytes[i + consumed];
+        if (byte < lower || byte > upper) {
+          break;
+        }
+        codePoint = (codePoint << 6) | (byte & 0x3f);
+        lower = 0x80;
+        upper = 0xbf;
+        consumed++;
+      }
+
+      if (consumed < available) {
+        // The next byte does not continue this sequence: report the prefix read
+        // so far once and decode the offending byte again as a fresh start.
+        if (this.fatal) {
+          throw new TypeError('Invalid UTF-8 continuation byte');
+        }
+        result += '\uFFFD';
+        i += consumed;
+        continue;
+      }
+
+      if (consumed < bytesNeeded) {
+        // Input ended inside a sequence that was well-formed so far
+        if (options.stream) {
+          // Save pending bytes for next call
+          this.pendingBytes = bytes.slice(i);
+          break;
+        }
+        // Not streaming - this is an error
+        if (this.fatal) {
+          throw new TypeError('Incomplete UTF-8 sequence at end of input');
+        }
+        result += '\uFFFD';
+        break;
+      }
+
+      // Convert code point to string
+      if (codePoint < 0x10000) {
+        // BMP character
+        result += String.fromCharCode(codePoint);
+      } else {
+        // Supplementary character - use surrogate pair
+        codePoint -= 0x10000;
+        const high = 0xd800 + (codePoint >> 10);
+        const low = 0xdc00 + (codePoint & 0x3ff);
+        result +=
String.fromCharCode(high, low);
+      }
+
+      i += bytesNeeded;
+    }
+
+    return result;
+  }
+}
\ No newline at end of file
diff --git a/TextEncoderPolyfill.ts b/TextEncoderPolyfill.ts
new file mode 100644
index 0000000..35ceee1
--- /dev/null
+++ b/TextEncoderPolyfill.ts
@@ -0,0 +1,128 @@
+/* eslint-disable no-bitwise */
+/**
+ * TextEncoder Polyfill Implementation
+ *
+ * React Native compatible UTF-8 text encoder
+ * Based on the Encoding Standard: https://encoding.spec.whatwg.org/
+ *
+ * @module text-encoding@1.0.0
+ */
+
+import type { ITextEncoder, TextEncoderEncodeIntoResult } from './interfaces';
+
+/**
+ * Read the Unicode scalar value that starts at UTF-16 index `index`.
+ * A valid surrogate pair yields its supplementary code point (> 0xFFFF);
+ * a surrogate without its partner yields U+FFFD.
+ */
+function scalarValueAt(text: string, index: number): number {
+  const unit = text.charCodeAt(index);
+  if (unit < 0xd800 || unit > 0xdfff) {
+    return unit;
+  }
+  if (unit <= 0xdbff && index + 1 < text.length) {
+    const low = text.charCodeAt(index + 1);
+    if (low >= 0xdc00 && low <= 0xdfff) {
+      // Valid surrogate pair
+      return 0x10000 + ((unit - 0xd800) << 10) + (low - 0xdc00);
+    }
+  }
+  // Lone or mismatched surrogate
+  return 0xfffd;
+}
+
+/** Number of UTF-8 bytes needed for a scalar value. */
+function utf8Length(codePoint: number): number {
+  if (codePoint < 0x80) {
+    return 1;
+  }
+  if (codePoint < 0x800) {
+    return 2;
+  }
+  return codePoint < 0x10000 ? 3 : 4;
+}
+
+/**
+ * Write the UTF-8 form of `codePoint` into `target` starting at `offset`.
+ * The caller guarantees there is room. Returns the offset after the last byte.
+ */
+function writeUtf8(codePoint: number, target: Uint8Array | number[], offset: number): number {
+  let at = offset;
+  if (codePoint < 0x80) {
+    // 1-byte sequence (ASCII)
+    target[at++] = codePoint;
+  } else if (codePoint < 0x800) {
+    // 2-byte sequence
+    target[at++] = 0xc0 | (codePoint >> 6);
+    target[at++] = 0x80 | (codePoint & 0x3f);
+  } else if (codePoint < 0x10000) {
+    // 3-byte sequence
+    target[at++] = 0xe0 | (codePoint >> 12);
+    target[at++] = 0x80 | ((codePoint >> 6) & 0x3f);
+    target[at++] = 0x80 | (codePoint & 0x3f);
+  } else {
+    // 4-byte sequence
+    target[at++] = 0xf0 | (codePoint >> 18);
+    target[at++] = 0x80 | ((codePoint >> 12) & 0x3f);
+    target[at++] = 0x80 | ((codePoint >> 6) & 0x3f);
+    target[at++] = 0x80 | (codePoint & 0x3f);
+  }
+  return at;
+}
+
+export class TextEncoderPolyfill implements ITextEncoder {
+  readonly encoding = 'utf-8';
+
+  /**
+   * Encode a string to UTF-8 bytes.
+   *
+   * @param input Text to encode; defaults to the empty string.
+   * @returns A new array holding the UTF-8 bytes. Unpaired surrogates are
+   *   encoded as U+FFFD (EF BF BD).
+   */
+  encode(input: string = ''): Uint8Array {
+    const bytes: number[] = [];
+    let written = 0;
+    let i = 0;
+
+    while (i < input.length) {
+      const codePoint = scalarValueAt(input, i);
+      written = writeUtf8(codePoint, bytes, written);
+      i += codePoint > 0xffff ? 2 : 1; // a pair occupies two UTF-16 units
+    }
+
+    return new Uint8Array(bytes);
+  }
+
+  /**
+   * Encode as much of `source` as fits into `destination`.
+   *
+   * Stops before the first character whose bytes would not fit entirely, so
+   * `destination` never ends with a partial sequence.
+   *
+   * @param source Text to encode.
+   * @param destination Array that receives the bytes, starting at index 0.
+   * @returns `read`: UTF-16 code units consumed from `source` (2 for a
+   *   surrogate pair); `written`: bytes stored in `destination`.
+   */
+  encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult {
+    let read = 0;
+    let written = 0;
+
+    while (read < source.length) {
+      const codePoint = scalarValueAt(source, read);
+
+      // Check if we have space
+      if (written + utf8Length(codePoint) > destination.length) {
+        break;
+      }
+
+      // A surrogate pair is consumed as a whole (two UTF-16 units), so its low
+      // half is never visited again and encoded a second time as a lone surrogate.
+      written = writeUtf8(codePoint, destination, written);
+      read += codePoint > 0xffff ? 2 : 1;
+    }
+
+    return { read, written };
+  }
+}
\ No newline at end of file
diff --git a/TextEncodingFactory.ts b/TextEncodingFactory.ts
new file mode 100644
index 0000000..f21aa0b
--- /dev/null
+++ b/TextEncodingFactory.ts
@@ -0,0 +1,132 @@
+/**
+ * Text Encoding Factory
+ *
+ * Creates encoder/decoder instances with automatic polyfill selection
+ *
+ * @module text-encoding@1.0.0
+ */
+
+import { TextDecoderPolyfill } from './TextDecoderPolyfill';
+import { TextEncoderPolyfill }
from './TextEncoderPolyfill';
+import type {
+  ITextDecoder,
+  ITextEncoder,
+  ITextEncodingFactory,
+  TextDecoderOptions,
+} from './interfaces';
+
+export class TextEncodingFactory implements ITextEncodingFactory {
+  private static instance: TextEncodingFactory;
+
+  /**
+   * Return the shared factory, creating it on first use
+   */
+  static getInstance(): TextEncodingFactory {
+    if (!TextEncodingFactory.instance) {
+      TextEncodingFactory.instance = new TextEncodingFactory();
+    }
+    return TextEncodingFactory.instance;
+  }
+
+  /**
+   * Create an encoder: the global TextEncoder when it is defined and constructs,
+   * otherwise (after logging a warning if construction threw) a TextEncoderPolyfill
+   */
+  createEncoder(): ITextEncoder {
+    // @ts-expect-error - Check for native TextEncoder
+    if (typeof TextEncoder !== 'undefined') {
+      try {
+        // @ts-expect-error - Try to use native
+        return new TextEncoder();
+      } catch (e) {
+        // Fall back to polyfill if native fails
+        console.warn('[TextEncodingFactory] Native TextEncoder failed, using polyfill:', e);
+      }
+    }
+
+    return new TextEncoderPolyfill();
+  }
+
+  /**
+   * Create a decoder: the global TextDecoder when it is defined and accepts the
+   * arguments, otherwise (after logging a warning) a TextDecoderPolyfill built from them
+   */
+  createDecoder(label: string = 'utf-8', options?: TextDecoderOptions): ITextDecoder {
+    // @ts-expect-error - Check for native TextDecoder
+    if (typeof TextDecoder !== 'undefined') {
+      try {
+        // @ts-expect-error - Try to use native
+        return new TextDecoder(label, options);
+      } catch (e) {
+        // Fall back to polyfill if native fails
+        console.warn('[TextEncodingFactory] Native TextDecoder failed, using polyfill:', e);
+      }
+    }
+
+    return new TextDecoderPolyfill(label, options);
+  }
+
+  /**
+   * Report whether both globals exist and round-trip the string 'test' unchanged
+   */
+  isNativelySupported(): boolean {
+    // @ts-expect-error - Check global scope
+    if (typeof TextEncoder === 'undefined' || typeof TextDecoder === 'undefined') {
+      return false;
+    }
+
+    // Try to instantiate to make sure they work
+    try {
+      // @ts-expect-error
+      const encoder = new TextEncoder();
+      // @ts-expect-error
+      const decoder = new TextDecoder();
+
+      // Basic functionality test
+      const testString = 'test';
+      const encoded = encoder.encode(testString);
+      const decoded = decoder.decode(encoded);
+
+      return decoded === testString;
+    } catch (_e) {
+      return false;
+    }
+  }
+}
+
+/**
+ * Create an encoder through the shared TextEncodingFactory
+ */
+export function createTextEncoder(): ITextEncoder {
+  return TextEncodingFactory.getInstance().createEncoder();
+}
+
+/**
+ * Create a decoder through the shared TextEncodingFactory
+ */
+export function createTextDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder {
+  return TextEncodingFactory.getInstance().createDecoder(label, options);
+}
+
+/**
+ * Assign the polyfill classes to global.TextEncoder / global.TextDecoder where undefined
+ * Does nothing when `global` itself is not defined
+ */
+export function installTextEncodingPolyfills(): void {
+  // @ts-expect-error
+  if (typeof global !== 'undefined') {
+    // @ts-expect-error
+    if (typeof global.TextEncoder === 'undefined') {
+      // @ts-expect-error
+      global.TextEncoder = TextEncoderPolyfill;
+      console.info('[TextEncodingFactory] Installed TextEncoder polyfill globally');
+    }
+
+    // @ts-expect-error
+    if (typeof global.TextDecoder === 'undefined') {
+      // @ts-expect-error
+      global.TextDecoder = TextDecoderPolyfill;
+      console.info('[TextEncodingFactory] Installed TextDecoder polyfill globally');
+    }
+  }
+}
\ No newline at end of file
diff --git a/TextEncodingService.ts b/TextEncodingService.ts
new file mode 100644
index 0000000..bc729e2
--- /dev/null
+++ b/TextEncodingService.ts
@@ -0,0 +1,89 @@
+/**
+ * Text Encoding Service Implementation
+ *
+ * Provides shared encoder/decoder instances with automatic polyfill fallback
+ *
+ * @module text-encoding@1.0.0
+ */
+
+import type { ITextDecoder, ITextEncoder, ITextEncodingService } from './interfaces';
+import { TextDecoderPolyfill } from './TextDecoderPolyfill';
+import { TextEncoderPolyfill } from './TextEncoderPolyfill';
+
+export class TextEncodingService
implements ITextEncodingService {
+  readonly encoder: ITextEncoder;
+  readonly decoder: ITextDecoder;
+
+  constructor() {
+    // Check for native support and use it if available
+    if (this.hasNativeSupport()) {
+      // Use native implementations if available
+      // @ts-expect-error - TextEncoder might exist globally
+      this.encoder =
+        typeof TextEncoder !== 'undefined' ? new TextEncoder() : new TextEncoderPolyfill();
+
+      // @ts-expect-error - TextDecoder might exist globally
+      this.decoder =
+        typeof TextDecoder !== 'undefined' ? new TextDecoder() : new TextDecoderPolyfill();
+    } else {
+      // Use polyfills
+      this.encoder = new TextEncoderPolyfill();
+      this.decoder = new TextDecoderPolyfill();
+    }
+  }
+
+  /**
+   * True only when both TextEncoder and TextDecoder are defined as globals
+   */
+  private hasNativeSupport(): boolean {
+    // @ts-expect-error - Check global scope
+    return typeof TextEncoder !== 'undefined' && typeof TextDecoder !== 'undefined';
+  }
+
+  /**
+   * Encode text with the shared encoder
+   */
+  encode(text: string): Uint8Array {
+    return this.encoder.encode(text);
+  }
+
+  /**
+   * Decode bytes with the shared decoder; a plain number[] is wrapped in a Uint8Array first
+   */
+  decode(bytes: Uint8Array | ArrayBuffer | number[]): string {
+    if (Array.isArray(bytes)) {
+      // Convert number array to Uint8Array
+      return this.decoder.decode(new Uint8Array(bytes));
+    }
+    return this.decoder.decode(bytes as Uint8Array | ArrayBuffer);
+  }
+
+  /**
+   * Alias for encode()
+   */
+  stringToUtf8(text: string): Uint8Array {
+    return this.encode(text);
+  }
+
+  /**
+   * Alias for decode()
+   */
+  utf8ToString(bytes: Uint8Array | number[]): string {
+    return this.decode(bytes);
+  }
+}
+
+/**
+ * Lazily created module-level instance returned by getTextEncodingService()
+ */
+let serviceInstance: TextEncodingService | null = null;
+
+/**
+ * Return the shared service, constructing it on the first call
+ */
+export function getTextEncodingService(): ITextEncodingService {
+  if (!serviceInstance) {
+    serviceInstance = new TextEncodingService();
+  }
+  return serviceInstance;
+}
\ No newline at end of
file
diff --git a/index.d.ts b/index.d.ts
new file mode 100644
index 0000000..bb1eadc
--- /dev/null
+++ b/index.d.ts
@@ -0,0 +1,220 @@
+/**
+ * Type declarations for @metatrom/text-encoding
+ * Self-contained for IOR type generation
+ */
+
+declare module '@metatrom/text-encoding' {
+  /**
+   * TextEncoder interface - encodes strings to UTF-8 byte arrays
+   */
+  export interface ITextEncoder {
+    /**
+     * The encoding format (always 'utf-8' for this implementation)
+     */
+    readonly encoding: string;
+
+    /**
+     * Encode a string into a Uint8Array using UTF-8 encoding
+     * @param input The string to encode
+     * @returns UTF-8 encoded byte array
+     */
+    encode(input?: string): Uint8Array;
+
+    /**
+     * Encode a string into an existing Uint8Array (partial encoding)
+     * @param source The string to encode
+     * @param destination The array to write to
+     * @returns Object with read (chars) and written (bytes) counts
+     */
+    encodeInto?(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult;
+  }
+
+  /**
+   * TextDecoder interface - decodes UTF-8 byte arrays to strings
+   */
+  export interface ITextDecoder {
+    /**
+     * The encoding format (always 'utf-8' for this implementation)
+     */
+    readonly encoding: string;
+
+    /**
+     * Whether to throw on invalid sequences (vs replacement char)
+     */
+    readonly fatal: boolean;
+
+    /**
+     * Whether to ignore BOM (byte order mark)
+     */
+    readonly ignoreBOM: boolean;
+
+    /**
+     * Decode a byte array into a string using UTF-8 decoding
+     * @param input The bytes to decode (Uint8Array or ArrayBuffer)
+     * @param options Decoding options
+     * @returns Decoded string
+     */
+    decode(input?: ArrayBufferView | ArrayBuffer, options?: TextDecodeOptions): string;
+  }
+
+  /**
+   * Result of encodeInto operation
+   */
+  export interface TextEncoderEncodeIntoResult {
+    /**
+     * Number of UTF-16 code units read from source
+     */
+    read: number;
+
+    /**
+     * Number of bytes written to destination
+     */
+    written: number;
+  }
+
+  /**
+   * Options for text decoding
+   */
+  export interface TextDecodeOptions {
+    /**
+     * Whether this is a streaming decode (more data coming)
+     */
+    stream?: boolean;
+  }
+
+  /**
+   * Options for TextDecoder constructor
+   */
+  export interface TextDecoderOptions {
+    /**
+     * If true, throw on invalid byte sequences
+     */
+    fatal?: boolean;
+
+    /**
+     * If true, ignore byte order mark
+     */
+    ignoreBOM?: boolean;
+  }
+
+  /**
+   * Factory for creating encoder/decoder instances
+   */
+  export interface ITextEncodingFactory {
+    /**
+     * Create a new TextEncoder instance
+     */
+    createEncoder(): ITextEncoder;
+
+    /**
+     * Create a new TextDecoder instance
+     * @param label Encoding label (default 'utf-8')
+     * @param options Decoder options
+     */
+    createDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+
+    /**
+     * Check if TextEncoder/TextDecoder are natively available
+     */
+    isNativelySupported(): boolean;
+  }
+
+  /**
+   * Text encoding service for shared instances
+   */
+  export interface ITextEncodingService {
+    /**
+     * Shared encoder instance
+     */
+    readonly encoder: ITextEncoder;
+
+    /**
+     * Shared decoder instance
+     */
+    readonly decoder: ITextDecoder;
+
+    /**
+     * Encode string to bytes
+     */
+    encode(text: string): Uint8Array;
+
+    /**
+     * Decode bytes to string
+     */
+    decode(bytes: Uint8Array | ArrayBuffer | number[]): string;
+
+    /**
+     * Convert string to UTF-8 byte array (convenience method)
+     */
+    stringToUtf8(text: string): Uint8Array;
+
+    /**
+     * Convert UTF-8 byte array to string (convenience method)
+     */
+    utf8ToString(bytes: Uint8Array | number[]): string;
+  }
+
+  /**
+   * TextEncoder Polyfill Implementation
+   */
+  export class TextEncoderPolyfill implements ITextEncoder {
+    readonly encoding: string;
+    encode(input?: string): Uint8Array;
+    encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult;
+  }
+
+  /**
+   * TextDecoder Polyfill Implementation
+   */
+  export class TextDecoderPolyfill implements ITextDecoder {
+    readonly encoding: string;
+    readonly fatal: boolean;
+    readonly ignoreBOM: boolean;
+
+    constructor(label?: string, options?: TextDecoderOptions);
+    decode(input?: ArrayBufferView | ArrayBuffer | null, options?: TextDecodeOptions): string;
+  }
+
+  /**
+   * Text Encoding Factory
+   */
+  export class TextEncodingFactory implements ITextEncodingFactory {
+    static getInstance(): TextEncodingFactory;
+    createEncoder(): ITextEncoder;
+    createDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+    isNativelySupported(): boolean;
+  }
+
+  /**
+   * Text Encoding Service Implementation
+   */
+  export class TextEncodingService implements ITextEncodingService {
+    readonly encoder: ITextEncoder;
+    readonly decoder: ITextDecoder;
+
+    encode(text: string): Uint8Array;
+    decode(bytes: Uint8Array | ArrayBuffer | number[]): string;
+    stringToUtf8(text: string): Uint8Array;
+    utf8ToString(bytes: Uint8Array | number[]): string;
+  }
+
+  /**
+   * Factory functions
+   */
+  export function createTextEncoder(): ITextEncoder;
+  export function createTextDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+  export function installTextEncodingPolyfills(): void;
+  export function getTextEncodingService(): ITextEncodingService;
+
+  /**
+   * Default shared text encoding service
+   * Use this for most encoding/decoding needs
+   */
+  export const textEncoding: ITextEncodingService;
+
+  /**
+   * Version information
+   */
+  export const VERSION: string;
+  export const COMPONENT_NAME: string;
+}
\ No newline at end of file
diff --git a/index.ts b/index.ts
new file mode 100644
index 0000000..24efebc
--- /dev/null
+++ b/index.ts
@@ -0,0 +1,46 @@
+/**
+ * Text Encoding Module
+ *
+ * Provides UTF-8 text encoding/decoding for React Native with automatic polyfill support.
+ * Compatible with the standard TextEncoder/TextDecoder Web API.
+ *
+ * @module text-encoding@1.0.0
+ */
+
+// Export factory and helpers
+export {
+  createTextDecoder,
+  createTextEncoder,
+  installTextEncodingPolyfills,
+  TextEncodingFactory,
+} from './TextEncodingFactory';
+export { TextDecoderPolyfill } from './TextDecoderPolyfill';
+
+// Export implementations (for advanced usage)
+export { TextEncoderPolyfill } from './TextEncoderPolyfill';
+export {
+  getTextEncodingService,
+  TextEncodingService,
+} from './TextEncodingService';
+
+// Create and export default service instance
+import { getTextEncodingService } from './TextEncodingService';
+
+/**
+ * Default shared text encoding service
+ * Use this for most encoding/decoding needs
+ */
+export const textEncoding = getTextEncodingService();
+
+// Version information
+export const VERSION = '1.0.0';
+export const COMPONENT_NAME = 'text-encoding';
+
+// Auto-install polyfills on import (for React Native)
+import { installTextEncodingPolyfills } from './TextEncodingFactory';
+
+// Install where there is no window, and in React Native (it aliases global.window, so check navigator.product)
+// @ts-expect-error -- `global` and its `window`/`navigator` members are not typed for this module
+if (typeof global !== 'undefined' && (!global.window || global.navigator?.product === 'ReactNative')) {
+  installTextEncodingPolyfills();
+}
\ No newline at end of file
diff --git a/interfaces.ts b/interfaces.ts
new file mode 100644
index 0000000..f576769
--- /dev/null
+++ b/interfaces.ts
@@ -0,0 +1,158 @@
+/**
+ * Text Encoding Interface Definitions
+ *
+ * Provides UTF-8 text encoding/decoding capabilities for React Native
+ * Compatible with the standard TextEncoder/TextDecoder Web API
+ *
+ * @module text-encoding@1.0.0/interfaces
+ */
+
+/**
+ * TextEncoder interface - encodes strings to UTF-8 byte arrays
+ */
+export interface ITextEncoder {
+  /**
+   * The encoding format (always 'utf-8' for this implementation)
+   */
+  readonly encoding: string;
+
+  /**
+   * Encode a string into a Uint8Array using UTF-8 encoding
+   * @param input The string to encode
+   * @returns UTF-8 encoded byte array
+   */
+  encode(input?: string): Uint8Array;
+
+  /**
+   * Encode a string into an existing Uint8Array (partial encoding)
+   * @param source The string to encode
+   * @param destination The array to write to
+   * @returns Object with read (chars) and written (bytes) counts
+   */
+  encodeInto?(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult;
+}
+
+/**
+ * TextDecoder interface - decodes UTF-8 byte arrays to strings
+ */
+export interface ITextDecoder {
+  /**
+   * The encoding format (always 'utf-8' for this implementation)
+   */
+  readonly encoding: string;
+
+  /**
+   * Whether to throw on invalid sequences (vs replacement char)
+   */
+  readonly fatal: boolean;
+
+  /**
+   * Whether to ignore BOM (byte order mark)
+   */
+  readonly ignoreBOM: boolean;
+
+  /**
+   * Decode a byte array into a string using UTF-8 decoding
+   * @param input The bytes to decode (Uint8Array or ArrayBuffer)
+   * @param options Decoding options
+   * @returns Decoded string
+   */
+  decode(input?: ArrayBufferView | ArrayBuffer, options?: TextDecodeOptions): string;
+}
+
+/**
+ * Result of encodeInto operation
+ */
+export interface TextEncoderEncodeIntoResult {
+  /**
+   * Number of UTF-16 code units read from source
+   */
+  read: number;
+
+  /**
+   * Number of bytes written to destination
+   */
+  written: number;
+}
+
+/**
+ * Options for text decoding
+ */
+export interface TextDecodeOptions {
+  /**
+   * Whether this is a streaming decode (more data coming)
+   */
+  stream?: boolean;
+}
+
+/**
+ * Options for TextDecoder constructor
+ */
+export interface TextDecoderOptions {
+  /**
+   * If true, throw on invalid byte sequences
+   */
+  fatal?: boolean;
+
+  /**
+   * If true, ignore byte order mark
+   */
+  ignoreBOM?: boolean;
+}
+
+/**
+ * Factory for creating encoder/decoder instances
+ */
+export interface ITextEncodingFactory {
+  /**
+   * Create a new TextEncoder instance
+   */
+  createEncoder(): ITextEncoder;
+
+  /**
+   * Create a new TextDecoder instance
+   * @param label Encoding label (default 'utf-8')
+   * @param options Decoder options
+   */
+  createDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+
+  /**
+   * Check if TextEncoder/TextDecoder are natively available
+   */
+  isNativelySupported(): boolean;
+}
+
+/**
+ * Text encoding service for shared instances
+ */
+export interface ITextEncodingService {
+  /**
+   * Shared encoder instance
+   */
+  readonly encoder: ITextEncoder;
+
+  /**
+   * Shared decoder instance
+   */
+  readonly decoder: ITextDecoder;
+
+  /**
+   * Encode string to bytes
+   */
+  encode(text: string): Uint8Array;
+
+  /**
+   * Decode bytes to string
+   */
+  decode(bytes: Uint8Array | ArrayBuffer | number[]): string;
+
+  /**
+   * Convert string to UTF-8 byte array (convenience method)
+   */
+  stringToUtf8(text: string): Uint8Array;
+
+  /**
+   * Convert UTF-8 byte array to string (convenience method)
+   */
+  utf8ToString(bytes: Uint8Array | number[]): string;
+}
\ No newline at end of file
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..1c18e48
--- /dev/null
+++ b/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "@metatrom/text-encoding",
+  "version": "1.0.0",
+  "main": "index.ts",
+  "type": "module",
+  "description": "Text encoding/decoding utilities with polyfills for React Native",
+  "repository": {
+    "type": "git",
+    "url": "git@gitea.metatrom.net:universal-components/text-encoding.git"
+  },
+  "metatrom": {
+    "ior": "ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0",
+    "capabilities": {
+      "p2p": false,
+      "contracts": false,
+      "viewer": false,
+      "sync": false
+    }
+  },
+  "dependencies": {},
+  "optionalDependencies": {}
+}
\ No newline at end of file