commit 3342f7e40bcc1d53631aec387789f13698df3f44 Author: Chris Daßler Date: Fri Aug 29 14:54:44 2025 +0200 Initial commit: Text encoding component with UTF-8 polyfills 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53e7358 --- /dev/null +++ b/.gitignore @@ -0,0 +1,237 @@ +# Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,node +# Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux,visualstudiocode,node + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Node ### +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# 
Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +### Node Patch ### +# Serverless Webpack directories +.webpack/ + +# Optional stylelint cache + +# SvelteKit build / generate output +.svelte-kit + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual 
Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,node \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..40e3c38 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +# Text Encoding Component + +UTF-8 text encoding/decoding utilities with automatic polyfill support for React Native. + +## Features + +- **Standard Compliance**: Compatible with the standard TextEncoder/TextDecoder Web API +- **React Native Support**: Automatic polyfills for environments without native support +- **UTF-8 Only**: Focused implementation supporting only UTF-8 encoding for reliability +- **Performance**: Uses native implementations when available, falls back to efficient polyfills +- **TypeScript**: Full TypeScript support with comprehensive type definitions + +## Installation + +This package is designed to be loaded via IOR (Interoperable Object Reference) from Gitea: + +```typescript +import { textEncoding } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; +``` + +## Usage + +### Simple Text Encoding/Decoding + +The easiest way to use this component is through the default service: + +```typescript +import { textEncoding } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +// Encode string to bytes +const encoded = textEncoding.encode('Hello, 世界! 
🌍'); +console.log(encoded); // Uint8Array + +// Decode bytes to string +const decoded = textEncoding.decode(encoded); +console.log(decoded); // "Hello, 世界! 🌍" +``` + +### Factory Functions + +For more control, use the factory functions: + +```typescript +import { createTextEncoder, createTextDecoder } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +const encoder = createTextEncoder(); +const decoder = createTextDecoder(); + +const bytes = encoder.encode('Hello World'); +const text = decoder.decode(bytes); +``` + +### Advanced Usage + +Create decoder instances with options: + +```typescript +import { createTextDecoder } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +// Throw on invalid sequences instead of using replacement character +const fatalDecoder = createTextDecoder('utf-8', { fatal: true }); + +// Ignore byte order mark +const ignoreBomDecoder = createTextDecoder('utf-8', { ignoreBOM: true }); +``` + +### Direct Polyfill Usage + +Access polyfill classes directly for advanced use cases: + +```typescript +import { TextEncoderPolyfill, TextDecoderPolyfill } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +const encoder = new TextEncoderPolyfill(); +const decoder = new TextDecoderPolyfill('utf-8', { fatal: false }); +``` + +## API Reference + +### textEncoding (Default Service) + +The main service instance with convenient methods: + +- `encode(text: string): Uint8Array` - Encode string to UTF-8 bytes +- `decode(bytes: Uint8Array | ArrayBuffer | number[]): string` - Decode bytes to string +- `stringToUtf8(text: string): Uint8Array` - Alias for encode() +- `utf8ToString(bytes: Uint8Array | number[]): string` - Alias for decode() + +### Factory Functions + +- `createTextEncoder(): ITextEncoder` - Create encoder instance +- `createTextDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder` - Create decoder instance +- `installTextEncodingPolyfills(): void` 
- Install global polyfills + +### Interfaces + +#### ITextEncoder + +- `encoding: string` - Always 'utf-8' +- `encode(input?: string): Uint8Array` - Encode string to bytes +- `encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult` - Encode into existing array + +#### ITextDecoder + +- `encoding: string` - Always 'utf-8' +- `fatal: boolean` - Whether to throw on invalid sequences +- `ignoreBOM: boolean` - Whether to ignore byte order mark +- `decode(input?: ArrayBufferView | ArrayBuffer, options?: TextDecodeOptions): string` - Decode bytes to string + +## Error Handling + +The component handles various error conditions gracefully: + +```typescript +import { textEncoding } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; + +// Invalid UTF-8 sequences are replaced with � (U+FFFD) by default +const invalidBytes = new Uint8Array([0xFF, 0xFE, 0xFD]); +const result = textEncoding.decode(invalidBytes); +console.log(result); // "���" + +// Use fatal mode to throw on errors +import { createTextDecoder } from 'ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0'; +const fatalDecoder = createTextDecoder('utf-8', { fatal: true }); +try { + fatalDecoder.decode(invalidBytes); +} catch (error) { + console.error('Invalid UTF-8 sequence:', error.message); +} +``` + +## Platform Support + +- **React Native**: Full support with automatic polyfills +- **Node.js**: Uses native TextEncoder/TextDecoder when available +- **Browsers**: Uses native implementations in modern browsers +- **Automatic Fallback**: Seamlessly falls back to polyfills when native support is unavailable + +## Performance Notes + +- Native implementations are preferred when available for optimal performance +- Polyfills are optimized for correctness and reasonable performance +- UTF-8 validation is performed to ensure data integrity +- Surrogate pair handling for proper Unicode support + +## Unicode Support + +This implementation fully supports 
/* eslint-disable no-bitwise */

/**
 * UTF-8 `TextDecoder` polyfill for runtimes without a native implementation.
 *
 * Implements the UTF-8 decoder state machine of the WHATWG Encoding Standard
 * (https://encoding.spec.whatwg.org/#utf-8-decoder), so malformed input yields
 * the same U+FFFD placement as a native `TextDecoder`:
 * - a byte that cannot continue the sequence in progress ends that sequence
 *   with one U+FFFD and is then decoded again as the start of a new sequence,
 *   so valid text following a truncated sequence is never dropped;
 * - overlong forms, surrogates and values above U+10FFFF are rejected through
 *   the continuation range allowed after each lead byte.
 *
 * Decoder state survives between calls made with `{ stream: true }`, which
 * lets a multi-byte sequence (or the BOM) be split across chunks.
 */
export class TextDecoderPolyfill implements ITextDecoder {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;

  /** Code point bits accumulated for the sequence in progress. */
  private codePoint = 0;
  /** Continuation bytes required by the current lead byte (0 = idle). */
  private bytesNeeded = 0;
  /** Continuation bytes already consumed for the sequence in progress. */
  private bytesSeen = 0;
  /** Inclusive lower bound for the next continuation byte. */
  private lowerBoundary = 0x80;
  /** Inclusive upper bound for the next continuation byte. */
  private upperBoundary = 0xbf;
  /** True once the first code point (or error) of the current stream was produced. */
  private streamStarted = false;

  /**
   * @param label Encoding label; only `utf-8` / `utf8` are accepted
   *   (case-insensitive, surrounding whitespace ignored).
   * @param options `fatal` makes malformed input throw instead of producing
   *   U+FFFD; `ignoreBOM` keeps a leading BOM in the output instead of
   *   stripping it.
   * @throws RangeError when `label` names any other encoding.
   */
  constructor(label: string = 'utf-8', options: TextDecoderOptions = {}) {
    const normalized = label.trim().toLowerCase();
    if (normalized !== 'utf-8' && normalized !== 'utf8') {
      throw new RangeError(`Unsupported encoding: ${label}. Only UTF-8 is supported.`);
    }

    this.encoding = 'utf-8';
    this.fatal = Boolean(options.fatal);
    this.ignoreBOM = Boolean(options.ignoreBOM);
  }

  /**
   * Decode UTF-8 bytes to a string.
   *
   * @param input Bytes to decode: an `ArrayBuffer`, any typed array or
   *   `DataView`, or (leniently) a plain array of byte values. A missing or
   *   unrecognised input decodes as zero bytes.
   * @param options Pass `{ stream: true }` when more chunks will follow; an
   *   unfinished sequence at the end of the chunk is then carried over to the
   *   next call instead of being reported as an error.
   * @returns The decoded text.
   * @throws TypeError on malformed input when the decoder is `fatal`.
   */
  decode(input?: ArrayBufferView | ArrayBuffer | null, options: TextDecodeOptions = {}): string {
    const bytes = TextDecoderPolyfill.toBytes(input);
    let result = '';

    const emit = (codePoint: number): void => {
      if (!this.streamStarted) {
        this.streamStarted = true;
        // Only the very first code point of a stream can be a BOM.
        if (!this.ignoreBOM && codePoint === 0xfeff) {
          return;
        }
      }
      if (codePoint < 0x10000) {
        result += String.fromCharCode(codePoint);
      } else {
        // Supplementary plane: split into a UTF-16 surrogate pair.
        const offset = codePoint - 0x10000;
        result += String.fromCharCode(0xd800 + (offset >> 10), 0xdc00 + (offset & 0x3ff));
      }
    };

    const fail = (message: string): void => {
      this.resetSequence();
      if (this.fatal) {
        // Leave the instance reusable as a fresh decoder after the throw.
        this.streamStarted = false;
        throw new TypeError(message);
      }
      emit(0xfffd);
    };

    for (let i = 0; i < bytes.length; i++) {
      const byte = bytes[i];

      if (this.bytesNeeded === 0) {
        if (byte <= 0x7f) {
          emit(byte);
        } else if (byte >= 0xc2 && byte <= 0xdf) {
          // 0xC0/0xC1 could only start overlong forms, so they are not leads.
          this.bytesNeeded = 1;
          this.codePoint = byte & 0x1f;
        } else if (byte >= 0xe0 && byte <= 0xef) {
          if (byte === 0xe0) this.lowerBoundary = 0xa0; // excludes overlong forms
          if (byte === 0xed) this.upperBoundary = 0x9f; // excludes surrogates
          this.bytesNeeded = 2;
          this.codePoint = byte & 0x0f;
        } else if (byte >= 0xf0 && byte <= 0xf4) {
          if (byte === 0xf0) this.lowerBoundary = 0x90; // excludes overlong forms
          if (byte === 0xf4) this.upperBoundary = 0x8f; // excludes > U+10FFFF
          this.bytesNeeded = 3;
          this.codePoint = byte & 0x07;
        } else {
          fail(`Invalid UTF-8 sequence start byte: 0x${byte.toString(16)}`);
        }
        continue;
      }

      if (byte < this.lowerBoundary || byte > this.upperBoundary) {
        // The sequence in progress is broken. Step back so this byte is
        // decoded again as the potential start of a new sequence.
        i--;
        fail('Invalid UTF-8 continuation byte');
        continue;
      }

      this.lowerBoundary = 0x80;
      this.upperBoundary = 0xbf;
      this.codePoint = (this.codePoint << 6) | (byte & 0x3f);
      this.bytesSeen++;

      if (this.bytesSeen === this.bytesNeeded) {
        const completed = this.codePoint;
        this.resetSequence();
        emit(completed);
      }
    }

    if (!options.stream) {
      if (this.bytesNeeded !== 0) {
        fail('Incomplete UTF-8 sequence at end of input');
      }
      // End of stream: the next call starts over, including BOM detection.
      this.streamStarted = false;
    }

    return result;
  }

  /** Forget any partially decoded sequence and restore the default byte range. */
  private resetSequence(): void {
    this.codePoint = 0;
    this.bytesNeeded = 0;
    this.bytesSeen = 0;
    this.lowerBoundary = 0x80;
    this.upperBoundary = 0xbf;
  }

  /** View the supported input kinds as bytes; anything else yields no bytes. */
  private static toBytes(input: unknown): Uint8Array {
    if (ArrayBuffer.isView(input)) {
      return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
    }
    if (input instanceof ArrayBuffer) {
      return new Uint8Array(input);
    }
    if (Array.isArray(input)) {
      // Values are wrapped to 0-255, like `new Uint8Array(array)` would.
      return Uint8Array.from(input as number[]);
    }
    return new Uint8Array(0);
  }
}

/**
 * UTF-8 `TextEncoder` polyfill for runtimes without a native implementation.
 *
 * Unpaired surrogates in the input are encoded as U+FFFD (EF BF BD), as the
 * Encoding Standard requires.
 */
export class TextEncoderPolyfill implements ITextEncoder {
  readonly encoding = 'utf-8';

  /**
   * Encode a string to UTF-8 bytes.
   *
   * @param input Text to encode; defaults to the empty string.
   * @returns A new `Uint8Array` holding the UTF-8 encoding of `input`.
   */
  encode(input: string = ''): Uint8Array {
    const bytes: number[] = [];
    let index = 0;

    while (index < input.length) {
      const codePoint = TextEncoderPolyfill.scalarValueAt(input, index);
      TextEncoderPolyfill.writeUtf8(codePoint, bytes, bytes.length);
      // Only a valid surrogate pair yields a value above U+FFFF.
      index += codePoint > 0xffff ? 2 : 1;
    }

    return new Uint8Array(bytes);
  }

  /**
   * Encode as much of `source` as fits into `destination`.
   *
   * A character is written only if all of its bytes fit, so the output never
   * ends in a partial sequence.
   *
   * @param source Text to encode.
   * @param destination Buffer that receives the bytes, starting at index 0.
   * @returns `read`: UTF-16 code units consumed from `source` (2 for a
   *   surrogate pair); `written`: bytes stored in `destination`.
   */
  encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult {
    let read = 0;
    let written = 0;

    while (read < source.length) {
      const codePoint = TextEncoderPolyfill.scalarValueAt(source, read);
      if (written + TextEncoderPolyfill.utf8Length(codePoint) > destination.length) {
        break;
      }
      written = TextEncoderPolyfill.writeUtf8(codePoint, destination, written);
      read += codePoint > 0xffff ? 2 : 1;
    }

    return { read, written };
  }

  /**
   * Read the Unicode scalar value starting at `index`: the combined code point
   * for a valid surrogate pair, U+FFFD for an unpaired surrogate, otherwise
   * the code unit itself.
   */
  private static scalarValueAt(text: string, index: number): number {
    const unit = text.charCodeAt(index);
    if (unit < 0xd800 || unit > 0xdfff) {
      return unit;
    }
    if (unit <= 0xdbff && index + 1 < text.length) {
      const next = text.charCodeAt(index + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        return 0x10000 + ((unit - 0xd800) << 10) + (next - 0xdc00);
      }
    }
    return 0xfffd;
  }

  /** Number of UTF-8 bytes needed for a scalar value. */
  private static utf8Length(codePoint: number): number {
    if (codePoint < 0x80) return 1;
    if (codePoint < 0x800) return 2;
    if (codePoint < 0x10000) return 3;
    return 4;
  }

  /** Store the UTF-8 bytes of `codePoint` at `offset`; returns the next free offset. */
  private static writeUtf8(
    codePoint: number,
    target: number[] | Uint8Array,
    offset: number,
  ): number {
    let next = offset;
    if (codePoint < 0x80) {
      target[next++] = codePoint;
    } else if (codePoint < 0x800) {
      target[next++] = 0xc0 | (codePoint >> 6);
      target[next++] = 0x80 | (codePoint & 0x3f);
    } else if (codePoint < 0x10000) {
      target[next++] = 0xe0 | (codePoint >> 12);
      target[next++] = 0x80 | ((codePoint >> 6) & 0x3f);
      target[next++] = 0x80 | (codePoint & 0x3f);
    } else {
      target[next++] = 0xf0 | (codePoint >> 18);
      target[next++] = 0x80 | ((codePoint >> 12) & 0x3f);
      target[next++] = 0x80 | ((codePoint >> 6) & 0x3f);
      target[next++] = 0x80 | (codePoint & 0x3f);
    }
    return next;
  }
}
/*
 * Module-scoped ambient views of globals that may or may not exist at runtime.
 * `declare` emits no code, so `typeof X` still inspects the real global. The
 * declarations only give the compiler one consistent type to check against,
 * whether or not the DOM / Node typings are part of the build, which removes
 * the need for `@ts-expect-error` directives.
 */
declare const TextEncoder: undefined | (new () => ITextEncoder);
declare const TextDecoder:
  | undefined
  | (new (label?: string, options?: TextDecoderOptions) => ITextDecoder);
declare const global: undefined | { TextEncoder?: unknown; TextDecoder?: unknown };

/**
 * Creates encoder/decoder instances, preferring the native implementation and
 * falling back to the polyfills.
 */
export class TextEncodingFactory implements ITextEncodingFactory {
  private static instance: TextEncodingFactory;

  /**
   * Get the factory singleton, creating it on first use.
   */
  static getInstance(): TextEncodingFactory {
    if (!TextEncodingFactory.instance) {
      TextEncodingFactory.instance = new TextEncodingFactory();
    }
    return TextEncodingFactory.instance;
  }

  /**
   * Create a new encoder.
   *
   * @returns A native `TextEncoder` when the global exists and can be
   *   constructed, otherwise a `TextEncoderPolyfill`.
   */
  createEncoder(): ITextEncoder {
    if (typeof TextEncoder !== 'undefined') {
      try {
        return new TextEncoder();
      } catch (e) {
        // Fall back to polyfill if native fails
        console.warn('[TextEncodingFactory] Native TextEncoder failed, using polyfill:', e);
      }
    }

    return new TextEncoderPolyfill();
  }

  /**
   * Create a new decoder.
   *
   * @param label Encoding label, `'utf-8'` by default.
   * @param options Decoder options forwarded to the implementation.
   * @returns A native `TextDecoder` when the global exists and accepts the
   *   arguments, otherwise a `TextDecoderPolyfill` built from the same
   *   arguments.
   */
  createDecoder(label: string = 'utf-8', options?: TextDecoderOptions): ITextDecoder {
    if (typeof TextDecoder !== 'undefined') {
      try {
        return new TextDecoder(label, options);
      } catch (e) {
        // Fall back to polyfill if native fails
        console.warn('[TextEncodingFactory] Native TextDecoder failed, using polyfill:', e);
      }
    }

    return new TextDecoderPolyfill(label, options);
  }

  /**
   * Check whether working native TextEncoder/TextDecoder globals exist.
   *
   * @returns `true` only if both globals are defined and a short string
   *   survives an encode/decode round trip through them.
   */
  isNativelySupported(): boolean {
    if (typeof TextEncoder === 'undefined' || typeof TextDecoder === 'undefined') {
      return false;
    }

    // Instantiate and round-trip to make sure they actually work.
    try {
      const testString = 'test';
      const encoded = new TextEncoder().encode(testString);
      return new TextDecoder().decode(encoded) === testString;
    } catch (_e) {
      return false;
    }
  }
}

/**
 * Convenience function to create an encoder via the factory singleton.
 */
export function createTextEncoder(): ITextEncoder {
  return TextEncodingFactory.getInstance().createEncoder();
}

/**
 * Convenience function to create a decoder via the factory singleton.
 */
export function createTextDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder {
  return TextEncodingFactory.getInstance().createDecoder(label, options);
}

/**
 * Install the polyfills on `global` for whichever of TextEncoder/TextDecoder
 * is missing there. Does nothing when no `global` object exists.
 */
export function installTextEncodingPolyfills(): void {
  if (typeof global === 'undefined') {
    return;
  }

  if (typeof global.TextEncoder === 'undefined') {
    global.TextEncoder = TextEncoderPolyfill;
    console.info('[TextEncodingFactory] Installed TextEncoder polyfill globally');
  }

  if (typeof global.TextDecoder === 'undefined') {
    global.TextDecoder = TextDecoderPolyfill;
    console.info('[TextEncodingFactory] Installed TextDecoder polyfill globally');
  }
}

/**
 * Holds one shared encoder and one shared decoder and exposes convenience
 * encode/decode helpers on top of them.
 */
export class TextEncodingService implements ITextEncodingService {
  readonly encoder: ITextEncoder;
  readonly decoder: ITextDecoder;

  /**
   * Uses the native pair when both globals are defined; otherwise uses both
   * polyfills, so encoder and decoder always come from the same family.
   */
  constructor() {
    if (typeof TextEncoder !== 'undefined' && typeof TextDecoder !== 'undefined') {
      this.encoder = new TextEncoder();
      this.decoder = new TextDecoder();
    } else {
      this.encoder = new TextEncoderPolyfill();
      this.decoder = new TextDecoderPolyfill();
    }
  }

  /**
   * Encode string to UTF-8 bytes
   */
  encode(text: string): Uint8Array {
    return this.encoder.encode(text);
  }

  /**
   * Decode bytes to string. Plain number arrays are copied into a
   * `Uint8Array` first, since the decoder contract takes buffers and views.
   */
  decode(bytes: Uint8Array | ArrayBuffer | number[]): string {
    if (Array.isArray(bytes)) {
      return this.decoder.decode(new Uint8Array(bytes));
    }
    return this.decoder.decode(bytes);
  }

  /**
   * Convenience method: string to UTF-8 (same as `encode`)
   */
  stringToUtf8(text: string): Uint8Array {
    return this.encode(text);
  }

  /**
   * Convenience method: UTF-8 to string (same as `decode`)
   */
  utf8ToString(bytes: Uint8Array | number[]): string {
    return this.decode(bytes);
  }
}

/**
 * Singleton instance for shared use
 */
let serviceInstance: TextEncodingService | null = null;

/**
 * Get or create the singleton service instance
 */
export function getTextEncodingService(): ITextEncodingService {
  if (!serviceInstance) {
    serviceInstance = new TextEncodingService();
  }
  return serviceInstance;
}
file
diff --git a/index.d.ts b/index.d.ts
new file mode 100644
index 0000000..bb1eadc
--- /dev/null
+++ b/index.d.ts
@@ -0,0 +1,220 @@
+/**
+ * Type declarations for @metatrom/text-encoding
+ * Self-contained for IOR type generation
+ */
+
+declare module '@metatrom/text-encoding' {
+  /**
+   * TextEncoder interface - encodes strings to UTF-8 byte arrays
+   */
+  export interface ITextEncoder {
+    /**
+     * The encoding format (always 'utf-8' for this implementation)
+     */
+    readonly encoding: string;
+
+    /**
+     * Encode a string into a Uint8Array using UTF-8 encoding
+     * @param input The string to encode
+     * @returns UTF-8 encoded byte array
+     */
+    encode(input?: string): Uint8Array;
+
+    /**
+     * Encode a string into an existing Uint8Array (partial encoding)
+     * @param source The string to encode
+     * @param destination The array to write to
+     * @returns Object with read (UTF-16 code units) and written (bytes) counts
+     */
+    encodeInto?(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult;
+  }
+
+  /**
+   * TextDecoder interface - decodes UTF-8 byte arrays to strings
+   */
+  export interface ITextDecoder {
+    /**
+     * The encoding format (always 'utf-8' for this implementation)
+     */
+    readonly encoding: string;
+
+    /**
+     * Whether to throw on invalid sequences (vs replacement char)
+     */
+    readonly fatal: boolean;
+
+    /**
+     * Whether to ignore BOM (byte order mark)
+     */
+    readonly ignoreBOM: boolean;
+
+    /**
+     * Decode a byte array into a string using UTF-8 decoding
+     * @param input The bytes to decode (an ArrayBufferView such as Uint8Array, or an ArrayBuffer)
+     * @param options Decoding options
+     * @returns Decoded string
+     */
+    decode(input?: ArrayBufferView | ArrayBuffer, options?: TextDecodeOptions): string;
+  }
+
+  /**
+   * Result of encodeInto operation
+   */
+  export interface TextEncoderEncodeIntoResult {
+    /**
+     * Number of UTF-16 code units read from source
+     */
+    read: number;
+
+    /**
+     * Number of bytes written to destination
+     */
+    written: number;
+  }
+
+  /**
+   * Options for text decoding
+   */
+  export interface TextDecodeOptions {
+    /**
+     * Whether this is a streaming decode (more data coming)
+     */
+    stream?: boolean;
+  }
+
+  /**
+   * Options for TextDecoder constructor
+   */
+  export interface TextDecoderOptions {
+    /**
+     * If true, throw on invalid byte sequences
+     */
+    fatal?: boolean;
+
+    /**
+     * If true, ignore byte order mark
+     */
+    ignoreBOM?: boolean;
+  }
+
+  /**
+   * Factory for creating encoder/decoder instances
+   */
+  export interface ITextEncodingFactory {
+    /**
+     * Create a new TextEncoder instance
+     */
+    createEncoder(): ITextEncoder;
+
+    /**
+     * Create a new TextDecoder instance
+     * @param label Encoding label (default 'utf-8')
+     * @param options Decoder options
+     */
+    createDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+
+    /**
+     * Check if TextEncoder/TextDecoder are natively available
+     */
+    isNativelySupported(): boolean;
+  }
+
+  /**
+   * Text encoding service for shared instances
+   */
+  export interface ITextEncodingService {
+    /**
+     * Shared encoder instance
+     */
+    readonly encoder: ITextEncoder;
+
+    /**
+     * Shared decoder instance
+     */
+    readonly decoder: ITextDecoder;
+
+    /**
+     * Encode string to bytes
+     */
+    encode(text: string): Uint8Array;
+
+    /**
+     * Decode bytes to string
+     */
+    decode(bytes: Uint8Array | ArrayBuffer | number[]): string;
+
+    /**
+     * Convert string to UTF-8 byte array (convenience method)
+     */
+    stringToUtf8(text: string): Uint8Array;
+
+    /**
+     * Convert UTF-8 byte array to string (convenience method)
+     */
+    utf8ToString(bytes: Uint8Array | number[]): string;
+  }
+
+  /**
+   * TextEncoder polyfill class (no `declare` modifier: this module block is already ambient)
+   */
+  export class TextEncoderPolyfill implements ITextEncoder {
+    readonly encoding: string;
+    encode(input?: string): Uint8Array;
+    encodeInto(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult;
+  }
+
+  /**
+   * TextDecoder polyfill class; unlike ITextDecoder, `decode` also accepts `null` input
+   */
+  export class TextDecoderPolyfill implements ITextDecoder {
+    readonly encoding: string;
+    readonly fatal: boolean;
+    readonly ignoreBOM: boolean;
+
+    constructor(label?: string, options?: TextDecoderOptions);
+    decode(input?: ArrayBufferView | ArrayBuffer | null, options?: TextDecodeOptions): string;
+  }
+
+  /**
+   * Text Encoding Factory (instance obtained through the static `getInstance`)
+   */
+  export class TextEncodingFactory implements ITextEncodingFactory {
+    static getInstance(): TextEncodingFactory;
+    createEncoder(): ITextEncoder;
+    createDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+    isNativelySupported(): boolean;
+  }
+
+  /**
+   * Text Encoding Service Implementation
+   */
+  export class TextEncodingService implements ITextEncodingService {
+    readonly encoder: ITextEncoder;
+    readonly decoder: ITextDecoder;
+
+    encode(text: string): Uint8Array;
+    decode(bytes: Uint8Array | ArrayBuffer | number[]): string;
+    stringToUtf8(text: string): Uint8Array;
+    utf8ToString(bytes: Uint8Array | number[]): string;
+  }
+
+  /**
+   * Factory functions
+   */
+  export function createTextEncoder(): ITextEncoder;
+  export function createTextDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+  export function installTextEncodingPolyfills(): void;
+  export function getTextEncodingService(): ITextEncodingService;
+
+  /**
+   * Default shared text encoding service
+   * Use this for most encoding/decoding needs
+   */
+  export const textEncoding: ITextEncodingService;
+
+  /**
+   * Version information
+   */
+  export const VERSION: string;
+  export const COMPONENT_NAME: string;
+}
\ No newline at end of file
diff --git a/index.ts b/index.ts
new file mode 100644
index 0000000..24efebc
--- /dev/null
+++ b/index.ts
@@ -0,0 +1,46 @@
+/**
+ * Text Encoding Module
+ *
+ * Provides UTF-8 text encoding/decoding for React Native with automatic polyfill support.
+ * Compatible with the standard TextEncoder/TextDecoder Web API.
+ *
+ * @module text-encoding@1.0.0
+ */
+
+// Export factory and helpers
+export {
+  createTextDecoder,
+  createTextEncoder,
+  installTextEncodingPolyfills,
+  TextEncodingFactory,
+} from './TextEncodingFactory';
+export { TextDecoderPolyfill } from './TextDecoderPolyfill';
+
+// Export implementations (for advanced usage)
+export { TextEncoderPolyfill } from './TextEncoderPolyfill';
+export {
+  getTextEncodingService,
+  TextEncodingService,
+} from './TextEncodingService';
+
+// Create and export default service instance
+import { getTextEncodingService } from './TextEncodingService';
+
+/**
+ * Default shared text encoding service
+ * Use this for most encoding/decoding needs
+ */
+export const textEncoding = getTextEncodingService();
+
+// Version information
+export const VERSION = '1.0.0';
+export const COMPONENT_NAME = 'text-encoding';
+
+// Auto-install polyfills on import (for React Native)
+import { installTextEncodingPolyfills } from './TextEncodingFactory';
+
+// NOTE(review): fires only when a Node-style `global` has no `window`; RN appears to alias global.window to global - confirm this runs there.
+declare const global: { window?: unknown } | undefined; // module-local typing (emits no code); replaces a bare @ts-expect-error
+if (typeof global !== 'undefined' && !global.window) {
+  installTextEncodingPolyfills();
+}
\ No newline at end of file
diff --git a/interfaces.ts b/interfaces.ts
new file mode 100644
index 0000000..f576769
--- /dev/null
+++ b/interfaces.ts
@@ -0,0 +1,158 @@
+/**
+ * Text Encoding Interface Definitions
+ *
+ * Provides UTF-8 text encoding/decoding capabilities for React Native.
+ * Compatible with the standard TextEncoder/TextDecoder Web API.
+ *
+ * @module text-encoding@1.0.0/interfaces
+ */
+
+/**
+ * TextEncoder interface - encodes strings to UTF-8 byte arrays
+ */
+export interface ITextEncoder {
+  /**
+   * The encoding format (always 'utf-8' for this implementation)
+   */
+  readonly encoding: string;
+
+  /**
+   * Encode a string into a Uint8Array using UTF-8 encoding
+   * @param input The string to encode
+   * @returns UTF-8 encoded byte array
+   */
+  encode(input?: string): Uint8Array;
+
+  /**
+   * Encode a string into an existing Uint8Array (partial encoding)
+   * @param source The string to encode
+   * @param destination The array to write to
+   * @returns Object with read (UTF-16 code units) and written (bytes) counts
+   */
+  encodeInto?(source: string, destination: Uint8Array): TextEncoderEncodeIntoResult;
+}
+
+/**
+ * TextDecoder interface - decodes UTF-8 byte arrays to strings
+ */
+export interface ITextDecoder {
+  /**
+   * The encoding format (always 'utf-8' for this implementation)
+   */
+  readonly encoding: string;
+
+  /**
+   * Whether to throw on invalid sequences (vs replacement char)
+   */
+  readonly fatal: boolean;
+
+  /**
+   * Whether to ignore BOM (byte order mark)
+   */
+  readonly ignoreBOM: boolean;
+
+  /**
+   * Decode a byte array into a string using UTF-8 decoding
+   * @param input The bytes to decode (an ArrayBufferView such as Uint8Array, or an ArrayBuffer)
+   * @param options Decoding options
+   * @returns Decoded string
+   */
+  decode(input?: ArrayBufferView | ArrayBuffer, options?: TextDecodeOptions): string;
+}
+
+/**
+ * Result of encodeInto operation
+ */
+export interface TextEncoderEncodeIntoResult {
+  /**
+   * Number of UTF-16 code units read from source
+   */
+  read: number;
+
+  /**
+   * Number of bytes written to destination
+   */
+  written: number;
+}
+
+/**
+ * Options for text decoding
+ */
+export interface TextDecodeOptions {
+  /**
+   * Whether this is a streaming decode (more data coming)
+   */
+  stream?: boolean;
+}
+
+/**
+ * Options for TextDecoder constructor
+ */
+export interface TextDecoderOptions {
+  /**
+   * If true, throw on invalid byte sequences
+   */
+  fatal?: boolean;
+
+  /**
+   * If true, ignore byte order mark
+   */
+  ignoreBOM?: boolean;
+}
+
+/**
+ * Factory for creating encoder/decoder instances
+ */
+export interface ITextEncodingFactory {
+  /**
+   * Create a new TextEncoder instance
+   */
+  createEncoder(): ITextEncoder;
+
+  /**
+   * Create a new TextDecoder instance
+   * @param label Encoding label (default 'utf-8')
+   * @param options Decoder options
+   */
+  createDecoder(label?: string, options?: TextDecoderOptions): ITextDecoder;
+
+  /**
+   * Check if TextEncoder/TextDecoder are natively available
+   */
+  isNativelySupported(): boolean;
+}
+
+/**
+ * Text encoding service for shared instances
+ */
+export interface ITextEncodingService {
+  /**
+   * Shared encoder instance
+   */
+  readonly encoder: ITextEncoder;
+
+  /**
+   * Shared decoder instance
+   */
+  readonly decoder: ITextDecoder;
+
+  /**
+   * Encode string to bytes
+   */
+  encode(text: string): Uint8Array;
+
+  /**
+   * Decode bytes to string
+   */
+  decode(bytes: Uint8Array | ArrayBuffer | number[]): string;
+
+  /**
+   * Convert string to UTF-8 byte array (convenience method)
+   */
+  stringToUtf8(text: string): Uint8Array;
+
+  /**
+   * Convert UTF-8 byte array to string (convenience method)
+   */
+  utf8ToString(bytes: Uint8Array | number[]): string;
+}
\ No newline at end of file
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..1c18e48
--- /dev/null
+++ b/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "@metatrom/text-encoding",
+  "version": "1.0.0",
+  "main": "index.ts",
+  "type": "module",
+  "description": "Text encoding/decoding utilities with polyfills for React Native",
+  "repository": {
+    "type": "git",
+    "url": "git@gitea.metatrom.net:universal-components/text-encoding.git"
+  },
+  "metatrom": {
+    "ior": "ior:gitea:gitea.metatrom.net:universal-components/text-encoding@1.0.0",
+    "capabilities": {
+      "p2p": false,
+      "contracts": false,
+      "viewer": false,
+      "sync": false
+    }
+  },
+  "dependencies": {},
+  "optionalDependencies": {}
+}
\ No newline at end of file