Compare commits

..

4 Commits

Author SHA1 Message Date
d0af3a63bc Added to/from xml helpers
All checks were successful
Build / Publish Docs (push) Successful in 57s
Build / Build NPM Project (push) Successful in 1m7s
Build / Tag Version (push) Successful in 13s
2026-04-04 17:00:10 -04:00
bf73d2670b Added to/from xml helpers
Some checks failed
Build / Tag Version (push) Has been cancelled
Build / Publish Docs (push) Has been cancelled
Build / Build NPM Project (push) Has been cancelled
2026-04-04 16:58:53 -04:00
361613f507 Added decodeHTML
All checks were successful
Build / Publish Docs (push) Successful in 1m6s
Build / Build NPM Project (push) Successful in 1m10s
Build / Tag Version (push) Successful in 11s
2026-03-29 22:33:21 -04:00
681c89d5af Fixed fromCSV single quotes
All checks were successful
Build / Publish Docs (push) Successful in 58s
Build / Build NPM Project (push) Successful in 55s
Build / Tag Version (push) Successful in 9s
2026-03-13 01:34:56 -04:00
6 changed files with 384 additions and 19 deletions

View File

@@ -1,6 +1,6 @@
{
"name": "@ztimson/utils",
"version": "0.28.14",
"version": "0.28.17",
"description": "Utility library",
"author": "Zak Timson",
"license": "MIT",

View File

@@ -14,41 +14,55 @@ import {LETTER_LIST} from './string.ts';
export function fromCsv<T = any>(csv: string, hasHeaders = true): T[] {
function parseLine(line: string): (string | null)[] {
const columns: string[] = [];
let current = '', inQuotes = false;
let current = '', inQuotes = false, quoteChar: string | null = null;
for (let i = 0; i < line.length; i++) {
const char = line[i];
const nextChar = line[i + 1];
if (char === '"') {
if (inQuotes && nextChar === '"') {
current += '"'; // Handle escaped quotes
if ((char === '"' || char === "'") && !inQuotes) {
inQuotes = true;
quoteChar = char;
} else if (char === quoteChar && inQuotes) {
if (nextChar === quoteChar) {
current += quoteChar; // Handle escaped quotes
i++;
} else inQuotes = !inQuotes;
} else {
inQuotes = false;
quoteChar = null;
}
} else if (char === ',' && !inQuotes) {
columns.push(current.trim()); // Trim column values
columns.push(current.trim());
current = '';
} else current += char;
}
columns.push(current.trim()); // Trim last column value
return columns.map(col => col.replace(/^"|"$/g, '').replace(/""/g, '"'));
columns.push(current.trim());
return columns.map(col => {
// Remove surrounding quotes (both " and ')
col = col.replace(/^["']|["']$/g, '');
// Unescape doubled quotes
return col.replace(/""/g, '"').replace(/''/g, "'");
});
}
// Normalize line endings and split rows
const rows = [];
let currentRow = '', inQuotes = false;
for (const char of csv.replace(/\r\n/g, '\n')) { // Normalize \r\n to \n
if (char === '"') inQuotes = !inQuotes;
let currentRow = '', inQuotes = false, quoteChar: string | null = null;
for (const char of csv.replace(/\r\n/g, '\n')) {
if ((char === '"' || char === "'") && !inQuotes) {
inQuotes = true;
quoteChar = char;
} else if (char === quoteChar && inQuotes) {
inQuotes = false;
quoteChar = null;
}
if (char === '\n' && !inQuotes) {
rows.push(currentRow.trim()); // Trim row
rows.push(currentRow.trim());
currentRow = '';
} else currentRow += char;
}
if (currentRow) rows.push(currentRow.trim()); // Trim last row
if (currentRow) rows.push(currentRow.trim());
// Extract headers
let headers: any = hasHeaders ? rows.splice(0, 1)[0] : null;
if (headers) headers = headers.match(/(?:[^,"']+|"(?:[^"]|"")*"|'(?:[^']|'')*')+/g)?.map((h: any) => h.trim());
// Parse rows
return <T[]>rows.map(r => {
const props = parseLine(r);
const h = headers || (Array(props.length).fill(null).map((_, i) => {
@@ -65,7 +79,6 @@ export function fromCsv<T = any>(csv: string, hasHeaders = true): T[] {
});
}
/**
* Convert an array of objects to a CSV string
*

View File

@@ -1,3 +1,5 @@
export * from 'var-persist';
export * from './arg-parser';
export * from './array';
export * from './aset';
@@ -23,4 +25,4 @@ export * from './template';
export * from './time';
export * from './tts';
export * from './types';
export * from 'var-persist';
export * from './xml';

View File

@@ -27,6 +27,31 @@ export function camelCase(str?: string): string {
return pascal.charAt(0).toLowerCase() + pascal.slice(1);
}
/**
 * Decode HTML escaped characters
 *
 * Supports a fixed set of named entities (`&nbsp;`, `&quot;`, `&apos;`, `&lt;`, `&gt;`, `&cent;`,
 * `&pound;`, `&yen;`, `&euro;`, `&copy;`, `&reg;`, `&trade;`, `&times;`, `&divide;`, `&amp;`) plus
 * decimal (`&#65;`) and hexadecimal (`&#x41;`) numeric references.
 *
 * Every reference is decoded exactly once in a single pass, so text produced by one
 * replacement is never decoded again (`&#38;amp;` becomes `&amp;`, not `&`).
 * Unknown named entities and numeric references beyond U+10FFFF are left untouched.
 *
 * @param html HTML to clean up
 * @returns {string} The decoded text
 */
export function decodeHtml(html: string): string {
	const named = new Map<string, string>([
		['nbsp', '\u00A0'],
		['quot', '"'],
		['apos', "'"],
		['lt', '<'],
		['gt', '>'],
		['cent', '¢'],
		['pound', '£'],
		['yen', '¥'],
		['euro', '€'],
		['copy', '©'],
		['reg', '®'],
		['trade', '™'],
		['times', '×'],
		['divide', '÷'],
		['amp', '&'],
	]);
	return html.replace(/&(#\d+|#x[0-9a-fA-F]+|[a-z]+);/g, (match: string, body: string) => {
		if (!body.startsWith('#')) return named.get(body) ?? match;
		const code = body.charAt(1) === 'x' ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
		// fromCodePoint (unlike fromCharCode) handles astral characters such as emoji,
		// but throws on values past the Unicode range, so those are passed through as-is
		return code <= 0x10FFFF ? String.fromCodePoint(code) : match;
	});
}
/**
* Convert number of bytes into a human-readable size

155
src/xml.ts Normal file
View File

@@ -0,0 +1,155 @@
/**
 * Parses an XML string into a structured JavaScript object.
 *
 * Each element becomes `{ tag, attributes, children }` where `children` holds nested
 * elements and trimmed, entity-decoded text strings. XML declarations / processing
 * instructions, comments and DOCTYPE declarations are skipped; CDATA sections are
 * returned as literal text. Attributes without a value are ignored.
 *
 * The parser is lenient: closing tag names are not validated and truncated or malformed
 * input yields a best-effort result instead of throwing or looping forever.
 *
 * @param {string} xml - The XML string to parse
 * @returns {Object} An object with `tag`, `attributes`, and `children` properties
 * (a string when the input is plain text, `null` when there is nothing to parse)
 */
export function fromXml(xml: string) {
	xml = xml.trim();
	let pos = 0;

	/**
	 * Decodes the predefined XML entities and numeric character references in one pass.
	 * Done locally (rather than through escapeXml) so `&#65;` / `&#x41;` are also handled.
	 */
	function decode(text: string): string {
		return text.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|quot|apos|amp);/g, (match: string, body: string) => {
			switch (body) {
				case 'lt': return '<';
				case 'gt': return '>';
				case 'quot': return '"';
				case 'apos': return "'";
				case 'amp': return '&';
			}
			const code = body.charAt(1) === 'x' ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
			// Out-of-range references are left as written instead of throwing
			return code <= 0x10FFFF ? String.fromCodePoint(code) : match;
		});
	}

	/** Moves past the next `terminator` found at or after `from`, or to the end of input if there is none */
	function skipPast(terminator: string, from = pos) {
		const found = xml.indexOf(terminator, from);
		pos = found === -1 ? xml.length : found + terminator.length;
	}

	/** Parses the next node; returns null at end of input or when a closing tag is reached */
	function parseNode(): any {
		while (pos < xml.length) {
			skipWhitespace();
			if (pos >= xml.length) break;
			if (xml[pos] !== '<') return parseText();
			if (xml.startsWith('<?', pos)) parseDeclaration();
			else if (xml.startsWith('<!--', pos)) parseComment();
			else if (xml.startsWith('<![CDATA[', pos)) return parseCdata();
			else if (xml[pos + 1] === '!') parseDoctype();
			// A closing tag belongs to the enclosing element; leave it for the caller
			else if (xml[pos + 1] === '/') return null;
			else return parseElement();
		}
		return null;
	}

	/** Parses an element (opening tag, attributes, children, closing tag) starting at `<` */
	function parseElement() {
		pos++; // skip <
		const tagName = parseTagName();
		const attributes = parseAttributes();
		if (xml.startsWith('/>', pos)) {
			pos += 2; // skip />
			return { tag: tagName, attributes, children: [] };
		}
		if (xml[pos] === '>') pos++; // skip >
		const children: any[] = [];
		while (pos < xml.length) {
			skipWhitespace();
			if (xml.startsWith('</', pos)) {
				skipPast('>'); // closing tag name is not validated
				break;
			}
			const child = parseNode();
			if (child) children.push(child);
		}
		return { tag: tagName, attributes, children };
	}

	/** Parses and returns the tag name at the current position */
	function parseTagName() {
		const start = pos;
		while (pos < xml.length && /[a-zA-Z0-9_:.-]/.test(xml.charAt(pos))) pos++;
		return xml.slice(start, pos);
	}

	/** Parses and returns an object containing all attributes at the current position */
	function parseAttributes() {
		const attrs: any = {};
		while (pos < xml.length) {
			skipWhitespace();
			if (pos >= xml.length || xml[pos] === '>' || xml.startsWith('/>', pos)) break;
			const name = parseTagName();
			if (!name) {
				pos++; // stray character that cannot be part of a name: skip it so the loop always advances
				continue;
			}
			skipWhitespace();
			if (xml[pos] !== '=') continue; // attribute without a value: ignored
			pos++;
			skipWhitespace();
			attrs[name] = decode(parseAttributeValue());
		}
		return attrs;
	}

	/** Reads a quoted attribute value, or an unquoted one up to whitespace / end of tag */
	function parseAttributeValue(): string {
		const start = pos;
		const quote = xml[pos];
		if (quote === '"' || quote === "'") {
			const close = xml.indexOf(quote, pos + 1);
			const end = close === -1 ? xml.length : close; // unterminated: take the rest of the input
			pos = close === -1 ? xml.length : close + 1;
			return xml.slice(start + 1, end);
		}
		while (pos < xml.length && !/[\s>]/.test(xml.charAt(pos)) && !xml.startsWith('/>', pos)) pos++;
		return xml.slice(start, pos);
	}

	/** Parses and returns text content, or null if empty */
	function parseText() {
		const next = xml.indexOf('<', pos);
		const end = next === -1 ? xml.length : next;
		const text = xml.slice(pos, end).trim();
		pos = end;
		return text ? decode(text) : null;
	}

	/** Returns the literal content of a CDATA section (<![CDATA[ ... ]]>), or null if empty */
	function parseCdata() {
		const start = pos + '<![CDATA['.length;
		skipPast(']]>', start);
		const close = xml.indexOf(']]>', start);
		const text = xml.slice(start, close === -1 ? xml.length : close);
		return text ? text : null;
	}

	/** Skips over XML declaration / processing instruction (<?xml ... ?>) */
	function parseDeclaration() {
		if (xml.indexOf('?>', pos) !== -1) skipPast('?>');
		else skipPast('>'); // tolerate a declaration closed with a bare >
	}

	/** Skips over XML comments (<!-- ... -->) */
	function parseComment() {
		skipPast('-->', pos + 4);
	}

	/** Skips over <!DOCTYPE ...> style declarations, including a bracketed internal subset */
	function parseDoctype() {
		let depth = 0;
		pos += 2; // skip <!
		while (pos < xml.length) {
			const ch = xml[pos++];
			if (ch === '[') depth++;
			else if (ch === ']') depth--;
			else if (ch === '>' && depth <= 0) return;
		}
	}

	/** Advances position past any whitespace characters */
	function skipWhitespace() {
		while (pos < xml.length && /\s/.test(xml.charAt(pos))) pos++;
	}

	return parseNode();
}
/**
 * Converts a JavaScript object into an XML string.
 *
 * Elements without children are written self-closed (`<tag />`). When an element has at
 * least one element child, every child is placed on its own line and nested elements are
 * indented one extra space per level; text-only content stays inline.
 *
 * Non-string primitives (numbers, booleans) are stringified before escaping, both as
 * children and as attribute values. `null` / `undefined` children and attribute values
 * are omitted.
 *
 * @param {Object} obj - Object with `tag`, `attributes`, and `children` properties, or a string
 * @param {string} indent - Current indentation level (used internally for formatting)
 * @returns {string} The formatted XML string
 */
export function toXml(obj: any, indent = ''): string {
	if (obj == null) return '';
	// Text node: strings as before, other primitives via their string form
	if (typeof obj !== 'object') return escapeXml(String(obj));

	const tag = obj.tag;
	const attributes = obj.attributes ?? {};
	const children: any[] = (obj.children ?? []).filter((c: any) => c != null);

	let xml = `${indent}<${tag}`;
	for (const [key, value] of Object.entries(attributes)) {
		if (value == null) continue;
		// String() keeps numeric / boolean attribute values from crashing escapeXml
		xml += ` ${key}="${escapeXml(String(value))}"`;
	}
	if (children.length === 0) return xml + ' />';
	xml += '>';

	const hasComplexChildren = children.some((c: any) => typeof c === 'object');
	for (const child of children) {
		if (hasComplexChildren) xml += '\n';
		xml += toXml(child, hasComplexChildren ? indent + ' ' : '');
	}
	if (hasComplexChildren) xml += `\n${indent}`;
	return xml + `</${tag}>`;
}
/**
 * Escapes or unescapes XML special characters.
 *
 * Encoding replaces `&`, `<`, `>`, `"` and `'` with their predefined entities.
 * Decoding reverses those five entities and additionally resolves decimal (`&#65;`) and
 * hexadecimal (`&#x41;`) character references. Both directions run in a single pass, so
 * nothing is escaped or unescaped twice (`&amp;lt;` decodes to `&lt;`). Unknown entities
 * and references beyond U+10FFFF are left untouched.
 *
 * @param {string} str - The string to process
 * @param {boolean} decode - If true, decodes XML entities; if false, encodes special characters
 * @returns {string} The processed string
 */
export function escapeXml(str: string, decode = false): string {
	if (decode) {
		return str.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|quot|apos|amp);/g, (match: string, body: string) => {
			switch (body) {
				case 'lt': return '<';
				case 'gt': return '>';
				case 'quot': return '"';
				case 'apos': return "'";
				case 'amp': return '&';
			}
			const code = body.charAt(1) === 'x' ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
			// fromCodePoint throws past the Unicode range; keep such references as written
			return code <= 0x10FFFF ? String.fromCodePoint(code) : match;
		});
	}
	return str.replace(/[&<>"']/g, (ch: string) => {
		switch (ch) {
			case '&': return '&amp;';
			case '<': return '&lt;';
			case '>': return '&gt;';
			case '"': return '&quot;';
			default: return '&apos;';
		}
	});
}

170
tests/xml.spec.ts Normal file
View File

@@ -0,0 +1,170 @@
import { toXml, fromXml } from '../src';
// Specs for the XML helpers: parsing (fromXml), serialising (toXml) and a round trip.
describe('XML Parser', () => {
	/** Shorthand for an element node with no attributes */
	const node = (tag: string, children: any[] = []) => ({ tag, attributes: {}, children });

	describe('fromXml', () => {
		it('should parse simple tag', () => {
			expect(fromXml('<root></root>')).toEqual(node('root'));
		});

		it('should parse self-closing tag', () => {
			expect(fromXml('<item />')).toEqual(node('item'));
		});

		it('should parse tag with attributes', () => {
			const expected = { tag: 'user', attributes: { id: '1', name: 'someone' }, children: [] };
			expect(fromXml('<user id="1" name="someone" />')).toEqual(expected);
		});

		it('should parse tag with text content', () => {
			expect(fromXml('<email>someone@example.com</email>')).toEqual(node('email', ['someone@example.com']));
		});

		it('should parse nested tags', () => {
			expect(fromXml('<root><child>text</child></root>')).toEqual(node('root', [node('child', ['text'])]));
		});

		it('should parse multiple children', () => {
			const parsed = fromXml('<root><a /><b /><c /></root>');
			expect(parsed.children.length).toBe(3);
			expect(parsed.children[0]).toEqual(node('a'));
		});

		it('should skip XML declaration', () => {
			expect(fromXml('<?xml version="1.0"?><root />').tag).toBe('root');
		});

		it('should skip comments', () => {
			const parsed = fromXml('<root><!-- comment --><child /></root>');
			expect(parsed.children.length).toBe(1);
			expect(parsed.children[0].tag).toBe('child');
		});

		it('should handle escaped characters', () => {
			const parsed = fromXml('<text>&lt;hello&gt; &amp; &quot;world&quot;</text>');
			expect(parsed.children[0]).toBe('<hello> & "world"');
		});

		it('should parse complex nested structure', () => {
			const doc = `
<root>
<user id="1" name="someone">
<email>someone@example.com</email>
<active />
</user>
</root>
`;
			const parsed = fromXml(doc);
			const user = parsed.children[0];
			expect(parsed.tag).toBe('root');
			expect(user.tag).toBe('user');
			expect(user.attributes.name).toBe('someone');
			expect(user.children.length).toBe(2);
		});
	});

	describe('toXml', () => {
		it('should encode simple tag', () => {
			expect(toXml(node('root'))).toBe('<root />');
		});

		it('should encode tag with attributes', () => {
			const out = toXml({ tag: 'user', attributes: { id: '1', name: 'someone' }, children: [] });
			expect(out).toContain('id="1"');
			expect(out).toContain('name="someone"');
		});

		it('should encode tag with text content', () => {
			expect(toXml(node('email', ['someone@example.com']))).toBe('<email>someone@example.com</email>');
		});

		it('should encode nested tags with indentation', () => {
			const out = toXml(node('root', [node('child', ['text'])]));
			expect(out).toContain('<root>');
			expect(out).toContain(' <child>');
			expect(out).toContain('</root>');
		});

		it('should escape special characters', () => {
			const out = toXml(node('text', ['<hello> & "world"']));
			expect(out).toContain('&lt;hello&gt; &amp; &quot;world&quot;');
		});

		it('should escape attributes', () => {
			const out = toXml({ tag: 'node', attributes: { attr: 'a & b' }, children: [] });
			expect(out).toContain('attr="a &amp; b"');
		});

		it('should handle multiple children', () => {
			const out = toXml(node('root', [node('a'), node('b')]));
			expect(out).toContain('<a />');
			expect(out).toContain('<b />');
		});

		it('should encode string directly', () => {
			expect(toXml('hello')).toBe('hello');
			expect(toXml('a & b')).toBe('a &amp; b');
		});
	});

	describe('round-trip', () => {
		it('should encode and decode to same structure', () => {
			const original = {
				tag: 'root',
				attributes: { id: '1' },
				children: [node('child', ['text'])],
			};
			expect(fromXml(toXml(original))).toEqual(original);
		});
	});
});