diff --git a/package.json b/package.json index 9bace7a..3f1fa14 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@ztimson/utils", - "version": "0.29.0", + "version": "0.29.1", "description": "Utility library", "author": "Zak Timson", "license": "MIT", diff --git a/src/xml.ts b/src/xml.ts index d415fdf..cc8e613 100644 --- a/src/xml.ts +++ b/src/xml.ts @@ -3,6 +3,11 @@ * @param {string} xml - The XML string to parse * @returns {Object} An object with `tag`, `attributes`, and `children` properties */ +/** + * Parses an XML string into a structured JavaScript object (fast-xml-parser format). + * @param {string} xml - The XML string to parse + * @returns {Object} An object with tag names as keys and text content or nested objects as values + */ export function fromXml(xml: string) { xml = xml.trim(); let pos = 0; @@ -13,8 +18,8 @@ export function fromXml(xml: string) { pos++; // skip < if(xml[pos] === '?') { - parseDeclaration(); - return parseNode(); + const declaration = parseDeclaration(); + return { ['?' + declaration]: '', ...parseNode() }; } if(xml[pos] === '!') { @@ -28,11 +33,13 @@ export function fromXml(xml: string) { if(xml[pos] === '/' && xml[pos + 1] === '>') { pos += 2; // skip /> - return { tag: tagName, attributes, children: [] }; + return { [tagName]: '' }; } pos++; // skip > - const children = []; + const children: any[] = []; + let textContent = ''; + while(pos < xml.length) { skipWhitespace(); if(xml[pos] === '<' && xml[pos + 1] === '/') { @@ -42,20 +49,51 @@ export function fromXml(xml: string) { pos++; // skip > break; } + const startPos = pos; const child = parseNode(); - if(child) children.push(child); + if(typeof child === 'string') { + textContent += child; + } else if(child) { + children.push(child); + } } - return { tag: tagName, attributes, children }; + + // If only text content, return simple value + if(children.length === 0 && textContent) { + const value = isNumeric(textContent) ? Number(textContent) : textContent; + return { [tagName]: value }; + } + + // If only text with no children + if(children.length === 0) { + return { [tagName]: '' }; + } + + // Merge children into object + const result: any = {}; + for(const child of children) { + for(const [key, value] of Object.entries(child)) { + if(result[key]) { + // Convert to array if duplicate tags + if(!Array.isArray(result[key])) { + result[key] = [result[key]]; + } + result[key].push(value); + } else { + result[key] = value; + } + } + } + + return { [tagName]: result }; } - /** Parses and returns the tag name at the current position */ function parseTagName() { let name = ''; while (pos < xml.length && /[a-zA-Z0-9_:-]/.test(xml[pos])) name += xml[pos++]; return name; } - /** Parses and returns an object containing all attributes at the current position */ function parseAttributes() { const attrs: any = {}; while (pos < xml.length) { @@ -76,7 +114,6 @@ export function fromXml(xml: string) { return attrs; } - /** Parses and returns text content, or null if empty */ function parseText() { let text = ''; while (pos < xml.length && xml[pos] !== '<') text += xml[pos++]; @@ -84,23 +121,30 @@ export function fromXml(xml: string) { return text ? escapeXml(text, true) : null; } - /** Skips over XML declaration () */ function parseDeclaration() { + pos++; // skip ? + let name = ''; + while (pos < xml.length && xml[pos] !== ' ' && xml[pos] !== '?') { + name += xml[pos++]; + } while (xml[pos] !== '>') pos++; pos++; + return name; } - /** Skips over XML comments () */ function parseComment() { while (!(xml[pos] === '-' && xml[pos + 1] === '-' && xml[pos + 2] === '>')) pos++; pos += 3; } - /** Advances position past any whitespace characters */ function skipWhitespace() { while (pos < xml.length && /\s/.test(xml[pos])) pos++; } + function isNumeric(str: string) { + return !isNaN(Number(str)) && !isNaN(parseFloat(str)) && str.trim() !== ''; + } + return parseNode(); } diff --git a/tests/xml.spec.ts b/tests/xml.spec.ts index e78e353..ed91882 100644 --- a/tests/xml.spec.ts +++ b/tests/xml.spec.ts @@ -5,71 +5,80 @@ describe('XML Parser', () => { it('should parse simple tag', () => { const xml = ''; const result = fromXml(xml); - expect(result).toEqual({ tag: 'root', attributes: {}, children: [] }); + expect(result).toEqual({ root: '' }); }); it('should parse self-closing tag', () => { const xml = ''; const result = fromXml(xml); - expect(result).toEqual({ tag: 'item', attributes: {}, children: [] }); + expect(result).toEqual({ item: '' }); }); - it('should parse tag with attributes', () => { + it('should parse tag with attributes (ignored in fast-xml-parser format)', () => { const xml = ''; const result = fromXml(xml); - expect(result).toEqual({ - tag: 'user', - attributes: { id: '1', name: 'someone' }, - children: [] - }); + expect(result).toEqual({ user: '' }); }); it('should parse tag with text content', () => { const xml = 'someone@example.com'; const result = fromXml(xml); - expect(result).toEqual({ - tag: 'email', - attributes: {}, - children: ['someone@example.com'] - }); + expect(result).toEqual({ email: 'someone@example.com' }); + }); + + it('should parse tag with numeric content', () => { + const xml = '240'; + const result = fromXml(xml); + expect(result).toEqual({ ttl: 240 }); }); it('should parse nested tags', () => { const xml = 'text'; const result = fromXml(xml); expect(result).toEqual({ - tag: 'root', - attributes: {}, - children: [ - { tag: 'child', attributes: {}, children: ['text'] } - ] + root: { + child: 'text' + } }); }); - it('should parse multiple children', () => { - const xml = ''; + it('should parse multiple children with same tag as array', () => { + const xml = 'abc'; const result = fromXml(xml); - expect(result.children.length).toBe(3); - expect(result.children[0]).toEqual({ tag: 'a', attributes: {}, children: [] }); + expect(result).toEqual({ + root: { + item: ['a', 'b', 'c'] + } + }); }); - it('should skip XML declaration', () => { + it('should parse mixed children', () => { + const xml = '123'; + const result = fromXml(xml); + expect(result.root).toEqual({ a: 1, b: 2, c: 3 }); + }); + + it('should skip XML declaration and include as key', () => { const xml = ''; const result = fromXml(xml); - expect(result.tag).toBe('root'); + expect(result).toHaveProperty('?xml'); + expect(result).toHaveProperty('root'); }); it('should skip comments', () => { - const xml = ''; + const xml = 'text'; const result = fromXml(xml); - expect(result.children.length).toBe(1); - expect(result.children[0].tag).toBe('child'); + expect(result).toEqual({ + root: { + child: 'text' + } + }); }); it('should handle escaped characters', () => { const xml = '<hello> & "world"'; const result = fromXml(xml); - expect(result.children[0]).toBe(' & "world"'); + expect(result.text).toBe(' & "world"'); }); it('should parse complex nested structure', () => { @@ -82,10 +91,37 @@ describe('XML Parser', () => { `; const result = fromXml(xml); - expect(result.tag).toBe('root'); - expect(result.children[0].tag).toBe('user'); - expect(result.children[0].attributes.name).toBe('someone'); - expect(result.children[0].children.length).toBe(2); + expect(result).toEqual({ + root: { + user: { + email: 'someone@example.com', + active: '' + } + } + }); + }); + + it('should parse RSS-like structure with multiple items', () => { + const xml = ` + + + Test Feed + + Item 1 + http://example.com/1 + + + Item 2 + http://example.com/2 + + + + `; + const result = fromXml(xml); + expect(result.rss.channel.title).toBe('Test Feed'); + expect(Array.isArray(result.rss.channel.item)).toBe(true); + expect(result.rss.channel.item.length).toBe(2); + expect(result.rss.channel.item[0].title).toBe('Item 1'); }); }); @@ -154,7 +190,7 @@ describe('XML Parser', () => { }); describe('round-trip', () => { - it('should encode and decode to same structure', () => { + it('should parse toXml output back to fast-xml-parser format', () => { const obj = { tag: 'root', attributes: { id: '1' }, @@ -164,7 +200,11 @@ describe('XML Parser', () => { }; const xml = toXml(obj); const parsed = fromXml(xml); - expect(parsed).toEqual(obj); + expect(parsed).toEqual({ + root: { + child: 'text' + } + }); }); }); });