Compare commits

..

3 Commits

Author SHA1 Message Date
e8f81bb584 Proper xml to json format
All checks were successful
Build / Publish Docs (push) Successful in 49s
Build / Build NPM Project (push) Successful in 56s
Build / Tag Version (push) Successful in 10s
2026-04-04 18:32:48 -04:00
4179b4010a Revert "added fast-xml-parser for testing"
Some checks failed
Build / Tag Version (push) Has been cancelled
Build / Build NPM Project (push) Has been cancelled
Build / Publish Docs (push) Has been cancelled
This reverts commit 15ac52b6a0.
2026-04-04 18:20:44 -04:00
15ac52b6a0 added fast-xml-parser for testing
Some checks failed
Build / Publish Docs (push) Failing after 16s
Build / Build NPM Project (push) Failing after 54s
Build / Tag Version (push) Has been skipped
2026-04-04 18:18:24 -04:00
3 changed files with 131 additions and 47 deletions

View File

@@ -1,6 +1,6 @@
{ {
"name": "@ztimson/utils", "name": "@ztimson/utils",
"version": "0.29.0", "version": "0.29.1",
"description": "Utility library", "description": "Utility library",
"author": "Zak Timson", "author": "Zak Timson",
"license": "MIT", "license": "MIT",

View File

@@ -3,6 +3,11 @@
* @param {string} xml - The XML string to parse * @param {string} xml - The XML string to parse
* @returns {Object} An object with `tag`, `attributes`, and `children` properties * @returns {Object} An object with `tag`, `attributes`, and `children` properties
*/ */
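/**
* Parses an XML string into a plain JavaScript object keyed by tag name (fast-xml-parser style).
* Attributes are discarded, numeric text is converted to a number, empty tags become '',
* and repeated sibling tags are collected into an array.
* @param {string} xml - The XML string to parse
* @returns {Object} An object with tag names as keys and text content or nested objects as values
*/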
export function fromXml(xml: string) { export function fromXml(xml: string) {
xml = xml.trim(); xml = xml.trim();
let pos = 0; let pos = 0;
@@ -13,8 +18,8 @@ export function fromXml(xml: string) {
pos++; // skip < pos++; // skip <
if(xml[pos] === '?') { if(xml[pos] === '?') {
parseDeclaration(); const declaration = parseDeclaration();
return parseNode(); return { ['?' + declaration]: '', ...parseNode() };
} }
if(xml[pos] === '!') { if(xml[pos] === '!') {
@@ -28,11 +33,13 @@ export function fromXml(xml: string) {
if(xml[pos] === '/' && xml[pos + 1] === '>') { if(xml[pos] === '/' && xml[pos + 1] === '>') {
pos += 2; // skip /> pos += 2; // skip />
return { tag: tagName, attributes, children: [] }; return { [tagName]: '' };
} }
pos++; // skip > pos++; // skip >
const children = []; const children: any[] = [];
let textContent = '';
while(pos < xml.length) { while(pos < xml.length) {
skipWhitespace(); skipWhitespace();
if(xml[pos] === '<' && xml[pos + 1] === '/') { if(xml[pos] === '<' && xml[pos + 1] === '/') {
@@ -42,20 +49,51 @@ export function fromXml(xml: string) {
pos++; // skip > pos++; // skip >
break; break;
} }
const startPos = pos;
const child = parseNode(); const child = parseNode();
if(child) children.push(child); if(typeof child === 'string') {
textContent += child;
} else if(child) {
children.push(child);
} }
return { tag: tagName, attributes, children };
} }
/** Parses and returns the tag name at the current position */ // If only text content, return simple value
if(children.length === 0 && textContent) {
const value = isNumeric(textContent) ? Number(textContent) : textContent;
return { [tagName]: value };
}
// Neither text nor child elements: represent the empty tag as an empty string
if(children.length === 0) {
return { [tagName]: '' };
}
// Merge children into object
const result: any = {};
for(const child of children) {
for(const [key, value] of Object.entries(child)) {
if(result[key]) {
// Convert to array if duplicate tags
if(!Array.isArray(result[key])) {
result[key] = [result[key]];
}
result[key].push(value);
} else {
result[key] = value;
}
}
}
return { [tagName]: result };
}
function parseTagName() { function parseTagName() {
let name = ''; let name = '';
while (pos < xml.length && /[a-zA-Z0-9_:-]/.test(xml[pos])) name += xml[pos++]; while (pos < xml.length && /[a-zA-Z0-9_:-]/.test(xml[pos])) name += xml[pos++];
return name; return name;
} }
/** Parses and returns an object containing all attributes at the current position */
function parseAttributes() { function parseAttributes() {
const attrs: any = {}; const attrs: any = {};
while (pos < xml.length) { while (pos < xml.length) {
@@ -76,7 +114,6 @@ export function fromXml(xml: string) {
return attrs; return attrs;
} }
/** Parses and returns text content, or null if empty */
function parseText() { function parseText() {
let text = ''; let text = '';
while (pos < xml.length && xml[pos] !== '<') text += xml[pos++]; while (pos < xml.length && xml[pos] !== '<') text += xml[pos++];
@@ -84,23 +121,30 @@ export function fromXml(xml: string) {
return text ? escapeXml(text, true) : null; return text ? escapeXml(text, true) : null;
} }
/** Skips over XML declaration (<?xml ... ?>) */
function parseDeclaration() { function parseDeclaration() {
pos++; // skip ?
let name = '';
while (pos < xml.length && xml[pos] !== ' ' && xml[pos] !== '?') {
name += xml[pos++];
}
while (xml[pos] !== '>') pos++; while (xml[pos] !== '>') pos++;
pos++; pos++;
return name;
} }
/** Skips over XML comments (<!-- ... -->) */
function parseComment() { function parseComment() {
while (!(xml[pos] === '-' && xml[pos + 1] === '-' && xml[pos + 2] === '>')) pos++; while (!(xml[pos] === '-' && xml[pos + 1] === '-' && xml[pos + 2] === '>')) pos++;
pos += 3; pos += 3;
} }
/** Advances position past any whitespace characters */
function skipWhitespace() { function skipWhitespace() {
while (pos < xml.length && /\s/.test(xml[pos])) pos++; while (pos < xml.length && /\s/.test(xml[pos])) pos++;
} }
/**
 * Reports whether a tag's text content should be converted to a number.
 *
 * Accepts anything `Number()` parses to a finite value (integers, decimals,
 * exponent notation, hex/octal/binary literals, surrounding whitespace).
 * Blank strings, text with trailing non-numeric characters, and non-finite
 * values ("Infinity", "-Infinity", overflowing literals like "1e999") are
 * rejected so they stay strings instead of becoming values that do not
 * survive JSON serialization.
 *
 * @param str - Raw text content of an element
 * @returns `true` when `Number(str)` is a finite number and `str` is not blank
 */
function isNumeric(str: string): boolean {
	// Number('') and Number('   ') evaluate to 0, so blank text must be rejected explicitly.
	if(str.trim() === '') return false;
	// Number() yields NaN for partial matches such as "12abc"; isFinite rejects NaN and +/-Infinity.
	return Number.isFinite(Number(str));
}
return parseNode(); return parseNode();
} }

View File

@@ -5,71 +5,80 @@ describe('XML Parser', () => {
it('should parse simple tag', () => { it('should parse simple tag', () => {
const xml = '<root></root>'; const xml = '<root></root>';
const result = fromXml(xml); const result = fromXml(xml);
expect(result).toEqual({ tag: 'root', attributes: {}, children: [] }); expect(result).toEqual({ root: '' });
}); });
it('should parse self-closing tag', () => { it('should parse self-closing tag', () => {
const xml = '<item />'; const xml = '<item />';
const result = fromXml(xml); const result = fromXml(xml);
expect(result).toEqual({ tag: 'item', attributes: {}, children: [] }); expect(result).toEqual({ item: '' });
}); });
it('should parse tag with attributes', () => { it('should parse tag with attributes (ignored in fast-xml-parser format)', () => {
const xml = '<user id="1" name="someone" />'; const xml = '<user id="1" name="someone" />';
const result = fromXml(xml); const result = fromXml(xml);
expect(result).toEqual({ expect(result).toEqual({ user: '' });
tag: 'user',
attributes: { id: '1', name: 'someone' },
children: []
});
}); });
it('should parse tag with text content', () => { it('should parse tag with text content', () => {
const xml = '<email>someone@example.com</email>'; const xml = '<email>someone@example.com</email>';
const result = fromXml(xml); const result = fromXml(xml);
expect(result).toEqual({ expect(result).toEqual({ email: 'someone@example.com' });
tag: 'email',
attributes: {},
children: ['someone@example.com']
}); });
it('should parse tag with numeric content', () => {
const xml = '<ttl>240</ttl>';
const result = fromXml(xml);
expect(result).toEqual({ ttl: 240 });
}); });
it('should parse nested tags', () => { it('should parse nested tags', () => {
const xml = '<root><child>text</child></root>'; const xml = '<root><child>text</child></root>';
const result = fromXml(xml); const result = fromXml(xml);
expect(result).toEqual({ expect(result).toEqual({
tag: 'root', root: {
attributes: {}, child: 'text'
children: [ }
{ tag: 'child', attributes: {}, children: ['text'] }
]
}); });
}); });
it('should parse multiple children', () => { it('should parse multiple children with same tag as array', () => {
const xml = '<root><a /><b /><c /></root>'; const xml = '<root><item>a</item><item>b</item><item>c</item></root>';
const result = fromXml(xml); const result = fromXml(xml);
expect(result.children.length).toBe(3); expect(result).toEqual({
expect(result.children[0]).toEqual({ tag: 'a', attributes: {}, children: [] }); root: {
item: ['a', 'b', 'c']
}
});
}); });
it('should skip XML declaration', () => { it('should parse mixed children', () => {
const xml = '<root><a>1</a><b>2</b><c>3</c></root>';
const result = fromXml(xml);
expect(result.root).toEqual({ a: 1, b: 2, c: 3 });
});
it('should skip XML declaration and include as key', () => {
const xml = '<?xml version="1.0"?><root />'; const xml = '<?xml version="1.0"?><root />';
const result = fromXml(xml); const result = fromXml(xml);
expect(result.tag).toBe('root'); expect(result).toHaveProperty('?xml');
expect(result).toHaveProperty('root');
}); });
it('should skip comments', () => { it('should skip comments', () => {
const xml = '<root><!-- comment --><child /></root>'; const xml = '<root><!-- comment --><child>text</child></root>';
const result = fromXml(xml); const result = fromXml(xml);
expect(result.children.length).toBe(1); expect(result).toEqual({
expect(result.children[0].tag).toBe('child'); root: {
child: 'text'
}
});
}); });
it('should handle escaped characters', () => { it('should handle escaped characters', () => {
const xml = '<text>&lt;hello&gt; &amp; &quot;world&quot;</text>'; const xml = '<text>&lt;hello&gt; &amp; &quot;world&quot;</text>';
const result = fromXml(xml); const result = fromXml(xml);
expect(result.children[0]).toBe('<hello> & "world"'); expect(result.text).toBe('<hello> & "world"');
}); });
it('should parse complex nested structure', () => { it('should parse complex nested structure', () => {
@@ -82,10 +91,37 @@ describe('XML Parser', () => {
</root> </root>
`; `;
const result = fromXml(xml); const result = fromXml(xml);
expect(result.tag).toBe('root'); expect(result).toEqual({
expect(result.children[0].tag).toBe('user'); root: {
expect(result.children[0].attributes.name).toBe('someone'); user: {
expect(result.children[0].children.length).toBe(2); email: 'someone@example.com',
active: ''
}
}
});
});
it('should parse RSS-like structure with multiple items', () => {
const xml = `
<rss>
<channel>
<title>Test Feed</title>
<item>
<title>Item 1</title>
<link>http://example.com/1</link>
</item>
<item>
<title>Item 2</title>
<link>http://example.com/2</link>
</item>
</channel>
</rss>
`;
const result = fromXml(xml);
expect(result.rss.channel.title).toBe('Test Feed');
expect(Array.isArray(result.rss.channel.item)).toBe(true);
expect(result.rss.channel.item.length).toBe(2);
expect(result.rss.channel.item[0].title).toBe('Item 1');
}); });
}); });
@@ -154,7 +190,7 @@ describe('XML Parser', () => {
}); });
describe('round-trip', () => { describe('round-trip', () => {
it('should encode and decode to same structure', () => { it('should parse toXml output back to fast-xml-parser format', () => {
const obj = { const obj = {
tag: 'root', tag: 'root',
attributes: { id: '1' }, attributes: { id: '1' },
@@ -164,7 +200,11 @@ describe('XML Parser', () => {
}; };
const xml = toXml(obj); const xml = toXml(obj);
const parsed = fromXml(xml); const parsed = fromXml(xml);
expect(parsed).toEqual(obj); expect(parsed).toEqual({
root: {
child: 'text'
}
});
}); });
}); });
}); });