Proper xml to json format
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@ztimson/utils",
|
||||
"version": "0.29.0",
|
||||
"version": "0.29.1",
|
||||
"description": "Utility library",
|
||||
"author": "Zak Timson",
|
||||
"license": "MIT",
|
||||
|
||||
68
src/xml.ts
68
src/xml.ts
@@ -3,6 +3,11 @@
|
||||
* @param {string} xml - The XML string to parse
|
||||
* @returns {Object} An object with `tag`, `attributes`, and `children` properties
|
||||
*/
|
||||
/**
|
||||
* Parses an XML string into a structured JavaScript object (fast-xml-parser format).
|
||||
* @param {string} xml - The XML string to parse
|
||||
* @returns {Object} An object with tag names as keys and text content or nested objects as values
|
||||
*/
|
||||
export function fromXml(xml: string) {
|
||||
xml = xml.trim();
|
||||
let pos = 0;
|
||||
@@ -13,8 +18,8 @@ export function fromXml(xml: string) {
|
||||
pos++; // skip <
|
||||
|
||||
if(xml[pos] === '?') {
|
||||
parseDeclaration();
|
||||
return parseNode();
|
||||
const declaration = parseDeclaration();
|
||||
return { ['?' + declaration]: '', ...parseNode() };
|
||||
}
|
||||
|
||||
if(xml[pos] === '!') {
|
||||
@@ -28,11 +33,13 @@ export function fromXml(xml: string) {
|
||||
|
||||
if(xml[pos] === '/' && xml[pos + 1] === '>') {
|
||||
pos += 2; // skip />
|
||||
return { tag: tagName, attributes, children: [] };
|
||||
return { [tagName]: '' };
|
||||
}
|
||||
|
||||
pos++; // skip >
|
||||
const children = [];
|
||||
const children: any[] = [];
|
||||
let textContent = '';
|
||||
|
||||
while(pos < xml.length) {
|
||||
skipWhitespace();
|
||||
if(xml[pos] === '<' && xml[pos + 1] === '/') {
|
||||
@@ -42,20 +49,51 @@ export function fromXml(xml: string) {
|
||||
pos++; // skip >
|
||||
break;
|
||||
}
|
||||
const startPos = pos;
|
||||
const child = parseNode();
|
||||
if(child) children.push(child);
|
||||
if(typeof child === 'string') {
|
||||
textContent += child;
|
||||
} else if(child) {
|
||||
children.push(child);
|
||||
}
|
||||
return { tag: tagName, attributes, children };
|
||||
}
|
||||
|
||||
/** Parses and returns the tag name at the current position */
|
||||
// If only text content, return simple value
|
||||
if(children.length === 0 && textContent) {
|
||||
const value = isNumeric(textContent) ? Number(textContent) : textContent;
|
||||
return { [tagName]: value };
|
||||
}
|
||||
|
||||
// If only text with no children
|
||||
if(children.length === 0) {
|
||||
return { [tagName]: '' };
|
||||
}
|
||||
|
||||
// Merge children into object
|
||||
const result: any = {};
|
||||
for(const child of children) {
|
||||
for(const [key, value] of Object.entries(child)) {
|
||||
if(result[key]) {
|
||||
// Convert to array if duplicate tags
|
||||
if(!Array.isArray(result[key])) {
|
||||
result[key] = [result[key]];
|
||||
}
|
||||
result[key].push(value);
|
||||
} else {
|
||||
result[key] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { [tagName]: result };
|
||||
}
|
||||
|
||||
/**
 * Reads a tag name starting at the current position.
 * Consumes consecutive characters matching letters, digits, `_`, `:` or `-`
 * and leaves `pos` on the first character that does not match.
 * @returns The consumed name (empty string when nothing matched)
 */
function parseTagName() {
	const nameChar = /[a-zA-Z0-9_:-]/;
	const begin = pos;
	for (; pos < xml.length; pos++) {
		if (!nameChar.test(xml[pos])) break;
	}
	return xml.slice(begin, pos);
}
|
||||
|
||||
/** Parses and returns an object containing all attributes at the current position */
|
||||
function parseAttributes() {
|
||||
const attrs: any = {};
|
||||
while (pos < xml.length) {
|
||||
@@ -76,7 +114,6 @@ export function fromXml(xml: string) {
|
||||
return attrs;
|
||||
}
|
||||
|
||||
/** Parses and returns text content, or null if empty */
|
||||
function parseText() {
|
||||
let text = '';
|
||||
while (pos < xml.length && xml[pos] !== '<') text += xml[pos++];
|
||||
@@ -84,23 +121,30 @@ export function fromXml(xml: string) {
|
||||
return text ? escapeXml(text, true) : null;
|
||||
}
|
||||
|
||||
/**
 * Consumes an XML declaration / processing instruction (`<?name ... ?>`) and returns its name.
 *
 * Expects `pos` to be on the `?` that follows the opening `<`. Afterwards `pos` sits just
 * past the closing `>`, or at the end of the input when the declaration is never closed.
 *
 * @returns The declaration name, e.g. `xml` for `<?xml version="1.0"?>`
 */
function parseDeclaration() {
	pos++; // skip ?
	const nameStart = pos;
	// The name runs until the first space or the terminating '?'
	while (pos < xml.length && xml[pos] !== ' ' && xml[pos] !== '?') pos++;
	const name = xml.slice(nameStart, pos);
	// Bound the scan by the input length: without it an unterminated declaration loops
	// forever, because `xml[pos]` is `undefined` (never '>') once pos passes the end.
	while (pos < xml.length && xml[pos] !== '>') pos++;
	if (pos < xml.length) pos++; // skip >
	return name;
}
|
||||
|
||||
/**
 * Skips over an XML comment (`<!-- ... -->`), leaving `pos` just past the closing `-->`.
 * When the comment is never terminated, `pos` is moved to the end of the input instead
 * of scanning past it indefinitely.
 */
function parseComment() {
	// indexOf yields -1 for an unterminated comment; the previous character-by-character
	// scan had no length bound and never stopped in that case.
	const terminator = xml.indexOf('-->', pos);
	pos = terminator === -1 ? xml.length : terminator + 3;
}
|
||||
|
||||
/** Moves `pos` forward to the next non-whitespace character (or the end of the input) */
function skipWhitespace() {
	const whitespace = /\s/;
	for (; pos < xml.length; pos++) {
		if (!whitespace.test(xml[pos])) return;
	}
}
|
||||
|
||||
/**
 * Checks whether a string can be read as a number.
 * A string qualifies only when it is not blank and both `Number()` and `parseFloat()`
 * produce a non-NaN result for it.
 * @param str - Text to test
 * @returns `true` when the text is numeric
 */
function isNumeric(str: string) {
	const viaNumber = Number(str);
	if (Number.isNaN(viaNumber)) return false;
	const viaParseFloat = parseFloat(str);
	if (Number.isNaN(viaParseFloat)) return false;
	return str.trim() !== '';
}
|
||||
|
||||
return parseNode();
|
||||
}
|
||||
|
||||
|
||||
@@ -5,71 +5,80 @@ describe('XML Parser', () => {
|
||||
it('should parse simple tag', () => {
|
||||
const xml = '<root></root>';
|
||||
const result = fromXml(xml);
|
||||
expect(result).toEqual({ tag: 'root', attributes: {}, children: [] });
|
||||
expect(result).toEqual({ root: '' });
|
||||
});
|
||||
|
||||
it('should parse self-closing tag', () => {
|
||||
const xml = '<item />';
|
||||
const result = fromXml(xml);
|
||||
expect(result).toEqual({ tag: 'item', attributes: {}, children: [] });
|
||||
expect(result).toEqual({ item: '' });
|
||||
});
|
||||
|
||||
it('should parse tag with attributes', () => {
|
||||
it('should parse tag with attributes (ignored in fast-xml-parser format)', () => {
|
||||
const xml = '<user id="1" name="someone" />';
|
||||
const result = fromXml(xml);
|
||||
expect(result).toEqual({
|
||||
tag: 'user',
|
||||
attributes: { id: '1', name: 'someone' },
|
||||
children: []
|
||||
});
|
||||
expect(result).toEqual({ user: '' });
|
||||
});
|
||||
|
||||
it('should parse tag with text content', () => {
|
||||
const xml = '<email>someone@example.com</email>';
|
||||
const result = fromXml(xml);
|
||||
expect(result).toEqual({
|
||||
tag: 'email',
|
||||
attributes: {},
|
||||
children: ['someone@example.com']
|
||||
expect(result).toEqual({ email: 'someone@example.com' });
|
||||
});
|
||||
|
||||
it('should parse tag with numeric content', () => {
|
||||
const xml = '<ttl>240</ttl>';
|
||||
const result = fromXml(xml);
|
||||
expect(result).toEqual({ ttl: 240 });
|
||||
});
|
||||
|
||||
it('should parse nested tags', () => {
|
||||
const xml = '<root><child>text</child></root>';
|
||||
const result = fromXml(xml);
|
||||
expect(result).toEqual({
|
||||
tag: 'root',
|
||||
attributes: {},
|
||||
children: [
|
||||
{ tag: 'child', attributes: {}, children: ['text'] }
|
||||
]
|
||||
root: {
|
||||
child: 'text'
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
it('should parse multiple children', () => {
|
||||
const xml = '<root><a /><b /><c /></root>';
|
||||
it('should parse multiple children with same tag as array', () => {
|
||||
const xml = '<root><item>a</item><item>b</item><item>c</item></root>';
|
||||
const result = fromXml(xml);
|
||||
expect(result.children.length).toBe(3);
|
||||
expect(result.children[0]).toEqual({ tag: 'a', attributes: {}, children: [] });
|
||||
expect(result).toEqual({
|
||||
root: {
|
||||
item: ['a', 'b', 'c']
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
it('should skip XML declaration', () => {
|
||||
it('should parse mixed children', () => {
|
||||
const xml = '<root><a>1</a><b>2</b><c>3</c></root>';
|
||||
const result = fromXml(xml);
|
||||
expect(result.root).toEqual({ a: 1, b: 2, c: 3 });
|
||||
});
|
||||
|
||||
it('should skip XML declaration and include as key', () => {
|
||||
const xml = '<?xml version="1.0"?><root />';
|
||||
const result = fromXml(xml);
|
||||
expect(result.tag).toBe('root');
|
||||
expect(result).toHaveProperty('?xml');
|
||||
expect(result).toHaveProperty('root');
|
||||
});
|
||||
|
||||
it('should skip comments', () => {
|
||||
const xml = '<root><!-- comment --><child /></root>';
|
||||
const xml = '<root><!-- comment --><child>text</child></root>';
|
||||
const result = fromXml(xml);
|
||||
expect(result.children.length).toBe(1);
|
||||
expect(result.children[0].tag).toBe('child');
|
||||
expect(result).toEqual({
|
||||
root: {
|
||||
child: 'text'
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle escaped characters', () => {
|
||||
const xml = '<text><hello> & "world"</text>';
|
||||
const result = fromXml(xml);
|
||||
expect(result.children[0]).toBe('<hello> & "world"');
|
||||
expect(result.text).toBe('<hello> & "world"');
|
||||
});
|
||||
|
||||
it('should parse complex nested structure', () => {
|
||||
@@ -82,10 +91,37 @@ describe('XML Parser', () => {
|
||||
</root>
|
||||
`;
|
||||
const result = fromXml(xml);
|
||||
expect(result.tag).toBe('root');
|
||||
expect(result.children[0].tag).toBe('user');
|
||||
expect(result.children[0].attributes.name).toBe('someone');
|
||||
expect(result.children[0].children.length).toBe(2);
|
||||
expect(result).toEqual({
|
||||
root: {
|
||||
user: {
|
||||
email: 'someone@example.com',
|
||||
active: ''
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
it('should parse RSS-like structure with multiple items', () => {
|
||||
const xml = `
|
||||
<rss>
|
||||
<channel>
|
||||
<title>Test Feed</title>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.com/1</link>
|
||||
</item>
|
||||
<item>
|
||||
<title>Item 2</title>
|
||||
<link>http://example.com/2</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
`;
|
||||
const result = fromXml(xml);
|
||||
expect(result.rss.channel.title).toBe('Test Feed');
|
||||
expect(Array.isArray(result.rss.channel.item)).toBe(true);
|
||||
expect(result.rss.channel.item.length).toBe(2);
|
||||
expect(result.rss.channel.item[0].title).toBe('Item 1');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -154,7 +190,7 @@ describe('XML Parser', () => {
|
||||
});
|
||||
|
||||
describe('round-trip', () => {
|
||||
it('should encode and decode to same structure', () => {
|
||||
it('should parse toXml output back to fast-xml-parser format', () => {
|
||||
const obj = {
|
||||
tag: 'root',
|
||||
attributes: { id: '1' },
|
||||
@@ -164,7 +200,11 @@ describe('XML Parser', () => {
|
||||
};
|
||||
const xml = toXml(obj);
|
||||
const parsed = fromXml(xml);
|
||||
expect(parsed).toEqual(obj);
|
||||
expect(parsed).toEqual({
|
||||
root: {
|
||||
child: 'text'
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user