Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e8f81bb584 | |||
| 4179b4010a | |||
| 15ac52b6a0 | |||
| c778f3d280 |
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@ztimson/utils",
|
"name": "@ztimson/utils",
|
||||||
"version": "0.28.17",
|
"version": "0.29.1",
|
||||||
"description": "Utility library",
|
"description": "Utility library",
|
||||||
"author": "Zak Timson",
|
"author": "Zak Timson",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
|||||||
68
src/xml.ts
68
src/xml.ts
@@ -3,6 +3,11 @@
|
|||||||
* @param {string} xml - The XML string to parse
|
* @param {string} xml - The XML string to parse
|
||||||
* @returns {Object} An object with `tag`, `attributes`, and `children` properties
|
* @returns {Object} An object with `tag`, `attributes`, and `children` properties
|
||||||
*/
|
*/
|
||||||
|
/**
|
||||||
|
* Parses an XML string into a structured JavaScript object (fast-xml-parser format).
|
||||||
|
* @param {string} xml - The XML string to parse
|
||||||
|
* @returns {Object} An object with tag names as keys and text content or nested objects as values
|
||||||
|
*/
|
||||||
export function fromXml(xml: string) {
|
export function fromXml(xml: string) {
|
||||||
xml = xml.trim();
|
xml = xml.trim();
|
||||||
let pos = 0;
|
let pos = 0;
|
||||||
@@ -13,8 +18,8 @@ export function fromXml(xml: string) {
|
|||||||
pos++; // skip <
|
pos++; // skip <
|
||||||
|
|
||||||
if(xml[pos] === '?') {
|
if(xml[pos] === '?') {
|
||||||
parseDeclaration();
|
const declaration = parseDeclaration();
|
||||||
return parseNode();
|
return { ['?' + declaration]: '', ...parseNode() };
|
||||||
}
|
}
|
||||||
|
|
||||||
if(xml[pos] === '!') {
|
if(xml[pos] === '!') {
|
||||||
@@ -28,11 +33,13 @@ export function fromXml(xml: string) {
|
|||||||
|
|
||||||
if(xml[pos] === '/' && xml[pos + 1] === '>') {
|
if(xml[pos] === '/' && xml[pos + 1] === '>') {
|
||||||
pos += 2; // skip />
|
pos += 2; // skip />
|
||||||
return { tag: tagName, attributes, children: [] };
|
return { [tagName]: '' };
|
||||||
}
|
}
|
||||||
|
|
||||||
pos++; // skip >
|
pos++; // skip >
|
||||||
const children = [];
|
const children: any[] = [];
|
||||||
|
let textContent = '';
|
||||||
|
|
||||||
while(pos < xml.length) {
|
while(pos < xml.length) {
|
||||||
skipWhitespace();
|
skipWhitespace();
|
||||||
if(xml[pos] === '<' && xml[pos + 1] === '/') {
|
if(xml[pos] === '<' && xml[pos + 1] === '/') {
|
||||||
@@ -42,20 +49,51 @@ export function fromXml(xml: string) {
|
|||||||
pos++; // skip >
|
pos++; // skip >
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
const startPos = pos;
|
||||||
const child = parseNode();
|
const child = parseNode();
|
||||||
if(child) children.push(child);
|
if(typeof child === 'string') {
|
||||||
|
textContent += child;
|
||||||
|
} else if(child) {
|
||||||
|
children.push(child);
|
||||||
}
|
}
|
||||||
return { tag: tagName, attributes, children };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Parses and returns the tag name at the current position */
|
// If only text content, return simple value
|
||||||
|
if(children.length === 0 && textContent) {
|
||||||
|
const value = isNumeric(textContent) ? Number(textContent) : textContent;
|
||||||
|
return { [tagName]: value };
|
||||||
|
}
|
||||||
|
|
||||||
|
// If only text with no children
|
||||||
|
if(children.length === 0) {
|
||||||
|
return { [tagName]: '' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge children into object
|
||||||
|
const result: any = {};
|
||||||
|
for(const child of children) {
|
||||||
|
for(const [key, value] of Object.entries(child)) {
|
||||||
|
if(result[key]) {
|
||||||
|
// Convert to array if duplicate tags
|
||||||
|
if(!Array.isArray(result[key])) {
|
||||||
|
result[key] = [result[key]];
|
||||||
|
}
|
||||||
|
result[key].push(value);
|
||||||
|
} else {
|
||||||
|
result[key] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { [tagName]: result };
|
||||||
|
}
|
||||||
|
|
||||||
/**
 * Reads a tag name starting at the current position.
 *
 * Consumes consecutive characters from the set letters, digits, `_`, `:` and `-`,
 * leaving `pos` on the first character that is not part of the name.
 *
 * @returns The consumed name (empty string when no name character is present)
 */
function parseTagName() {
	const begin = pos;
	for(; pos < xml.length; pos++) {
		if(!/[a-zA-Z0-9_:-]/.test(xml[pos])) break;
	}
	return xml.slice(begin, pos);
}
|
||||||
|
|
||||||
/** Parses and returns an object containing all attributes at the current position */
|
|
||||||
function parseAttributes() {
|
function parseAttributes() {
|
||||||
const attrs: any = {};
|
const attrs: any = {};
|
||||||
while (pos < xml.length) {
|
while (pos < xml.length) {
|
||||||
@@ -76,7 +114,6 @@ export function fromXml(xml: string) {
|
|||||||
return attrs;
|
return attrs;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Parses and returns text content, or null if empty */
|
|
||||||
function parseText() {
|
function parseText() {
|
||||||
let text = '';
|
let text = '';
|
||||||
while (pos < xml.length && xml[pos] !== '<') text += xml[pos++];
|
while (pos < xml.length && xml[pos] !== '<') text += xml[pos++];
|
||||||
@@ -84,23 +121,30 @@ export function fromXml(xml: string) {
|
|||||||
return text ? escapeXml(text, true) : null;
|
return text ? escapeXml(text, true) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Consumes an XML declaration / processing instruction (`<?name ... ?>`) and returns its name.
 *
 * Expects `pos` to be on the `?` that follows the opening `<`. On return, `pos` sits just past
 * the closing `>`, or at the end of the input when the declaration is never closed.
 *
 * @returns The declaration name, e.g. `xml` for `<?xml version="1.0"?>`
 */
function parseDeclaration() {
	pos++; // skip ?
	const begin = pos;
	// The name ends at the first whitespace character (not only a literal space), `?` or `>`
	while(pos < xml.length && !/[\s?>]/.test(xml[pos])) pos++;
	const name = xml.slice(begin, pos);
	// Jump past the closing `>`; clamp to the end of the input so an unterminated
	// declaration cannot spin forever reading `undefined` beyond the string
	const close = xml.indexOf('>', pos);
	pos = close === -1 ? xml.length : close + 1;
	return name;
}
|
||||||
|
|
||||||
/**
 * Skips over an XML comment (`<!-- ... -->`).
 *
 * Moves `pos` to just past the first `-->` found at or after the current position.
 * When the comment is never closed, `pos` is moved to the end of the input instead of
 * scanning past the end of the string indefinitely.
 */
function parseComment() {
	const close = xml.indexOf('-->', pos);
	pos = close === -1 ? xml.length : close + 3;
}
|
||||||
|
|
||||||
/** Moves `pos` forward to the next non-whitespace character (or to the end of the input) */
function skipWhitespace() {
	for(; pos < xml.length; pos++) {
		if(!/\s/.test(xml[pos])) return;
	}
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a string is a finite numeric literal that can be converted with `Number()`.
 *
 * Rejected inputs:
 * - empty or whitespace-only strings (`Number('')` would otherwise yield `0`)
 * - strings with non-numeric content such as `12px`
 * - the non-finite spellings `Infinity` / `-Infinity`, which should stay text
 *
 * @param str - Text to test
 * @returns `true` when `Number(str)` yields a finite number
 */
function isNumeric(str: string): boolean {
	if(str.trim() === '') return false;
	return Number.isFinite(Number(str));
}
|
||||||
|
|
||||||
return parseNode();
|
return parseNode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,71 +5,80 @@ describe('XML Parser', () => {
|
|||||||
it('should parse simple tag', () => {
|
it('should parse simple tag', () => {
|
||||||
const xml = '<root></root>';
|
const xml = '<root></root>';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result).toEqual({ tag: 'root', attributes: {}, children: [] });
|
expect(result).toEqual({ root: '' });
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse self-closing tag', () => {
|
it('should parse self-closing tag', () => {
|
||||||
const xml = '<item />';
|
const xml = '<item />';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result).toEqual({ tag: 'item', attributes: {}, children: [] });
|
expect(result).toEqual({ item: '' });
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse tag with attributes', () => {
|
it('should parse tag with attributes (ignored in fast-xml-parser format)', () => {
|
||||||
const xml = '<user id="1" name="someone" />';
|
const xml = '<user id="1" name="someone" />';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result).toEqual({
|
expect(result).toEqual({ user: '' });
|
||||||
tag: 'user',
|
|
||||||
attributes: { id: '1', name: 'someone' },
|
|
||||||
children: []
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse tag with text content', () => {
|
it('should parse tag with text content', () => {
|
||||||
const xml = '<email>someone@example.com</email>';
|
const xml = '<email>someone@example.com</email>';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result).toEqual({
|
expect(result).toEqual({ email: 'someone@example.com' });
|
||||||
tag: 'email',
|
|
||||||
attributes: {},
|
|
||||||
children: ['someone@example.com']
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should parse tag with numeric content', () => {
|
||||||
|
const xml = '<ttl>240</ttl>';
|
||||||
|
const result = fromXml(xml);
|
||||||
|
expect(result).toEqual({ ttl: 240 });
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse nested tags', () => {
|
it('should parse nested tags', () => {
|
||||||
const xml = '<root><child>text</child></root>';
|
const xml = '<root><child>text</child></root>';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result).toEqual({
|
expect(result).toEqual({
|
||||||
tag: 'root',
|
root: {
|
||||||
attributes: {},
|
child: 'text'
|
||||||
children: [
|
}
|
||||||
{ tag: 'child', attributes: {}, children: ['text'] }
|
|
||||||
]
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse multiple children', () => {
|
it('should parse multiple children with same tag as array', () => {
|
||||||
const xml = '<root><a /><b /><c /></root>';
|
const xml = '<root><item>a</item><item>b</item><item>c</item></root>';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result.children.length).toBe(3);
|
expect(result).toEqual({
|
||||||
expect(result.children[0]).toEqual({ tag: 'a', attributes: {}, children: [] });
|
root: {
|
||||||
|
item: ['a', 'b', 'c']
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should skip XML declaration', () => {
|
it('should parse mixed children', () => {
|
||||||
|
const xml = '<root><a>1</a><b>2</b><c>3</c></root>';
|
||||||
|
const result = fromXml(xml);
|
||||||
|
expect(result.root).toEqual({ a: 1, b: 2, c: 3 });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should skip XML declaration and include as key', () => {
|
||||||
const xml = '<?xml version="1.0"?><root />';
|
const xml = '<?xml version="1.0"?><root />';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result.tag).toBe('root');
|
expect(result).toHaveProperty('?xml');
|
||||||
|
expect(result).toHaveProperty('root');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should skip comments', () => {
|
it('should skip comments', () => {
|
||||||
const xml = '<root><!-- comment --><child /></root>';
|
const xml = '<root><!-- comment --><child>text</child></root>';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result.children.length).toBe(1);
|
expect(result).toEqual({
|
||||||
expect(result.children[0].tag).toBe('child');
|
root: {
|
||||||
|
child: 'text'
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle escaped characters', () => {
|
it('should handle escaped characters', () => {
|
||||||
const xml = '<text><hello> & "world"</text>';
|
const xml = '<text><hello> & "world"</text>';
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result.children[0]).toBe('<hello> & "world"');
|
expect(result.text).toBe('<hello> & "world"');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should parse complex nested structure', () => {
|
it('should parse complex nested structure', () => {
|
||||||
@@ -82,10 +91,37 @@ describe('XML Parser', () => {
|
|||||||
</root>
|
</root>
|
||||||
`;
|
`;
|
||||||
const result = fromXml(xml);
|
const result = fromXml(xml);
|
||||||
expect(result.tag).toBe('root');
|
expect(result).toEqual({
|
||||||
expect(result.children[0].tag).toBe('user');
|
root: {
|
||||||
expect(result.children[0].attributes.name).toBe('someone');
|
user: {
|
||||||
expect(result.children[0].children.length).toBe(2);
|
email: 'someone@example.com',
|
||||||
|
active: ''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should parse RSS-like structure with multiple items', () => {
|
||||||
|
const xml = `
|
||||||
|
<rss>
|
||||||
|
<channel>
|
||||||
|
<title>Test Feed</title>
|
||||||
|
<item>
|
||||||
|
<title>Item 1</title>
|
||||||
|
<link>http://example.com/1</link>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<title>Item 2</title>
|
||||||
|
<link>http://example.com/2</link>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
`;
|
||||||
|
const result = fromXml(xml);
|
||||||
|
expect(result.rss.channel.title).toBe('Test Feed');
|
||||||
|
expect(Array.isArray(result.rss.channel.item)).toBe(true);
|
||||||
|
expect(result.rss.channel.item.length).toBe(2);
|
||||||
|
expect(result.rss.channel.item[0].title).toBe('Item 1');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -154,7 +190,7 @@ describe('XML Parser', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe('round-trip', () => {
|
describe('round-trip', () => {
|
||||||
it('should encode and decode to same structure', () => {
|
it('should parse toXml output back to fast-xml-parser format', () => {
|
||||||
const obj = {
|
const obj = {
|
||||||
tag: 'root',
|
tag: 'root',
|
||||||
attributes: { id: '1' },
|
attributes: { id: '1' },
|
||||||
@@ -164,7 +200,11 @@ describe('XML Parser', () => {
|
|||||||
};
|
};
|
||||||
const xml = toXml(obj);
|
const xml = toXml(obj);
|
||||||
const parsed = fromXml(xml);
|
const parsed = fromXml(xml);
|
||||||
expect(parsed).toEqual(obj);
|
expect(parsed).toEqual({
|
||||||
|
root: {
|
||||||
|
child: 'text'
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user