utils/node_modules/minimatch/dist/mjs/ast.js

585 lines
22 KiB
JavaScript
Raw Normal View History

2024-02-07 01:33:07 -05:00
// parse a single path portion
import { parseClass } from './brace-expressions.js';
import { unescape } from './unescape.js';
const types = new Set(['!', '?', '+', '*', '@']);
const isExtglobType = (c) => types.has(c);
// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';
// characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. ( is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);
// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
const reSpecials = new Set('().*{}+?[]^$\\!');
const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// any single thing other than /
const qmark = '[^/]';
// * => any number of characters
const star = qmark + '*?';
// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark + '+?';
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
export class AST {
type;
#root;
#hasMagic;
#uflag = false;
#parts = [];
#parent;
#parentIndex;
#negs;
#filledNegs = false;
#options;
#toString;
// set to true if it's an extglob with no children
// (which really means one child of '')
#emptyExt = false;
constructor(type, parent, options = {}) {
this.type = type;
// extglobs are inherently magical
if (type)
this.#hasMagic = true;
this.#parent = parent;
this.#root = this.#parent ? this.#parent.#root : this;
this.#options = this.#root === this ? options : this.#root.#options;
this.#negs = this.#root === this ? [] : this.#root.#negs;
if (type === '!' && !this.#root.#filledNegs)
this.#negs.push(this);
this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
}
get hasMagic() {
/* c8 ignore start */
if (this.#hasMagic !== undefined)
return this.#hasMagic;
/* c8 ignore stop */
for (const p of this.#parts) {
if (typeof p === 'string')
continue;
if (p.type || p.hasMagic)
return (this.#hasMagic = true);
}
// note: will be undefined until we generate the regexp src and find out
return this.#hasMagic;
}
// reconstructs the pattern
toString() {
if (this.#toString !== undefined)
return this.#toString;
if (!this.type) {
return (this.#toString = this.#parts.map(p => String(p)).join(''));
}
else {
return (this.#toString =
this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
}
}
#fillNegs() {
/* c8 ignore start */
if (this !== this.#root)
throw new Error('should only call on root');
if (this.#filledNegs)
return this;
/* c8 ignore stop */
// call toString() once to fill this out
this.toString();
this.#filledNegs = true;
let n;
while ((n = this.#negs.pop())) {
if (n.type !== '!')
continue;
// walk up the tree, appending everthing that comes AFTER parentIndex
let p = n;
let pp = p.#parent;
while (pp) {
for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
for (const part of n.#parts) {
/* c8 ignore start */
if (typeof part === 'string') {
throw new Error('string part in extglob AST??');
}
/* c8 ignore stop */
part.copyIn(pp.#parts[i]);
}
}
p = pp;
pp = p.#parent;
}
}
return this;
}
push(...parts) {
for (const p of parts) {
if (p === '')
continue;
/* c8 ignore start */
if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
throw new Error('invalid part: ' + p);
}
/* c8 ignore stop */
this.#parts.push(p);
}
}
toJSON() {
const ret = this.type === null
? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
: [this.type, ...this.#parts.map(p => p.toJSON())];
if (this.isStart() && !this.type)
ret.unshift([]);
if (this.isEnd() &&
(this === this.#root ||
(this.#root.#filledNegs && this.#parent?.type === '!'))) {
ret.push({});
}
return ret;
}
isStart() {
if (this.#root === this)
return true;
// if (this.type) return !!this.#parent?.isStart()
if (!this.#parent?.isStart())
return false;
if (this.#parentIndex === 0)
return true;
// if everything AHEAD of this is a negation, then it's still the "start"
const p = this.#parent;
for (let i = 0; i < this.#parentIndex; i++) {
const pp = p.#parts[i];
if (!(pp instanceof AST && pp.type === '!')) {
return false;
}
}
return true;
}
isEnd() {
if (this.#root === this)
return true;
if (this.#parent?.type === '!')
return true;
if (!this.#parent?.isEnd())
return false;
if (!this.type)
return this.#parent?.isEnd();
// if not root, it'll always have a parent
/* c8 ignore start */
const pl = this.#parent ? this.#parent.#parts.length : 0;
/* c8 ignore stop */
return this.#parentIndex === pl - 1;
}
copyIn(part) {
if (typeof part === 'string')
this.push(part);
else
this.push(part.clone(this));
}
clone(parent) {
const c = new AST(this.type, parent);
for (const p of this.#parts) {
c.copyIn(p);
}
return c;
}
static #parseAST(str, ast, pos, opt) {
let escaping = false;
let inBrace = false;
let braceStart = -1;
let braceNeg = false;
if (ast.type === null) {
// outside of a extglob, append until we find a start
let i = pos;
let acc = '';
while (i < str.length) {
const c = str.charAt(i++);
// still accumulate escapes at this point, but we do ignore
// starts that are escaped
if (escaping || c === '\\') {
escaping = !escaping;
acc += c;
continue;
}
if (inBrace) {
if (i === braceStart + 1) {
if (c === '^' || c === '!') {
braceNeg = true;
}
}
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
inBrace = false;
}
acc += c;
continue;
}
else if (c === '[') {
inBrace = true;
braceStart = i;
braceNeg = false;
acc += c;
continue;
}
if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
ast.push(acc);
acc = '';
const ext = new AST(c, ast);
i = AST.#parseAST(str, ext, i, opt);
ast.push(ext);
continue;
}
acc += c;
}
ast.push(acc);
return i;
}
// some kind of extglob, pos is at the (
// find the next | or )
let i = pos + 1;
let part = new AST(null, ast);
const parts = [];
let acc = '';
while (i < str.length) {
const c = str.charAt(i++);
// still accumulate escapes at this point, but we do ignore
// starts that are escaped
if (escaping || c === '\\') {
escaping = !escaping;
acc += c;
continue;
}
if (inBrace) {
if (i === braceStart + 1) {
if (c === '^' || c === '!') {
braceNeg = true;
}
}
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
inBrace = false;
}
acc += c;
continue;
}
else if (c === '[') {
inBrace = true;
braceStart = i;
braceNeg = false;
acc += c;
continue;
}
if (isExtglobType(c) && str.charAt(i) === '(') {
part.push(acc);
acc = '';
const ext = new AST(c, part);
part.push(ext);
i = AST.#parseAST(str, ext, i, opt);
continue;
}
if (c === '|') {
part.push(acc);
acc = '';
parts.push(part);
part = new AST(null, ast);
continue;
}
if (c === ')') {
if (acc === '' && ast.#parts.length === 0) {
ast.#emptyExt = true;
}
part.push(acc);
acc = '';
ast.push(...parts, part);
return i;
}
acc += c;
}
// unfinished extglob
// if we got here, it was a malformed extglob! not an extglob, but
// maybe something else in there.
ast.type = null;
ast.#hasMagic = undefined;
ast.#parts = [str.substring(pos - 1)];
return i;
}
static fromGlob(pattern, options = {}) {
const ast = new AST(null, undefined, options);
AST.#parseAST(pattern, ast, 0, options);
return ast;
}
// returns the regular expression if there's magic, or the unescaped
// string if not.
toMMPattern() {
// should only be called on root
/* c8 ignore start */
if (this !== this.#root)
return this.#root.toMMPattern();
/* c8 ignore stop */
const glob = this.toString();
const [re, body, hasMagic, uflag] = this.toRegExpSource();
// if we're in nocase mode, and not nocaseMagicOnly, then we do
// still need a regular expression if we have to case-insensitively
// match capital/lowercase characters.
const anyMagic = hasMagic ||
this.#hasMagic ||
(this.#options.nocase &&
!this.#options.nocaseMagicOnly &&
glob.toUpperCase() !== glob.toLowerCase());
if (!anyMagic) {
return body;
}
const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
return Object.assign(new RegExp(`^${re}$`, flags), {
_src: re,
_glob: glob,
});
}
// returns the string match, the regexp source, whether there's magic
// in the regexp (so a regular expression is required) and whether or
// not the uflag is needed for the regular expression (for posix classes)
// TODO: instead of injecting the start/end at this point, just return
// the BODY of the regexp, along with the start/end portions suitable
// for binding the start/end in either a joined full-path makeRe context
// (where we bind to (^|/), or a standalone matchPart context (where
// we bind to ^, and not /). Otherwise slashes get duped!
//
// In part-matching mode, the start is:
// - if not isStart: nothing
// - if traversal possible, but not allowed: ^(?!\.\.?$)
// - if dots allowed or not possible: ^
// - if dots possible and not allowed: ^(?!\.)
// end is:
// - if not isEnd(): nothing
// - else: $
//
// In full-path matching mode, we put the slash at the START of the
// pattern, so start is:
// - if first pattern: same as part-matching mode
// - if not isStart(): nothing
// - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
// - if dots allowed or not possible: /
// - if dots possible and not allowed: /(?!\.)
// end is:
// - if last pattern, same as part-matching mode
// - else nothing
//
// Always put the (?:$|/) on negated tails, though, because that has to be
// there to bind the end of the negated pattern portion, and it's easier to
// just stick it in now rather than try to inject it later in the middle of
// the pattern.
//
// We can just always return the same end, and leave it up to the caller
// to know whether it's going to be used joined or in parts.
// And, if the start is adjusted slightly, can do the same there:
// - if not isStart: nothing
// - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
// - if dots allowed or not possible: (?:/|^)
// - if dots possible and not allowed: (?:/|^)(?!\.)
//
// But it's better to have a simpler binding without a conditional, for
// performance, so probably better to return both start options.
//
// Then the caller just ignores the end if it's not the first pattern,
// and the start always gets applied.
//
// But that's always going to be $ if it's the ending pattern, or nothing,
// so the caller can just attach $ at the end of the pattern when building.
//
// So the todo is:
// - better detect what kind of start is needed
// - return both flavors of starting pattern
// - attach $ at the end of the pattern when creating the actual RegExp
//
// Ah, but wait, no, that all only applies to the root when the first pattern
// is not an extglob. If the first pattern IS an extglob, then we need all
// that dot prevention biz to live in the extglob portions, because eg
// +(*|.x*) can match .xy but not .yx.
//
// So, return the two flavors if it's #root and the first child is not an
// AST, otherwise leave it to the child AST to handle it, and there,
// use the (?:^|/) style of start binding.
//
// Even simplified further:
// - Since the start for a join is eg /(?!\.) and the start for a part
// is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
// or start or whatever) and prepend ^ or / at the Regexp construction.
toRegExpSource(allowDot) {
const dot = allowDot ?? !!this.#options.dot;
if (this.#root === this)
this.#fillNegs();
if (!this.type) {
const noEmpty = this.isStart() && this.isEnd();
const src = this.#parts
.map(p => {
const [re, _, hasMagic, uflag] = typeof p === 'string'
? AST.#parseGlob(p, this.#hasMagic, noEmpty)
: p.toRegExpSource(allowDot);
this.#hasMagic = this.#hasMagic || hasMagic;
this.#uflag = this.#uflag || uflag;
return re;
})
.join('');
let start = '';
if (this.isStart()) {
if (typeof this.#parts[0] === 'string') {
// this is the string that will match the start of the pattern,
// so we need to protect against dots and such.
// '.' and '..' cannot match unless the pattern is that exactly,
// even if it starts with . or dot:true is set.
const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
if (!dotTravAllowed) {
const aps = addPatternStart;
// check if we have a possibility of matching . or ..,
// and prevent that.
const needNoTrav =
// dots are allowed, and the pattern starts with [ or .
(dot && aps.has(src.charAt(0))) ||
// the pattern starts with \., and then [ or .
(src.startsWith('\\.') && aps.has(src.charAt(2))) ||
// the pattern starts with \.\., and then [ or .
(src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
// no need to prevent dots if it can't match a dot, or if a
// sub-pattern will be preventing it anyway.
const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
}
}
}
// append the "end of path portion" pattern to negation tails
let end = '';
if (this.isEnd() &&
this.#root.#filledNegs &&
this.#parent?.type === '!') {
end = '(?:$|\\/)';
}
const final = start + src + end;
return [
final,
unescape(src),
(this.#hasMagic = !!this.#hasMagic),
this.#uflag,
];
}
// We need to calculate the body *twice* if it's a repeat pattern
// at the start, once in nodot mode, then again in dot mode, so a
// pattern like *(?) can match 'x.y'
const repeated = this.type === '*' || this.type === '+';
// some kind of extglob
const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
let body = this.#partsToRegExp(dot);
if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
// invalid extglob, has to at least be *something* present, if it's
// the entire path portion.
const s = this.toString();
this.#parts = [s];
this.type = null;
this.#hasMagic = undefined;
return [s, unescape(this.toString()), false, false];
}
// XXX abstract out this map method
let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot
? ''
: this.#partsToRegExp(true);
if (bodyDotAllowed === body) {
bodyDotAllowed = '';
}
if (bodyDotAllowed) {
body = `(?:${body})(?:${bodyDotAllowed})*?`;
}
// an empty !() is exactly equivalent to a starNoEmpty
let final = '';
if (this.type === '!' && this.#emptyExt) {
final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
}
else {
const close = this.type === '!'
? // !() must match something,but !(x) can match ''
'))' +
(this.isStart() && !dot && !allowDot ? startNoDot : '') +
star +
')'
: this.type === '@'
? ')'
: this.type === '?'
? ')?'
: this.type === '+' && bodyDotAllowed
? ')'
: this.type === '*' && bodyDotAllowed
? `)?`
: `)${this.type}`;
final = start + body + close;
}
return [
final,
unescape(body),
(this.#hasMagic = !!this.#hasMagic),
this.#uflag,
];
}
#partsToRegExp(dot) {
return this.#parts
.map(p => {
// extglob ASTs should only contain parent ASTs
/* c8 ignore start */
if (typeof p === 'string') {
throw new Error('string type in extglob ast??');
}
/* c8 ignore stop */
// can ignore hasMagic, because extglobs are already always magic
const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
this.#uflag = this.#uflag || uflag;
return re;
})
.filter(p => !(this.isStart() && this.isEnd()) || !!p)
.join('|');
}
static #parseGlob(glob, hasMagic, noEmpty = false) {
let escaping = false;
let re = '';
let uflag = false;
for (let i = 0; i < glob.length; i++) {
const c = glob.charAt(i);
if (escaping) {
escaping = false;
re += (reSpecials.has(c) ? '\\' : '') + c;
continue;
}
if (c === '\\') {
if (i === glob.length - 1) {
re += '\\\\';
}
else {
escaping = true;
}
continue;
}
if (c === '[') {
const [src, needUflag, consumed, magic] = parseClass(glob, i);
if (consumed) {
re += src;
uflag = uflag || needUflag;
i += consumed - 1;
hasMagic = hasMagic || magic;
continue;
}
}
if (c === '*') {
if (noEmpty && glob === '*')
re += starNoEmpty;
else
re += star;
hasMagic = true;
continue;
}
if (c === '?') {
re += qmark;
hasMagic = true;
continue;
}
re += regExpEscape(c);
}
return [re, unescape(glob), !!hasMagic, uflag];
}
}
//# sourceMappingURL=ast.js.map