[PATCH] Stricter macro substitution syntax

Panu Matilainen pmatilai at laiskiainen.org
Tue Jan 22 15:16:38 UTC 2013


On 01/21/2013 10:14 PM, Alexey Tourbin wrote:
> This change introduces a separate routine to parse for valid macro
> names.  Valid macro names are either regular 3+ character identifiers,
> or special names: "S", "P", "0", "#", "*", "**", macro options such as
> "-o" and "-o*", and macro arguments such as "1".  Other names are not
> valid.  This fixes a number of bugs seen earlier due to sloppy name
> parsing: "%_libdir*" and "%01" were not expanded (these are now expanded
> to e.g. "/usr/lib64*" and "<name>1", as expected).  This also fixes
> bugs in as-is substitution: "%!foo" was expanded to "%foo", and likewise
> "%!!!" was expanded to "%" (and to "%<garbage>" at EOL).
>
> Also, bad names in %name and %{name...} substitutions are now handled
> differently.  In %name form, the name is parsed tentatively; a silent
> fall-back to as-is substitution is provisioned when no valid name can
> be obtain.  In %{name...} form, a failure to obtain a valid name is now
> a syntax error.  Furthermore, only 3 variants are syntactically valid:
> %{name} proper, %{name:...}, and %{name ...}.  This renders invalid
> ambiguous macro substitutions such as the one found in FC18 lvm2.spec:
>
> Requires: util-linux >= %{util-linux_version}
> error: Invalid macro syntax: %{util-linux_version}

Nice. The garbage spotted by this in Fedora specs is ... scary.

OTOH changes of this scale in the macro parser are a bit scary in 
themselves as its sooooo easy to break some obscure corner-case :) I 
think I'll try to give this a spin with some of the more macro-intensive 
packages like Fedora's kernel and font-packages first, but unless I find 
something unexpected breakage, consider it applied. On a related note, 
the test-suite could really use more thorough and more advanced 
test-cases to make these kind of changes less scary.

BTW, while the rules are obviously different between define vs expansion 
due to the built-in special macros, perhaps parseMacroName() function 
here could be extended to cover the %define/%global/%undefine macro name 
sanity checks as well (see my reply wrt the missing-whitespace patch).

	- Panu -

> ---
>   rpmio/macro.c | 164 +++++++++++++++++++++++++++++++++++++++-------------------
>   1 file changed, 111 insertions(+), 53 deletions(-)
>
> diff --git a/rpmio/macro.c b/rpmio/macro.c
> index 9229580..2abdbd4 100644
> --- a/rpmio/macro.c
> +++ b/rpmio/macro.c
> @@ -1011,6 +1011,93 @@ doFoo(MacroBuf mb, int negate, const char * f, size_t fn,
>   }
>
>   /**
> + * Parse for flags before macro name, such as in %{?!name:subst}.
> + * @param s		macro flags start
> + * @param negate	flipped by each '!' flag
> + * @param chkexist	increased by each '?' flag
> + * @return		macro flags end
> + */
> +static const char *
> +parseMacroFlags(const char *s, int *negate, int *chkexist)
> +{
> +    while (1) {
> +	switch (*s) {
> +	case '!':
> +	    *negate = !*negate;
> +	    s++;
> +	    break;
> +	case '?':
> +	    (*chkexist)++;
> +	    s++;
> +	    break;
> +	default:
> +	    return s;
> +	}
> +    }
> +    /* not reached */
> +    return s;
> +}
> +
> +/**
> + * Parse for valid macro name (during expansion).
> + * @param s		macro name start
> + * @return		macro name end, NULL on error
> + */
> +static const char *
> +parseMacroName(const char *s)
> +{
> +    /* alnum identifiers */
> +    if (risalpha(*s) || *s == '_') {
> +	const char *se = s + 1;
> +	while (risalnum(*se) || *se == '_')
> +	    se++;
> +	switch (se - s) {
> +	case 1:
> +	    /* recheck for [SPF] */
> +	    break;
> +	case 2:
> +	    return NULL;
> +	default:
> +	    return se;
> +	}
> +    }
> +    /* simple special names */
> +    switch (*s) {
> +    case '0':
> +    case '#':
> +    case 'S':
> +    case 'P':
> +    case 'F':
> +	s++;
> +	return s;
> +    case '*':
> +	s++;
> +	if (*s == '*')
> +	    s++;
> +	return s;
> +    /* option names */
> +    case '-':
> +	s++;
> +	if (risalnum(*s))
> +	    s++;
> +	else
> +	    return NULL;
> +	if (*s == '*')
> +	    s++;
> +	return s;
> +    }
> +    /* argument names */
> +    if (risdigit(*s)) {
> +	s++;
> +	while (risdigit(*s))
> +	    s++;
> +	return s;
> +    }
> +    /* invalid macro name */
> +    return NULL;
> +}
> +
> +/**
>    * The main macro recursion loop.
>    * @param mb		macro expansion state
>    * @param src		string to expand
> @@ -1083,34 +1170,14 @@ expandMacro(MacroBuf mb, const char *src, size_t slen)
>   	chkexist = 0;
>   	switch ((c = *s)) {
>   	default:		/* %name substitution */
> -		while (strchr("!?", *s) != NULL) {
> -			switch(*s++) {
> -			case '!':
> -				negate = ((negate + 1) % 2);
> -				break;
> -			case '?':
> -				chkexist++;
> -				break;
> -			}
> -		}
> -		f = se = s;
> -		if (*se == '-')
> -			se++;
> -		while((c = *se) && (risalnum(c) || c == '_'))
> -			se++;
> -		/* Recognize non-alnum macros too */
> -		switch (*se) {
> -		case '*':
> -			se++;
> -			if (*se == '*') se++;
> -			break;
> -		case '#':
> -			se++;
> -			break;
> -		default:
> -			break;
> +		f = parseMacroFlags(s, &negate, &chkexist);
> +		fe = parseMacroName(f);
> +		/* no valid name? assume as-is substitution */
> +		if (fe == NULL) {
> +			mbAppend(mb, '%');
> +			continue;
>   		}
> -		fe = se;
> +		se = fe;
>   		/* For "%name " macros ... */
>   		if ((c = *fe) && isblank(c))
>   			if ((lastc = strchr(fe,'\n')) == NULL)
> @@ -1140,48 +1207,39 @@ expandMacro(MacroBuf mb, const char *src, size_t slen)
>   			rc = 1;
>   			continue;
>   		}
> -		f = s+1;/* skip { */
> +		f = parseMacroFlags(s + 1 /* skip { */, &negate, &chkexist);
> +		fe = parseMacroName(f);
>   		se++;	/* skip } */
> -		while (strchr("!?", *f) != NULL) {
> -			switch(*f++) {
> -			case '!':
> -				negate = ((negate + 1) % 2);
> -				break;
> -			case '?':
> -				chkexist++;
> -				break;
> -			}
> +		/* no valid name? syntax error */
> +		if (fe == NULL) {
> +			rpmlog(RPMLOG_ERR,
> +				_("Invalid macro name: %%%.*s\n"), (int)(se - s), s);
> +			rc = 1;
> +			continue;
>   		}
> -		for (fe = f; (c = *fe) && !strchr(" :}", c);)
> -			fe++;
> -		switch (c) {
> +		switch (*fe) {
>   		case ':':
>   			g = fe + 1;
>   			ge = se - 1;
>   			break;
>   		case ' ':
> +		case '\t':
>   			lastc = se-1;
>   			break;
> -		default:
> +		case '}':
>   			break;
> +		default:
> +			rpmlog(RPMLOG_ERR,
> +				_("Invalid macro syntax: %%%.*s\n"), (int)(se - s), s);
> +			rc = 1;
> +			continue;
>   		}
>   		break;
>   	}
>
> -	/* XXX Everything below expects fe > f */
> +	assert(fe > f);
>   	fn = (fe - f);
>   	gn = (ge - g);
> -	if ((fe - f) <= 0) {
> -/* XXX Process % in unknown context */
> -		c = '%';	/* XXX only need to save % */
> -		mbAppend(mb, c);
> -#if 0
> -		rpmlog(RPMLOG_ERR,
> -			_("A %% is followed by an unparseable macro\n"));
> -#endif
> -		s = se;
> -		continue;
> -	}
>
>   	if (mb->macro_trace)
>   		printMacro(mb, s, se);
>



More information about the Rpm-list mailing list