Print this page
cpp: squash spaces correctly-ish whilst pasting macros
Sun cpp removes all leading and trailing space from a macro pasting, and
compresses whitespace in the macro body to a single space character.
There is some deviation from this in the Sun implementation which we
don't duplicate.
1) The presence of comments in the macro body affects the minimization of
   runs of spaces.
2) When newlines are encountered in the parameter list of a macro
   invocation, Sun cpp inserts that many newlines prior to any of the
   pasted text, and then in the pasted text pastes those newlines as
   (minimized) spaces.  Escaped new-lines are de-escaped, and otherwise
   treated similarly (in effect, the \ is removed).

@@ -1200,16 +1200,24 @@
         else
                 newp=0;
         return(np);
 }
 
+/*
+ * When a macro substitution must happen, arrange the input stack based on the
+ * macro definition and any parameters such that the expanded macro is what is
+ * next read by the preprocessor as if it were input
+ */
 static char *
 subst(p,sp) register char *p; struct symtab *sp; {
         static char match[]="%s: argument mismatch";
         register char *ca,*vp; int params;
         char *actual[MAXFRM]; /* actual[n] is text of nth actual */
         char acttxt[BUFFERSIZ]; /* space for actuals */
+        /* State while pasting, TRAIL is trailing space, INTRA is in the body */
+        enum { TRAIL, INTRA } state = TRAIL;
+        int pasted = 0;         /* # of characters pasted */
 
         if (0==(vp=sp->value)) return(p);
         if ((p-macforw)<=macdam) {
                 if (++maclvl>symsiz && !rflag) {
                         pperror("%s: macro recursion",sp->name);

@@ -1218,10 +1226,11 @@
         } else {
                 maclvl=0;       /* level decreased */
         }
         macforw=p; macdam=0;    /* new target for decrease in level */
         macnam=sp->name;
+        /* flush all buffered output prior to the expansion */
         dump();
         if (sp==ulnloc) {
                 vp=acttxt; *vp++='\0';
                 sprintf(vp,"%d",lineno[ifno]); while (*vp++);
         } else if (sp==uflloc) {

@@ -1260,10 +1269,24 @@
                                                  * Replace newlines in actual
                                                  * macro parameters by spaces.
                                                  * Keep escaped newlines, they
                                                  * are assumed to be inside a
                                                  * string.
+                                                 * 
+                                                 * XXX: The above is actually
+                                                 * false in a couple of ways.
+                                                 *
+                                                 * 1) Sun cpp turns newlines
+                                                 * into spaces, but inserts an
+                                                 * equal number of newlines
+                                                 * prior to pasting the body.
+                                                 *
+                                                 * 2) Sun does _not_ preserve
+                                                 * escaped newlines; the \ is
+                                                 * removed, and the newline is
+                                                 * otherwise treated
+                                                 * identically to #1.
                                                  */
                                                 if (*inp == '\n' &&
                                                     inp[-1] != '\\')
                                                         *inp = ' ';
                                                 *ca++= *inp++;

@@ -1282,22 +1305,63 @@
                         ppwarn(match,sp->name);
                 while (--params>=0)
                         *pa++=""+1;     /* null string for missing actuals */
                 --flslvl; fasscan();
         }
+
         for (;;) {/* push definition onto front of input stack */
+                /*
+                 * Loop until we hit the end of the macro, or a parameter
+                 * placement.  Note that we expand the macro into the input
+                 * backwards (so it replays forwards.)
+                 */
                 while (!iswarn(*--vp)) {
                         if (bob(p)) {outp=inp=p; p=unfill(p);}
+                                
+                        /* Unless we are mid-paste, swallow all spaces */
+                        if (state == TRAIL) {
+                                while (isspace(*vp) && !iswarn(*vp))
+                                        vp--;
+                        } else {
+                                /*
+                                 * If we're mid-paste, compress spaces to a
+                                 * single space
+                                 */
+                                while (isspace(*vp)) {
+                                        if (!isspace(vp[1])) {
+                                                *vp = ' ';
+                                                break;
+                                        } else {
+                                                vp--;
+                                        }
+                                }
+                        }
+                        state = INTRA; /* Hit a non-space */
+                        
+                        if (iswarn(*vp))
+                                break;
                         *--p= *vp;
+                        pasted++;
                 }
                 if (*vp==warnc) {/* insert actual param */
+                        state = INTRA;
                         ca=actual[*--vp-1];
                         while (*--ca) {
                                 if (bob(p)) {outp=inp=p; p=unfill(p);}
                                 *--p= *ca;
+                                pasted++;
+                        }
+                } else {
+                        /*
+                         * Trim leading spaces, but only those from our pasting
+                         */
+                        while (isspace(*p) && pasted > 0) {
+                                p++;
+                                pasted--;
+                        }
+                        break;
                         }
-                } else break;
         }
         outp=inp=p;
         return(p);
 }