Index: /trunk/src/kmk/Makefile.kmk
===================================================================
--- /trunk/src/kmk/Makefile.kmk	(revision 3188)
+++ /trunk/src/kmk/Makefile.kmk	(revision 3189)
@@ -222,5 +222,5 @@
 kmk_DEFS.x86 = CONFIG_WITH_OPTIMIZATION_HACKS
 kmk_DEFS.amd64 = CONFIG_WITH_OPTIMIZATION_HACKS
-kmk_DEFS.win = CONFIG_NEW_WIN32_CTRL_EVENT
+kmk_DEFS.win = CONFIG_NEW_WIN32_CTRL_EVENT CONFIG_WITH_OUTPUT_IN_MEMORY
 kmk_DEFS.debug = CONFIG_WITH_MAKE_STATS
 ifdef CONFIG_WITH_MAKE_STATS
Index: /trunk/src/kmk/function.c
===================================================================
--- /trunk/src/kmk/function.c	(revision 3188)
+++ /trunk/src/kmk/function.c	(revision 3189)
@@ -2570,6 +2570,10 @@
   output_start ();
 
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+  errfd = -1; /** @todo fixme */
+#else
   errfd = (output_context && output_context->err >= 0
            ? output_context->err : FD_STDERR);
+#endif
 
 #if defined(__MSDOS__)
Index: /trunk/src/kmk/output.c
===================================================================
--- /trunk/src/kmk/output.c	(revision 3188)
+++ /trunk/src/kmk/output.c	(revision 3189)
@@ -60,17 +60,316 @@
 #endif
 
+
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+# define MEMBUF_MIN_SEG_SIZE  4096
+# define MEMBUF_MAX_SEG_SIZE  (512*1024)
+# define MEMBUF_MAX_MOVE_LEN  (  MEMBUF_MIN_SEG_SIZE \
+                               - offsetof (struct output_segment, runs) \
+                               - sizeof (struct output_run))
+
+static void *acquire_semaphore (void);
+static void  release_semaphore (void *);
+static int   log_working_directory (int);
+
+/* Internal worker for output_dump and membuf_dump_most. */
+static void membuf_dump (struct output *out)
+{
+  if (out->out.total || out->err.total)
+    {
+      int traced = 0;
+      struct output_run *err_run;
+      struct output_run *out_run;
+      struct output_segment *seg;
+      FILE *prevdst;
+
+      /* Try to acquire the semaphore.  If it fails, dump the output
+         unsynchronized; still better than silently discarding it.
+         We want to keep this lock for as little time as possible.  */
+      void *sem = acquire_semaphore ();
+
+      /* Log the working directory for this dump.  */
+      if (print_directory_flag && output_sync != OUTPUT_SYNC_RECURSE)
+        traced = log_working_directory (1);
+
+      /* Work the out and err sequences in parallel. */
+      out_run = out->out.head_run;
+      err_run = out->err.head_run;
+      prevdst = NULL;
+      while (err_run || out_run)
+        {
+          FILE       *dst;
+          const void *src;
+          size_t      len;
+          if (out_run && (!err_run || out_run->seqno <= err_run->seqno))
+            {
+              src = out_run + 1;
+              len = out_run->len;
+              dst = stdout;
+              out_run = out_run->next;
+            }
+          else
+            {
+              src = err_run + 1;
+              len = err_run->len;
+              dst = stderr;
+              err_run = err_run->next;
+            }
+          if (dst != prevdst)
+            fflush(prevdst);
+          prevdst = dst;
+#ifdef KBUILD_OS_WINDOWS
+          maybe_con_fwrite (src, len, 1, dst);
+#else
+          fwrite (src, len, 1, dst);
+#endif
+        }
+      if (prevdst)
+        fflush (prevdst);
+
+      if (traced)
+        log_working_directory (0);
+
+      /* Exit the critical section.  */
+      if (sem)
+        release_semaphore (sem);
+
+      /* Free the segments and reset the state. */
+      while ((seg = out->out.head_seg))
+        {
+         out->out.head_seg = seg->next;
+         free (seg);
+        }
+      out->out.tail_seg = NULL;
+      out->out.tail_run = NULL;
+      out->out.head_run = NULL;
+      out->out.left     = 0;
+      out->out.total    = 0;
+
+      while ((seg = out->err.head_seg))
+        {
+         out->err.head_seg = seg->next;
+         free (seg);
+        }
+      out->err.tail_seg = NULL;
+      out->err.tail_run = NULL;
+      out->err.head_run = NULL;
+      out->err.left     = 0;
+      out->err.total    = 0;
+
+      out->seqno = 0;
+    }
+  else
+    assert (out->out.head_seg == NULL && out->err.head_seg == NULL);
+}
+
+/* Writes up to LEN bytes to the given segment.
+   Returns how much was actually written.  */
+static size_t
+membuf_write_segment (struct output_membuf *membuf, struct output_segment *seg,
+                      const char *src, size_t len, unsigned int *pseqno)
+{
+  size_t written = 0;
+  if (seg && membuf->left > 0)
+    {
+      struct output_run *run = membuf->tail_run;
+      char  *dst = (char *)(run + 1) + run->len;
+      assert ((uintptr_t)run - (uintptr_t)seg < seg->size);
+
+      /* If the sequence number didn't change, then we can append
+         to the current run without further considerations. */
+      if (run->seqno == *pseqno)
+          written = len;
+      /* If the current run does not end with a newline, don't start a new
+         run till we encounter one. */
+      else if (dst[-1] != '\n')
+        {
+          char const *srcnl = (const char *)memchr (src, '\n', len);
+          written = srcnl ? srcnl - src + 1 : len;
+        }
+      /* Try create a new empty run and append to it. */
+      else
+        {
+          size_t const offnextrun = (  (uintptr_t)dst - (uintptr_t)(seg)
+                                     + sizeof(void *) - 1)
+                                  & ~(sizeof(void *) - 1);
+          if (offnextrun > seg->size - sizeof (struct output_run) * 2)
+            return 0; /* need new segment */
+
+          run = run->next = (struct output_run *)((char *)seg + offnextrun);
+          run->next  = NULL;
+          run->seqno = ++(*pseqno);
+          run->len   = 0;
+          membuf->tail_run = run;
+          membuf->left = seg->size - (offnextrun + sizeof (*run));
+          dst = (char *)(run + 1);
+          written = len;
+        }
+
+      /* Append to the current run. */
+      if (written > membuf->left)
+        written = membuf->left;
+      memcpy (dst, src, written);
+      run->len += written;
+      membuf->left -= written;
+    }
+  return written;
+}
+
+/* Helper for membuf_new_segment_write that figures out now much data needs to
+   be moved from the previous run in order to make it end with a newline.  */
+static size_t membuf_calc_move_len (struct output_run *tail_run)
+{
+  size_t to_move = 0;
+  if (tail_run)
+    {
+      const char *data = (const char *)(tail_run + 1);
+      size_t off = tail_run->len;
+      while (off > 0 && data[off - 1] != '\n')
+        off--;
+      to_move = tail_run->len - off;
+      if (to_move  >=  MEMBUF_MAX_MOVE_LEN)
+        to_move = 0;
+    }
+  return to_move;
+}
+
+/* Allocates a new segment and writes to it.
+   This will take care to make sure the previous run terminates with
+   a newline so that we pass whole lines to fwrite when dumping. */
+static size_t
+membuf_new_segment_write (struct output_membuf *membuf, const char *src,
+                          size_t len, unsigned int *pseqno)
+{
+  struct output_run     *prev_run = membuf->tail_run;
+  struct output_segment *prev_seg = membuf->tail_seg;
+  size_t const           to_move  = membuf_calc_move_len (prev_run);
+  struct output_segment *new_seg;
+  size_t written;
+  char *dst;
+
+  /* Figure the the segment size.  We start with MEMBUF_MIN_SEG_SIZE and double
+     it each time till we reach MEMBUF_MAX_SEG_SIZE. */
+  size_t const offset_runs = offsetof (struct output_segment, runs);
+  size_t segsize = !prev_seg ? MEMBUF_MIN_SEG_SIZE
+                 : prev_seg->size >= MEMBUF_MAX_SEG_SIZE ? MEMBUF_MAX_SEG_SIZE
+                 : prev_seg->size * 2;
+  while (   segsize < to_move + len + offset_runs + sizeof (struct output_run) * 2
+         && segsize < MEMBUF_MAX_SEG_SIZE)
+    segsize *= 2;
+
+  /* Allocate the segment and link it and the first run. */
+  new_seg = (struct output_segment *)xmalloc (segsize);
+  new_seg->size = segsize;
+  new_seg->next = NULL;
+  new_seg->runs[0].next = NULL;
+  if (!prev_seg)
+    {
+      membuf->head_seg = new_seg;
+      membuf->head_run = &new_seg->runs[0];
+    }
+  else
+    {
+      prev_seg->next = new_seg;
+      prev_run->next = &new_seg->runs[0];
+    }
+  membuf->tail_seg = new_seg;
+  membuf->tail_run = &new_seg->runs[0];
+  membuf->total += segsize;
+  membuf->left = segsize - sizeof (struct output_run);
+
+  /* Initialize and write data to the first run. */
+  dst = (char *)&new_seg->runs[0]; /* Try bypass gcc array size cleverness. */
+  dst += sizeof (struct output_run);
+  assert (MEMBUF_MAX_MOVE_LEN < MEMBUF_MIN_SEG_SIZE);
+  if (to_move > 0)
+    {
+      /* Move to_move bytes from the previous run in hope that we'll get a
+         newline to soon.  Afterwards call membuf_segment_write to work SRC. */
+      assert (prev_run != NULL);
+      assert (membuf->left >= to_move);
+      prev_run->len -= to_move;
+      new_seg->runs[0].len = to_move;
+      new_seg->runs[0].seqno = prev_run->seqno;
+      memcpy (dst, (const char *)(prev_run + 1) + prev_run->len, to_move);
+      membuf->left -= to_move;
+
+      written = membuf_write_segment (membuf, new_seg, src, len, pseqno);
+    }
+  else
+    {
+      /* Create a run with up to LEN from SRC. */
+      written = len;
+      if (written > membuf->left)
+        written = membuf->left;
+      new_seg->runs[0].len = written;
+      new_seg->runs[0].seqno = ++(*pseqno);
+      memcpy (dst, src, written);
+      membuf->left -= written;
+    }
+  return written;
+}
+
+/* Worker for output_write that will try dump as much as possible of the
+   output, but making sure to try leave unfinished lines. */
+static void
+membuf_dump_most (struct output *out)
+{
+  /** @todo check last segment and make local copies.   */
+  membuf_dump (out);
+}
+
+/* write/fwrite like function. */
+void
+output_write (struct output *out, int is_err, const char *src, size_t len)
+{
+  if (!out || !out->syncout)
+    {
+      FILE *f = is_err ? stderr : stdout;
+#ifdef KBUILD_OS_WINDOWS
+      maybe_con_fwrite (src, len, 1, f);
+#else
+      fwrite (src, len, 1, f);
+#endif
+      fflush (f);
+    }
+  else
+    {
+      struct output_membuf *membuf = is_err ? &out->err : &out->out;
+      while (len > 0)
+        {
+          size_t runlen = membuf_write_segment (membuf, membuf->tail_seg, src, len, &out->seqno);
+          if (!runlen)
+            {
+              size_t max_total = sizeof (membuf) <= 4 ? 512*1024 : 16*1024*1024;
+              if (membuf->total < max_total)
+                runlen = membuf_new_segment_write (membuf, src, len, &out->seqno);
+              else
+                membuf_dump_most (out);
+            }
+          /* advance */
+          len -= runlen;
+          src += runlen;
+        }
+    }
+}
+
+#endif /* CONFIG_WITH_OUTPUT_IN_MEMORY */
+
+
 /* Write a string to the current STDOUT or STDERR.  */
 static void
 _outputs (struct output *out, int is_err, const char *msg)
 {
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+  output_write (out, is_err, msg, strlen (msg));
+#else  /* !CONFIG_WITH_OUTPUT_IN_MEMORY */
   if (! out || ! out->syncout)
     {
       FILE *f = is_err ? stderr : stdout;
-#ifdef KBUILD_OS_WINDOWS
-      /** @todo check if fputs is also subject to char-by-char stupidity */
+# ifdef KBUILD_OS_WINDOWS
       maybe_con_fwrite(msg, strlen(msg), 1, f);
-#else
+# else
       fputs (msg, f);
-#endif
+# endif
       fflush (f);
     }
@@ -91,4 +390,5 @@
         }
     }
+#endif /* !CONFIG_WITH_OUTPUT_IN_MEMORY */
 }
 
@@ -229,9 +529,14 @@
 }
 
+#ifndef CONFIG_WITH_OUTPUT_IN_MEMORY
 /* Support routine for output_sync() */
 static void
 pump_from_tmp (int from, FILE *to)
 {
+# ifdef KMK
+  char buffer[8192];
+# else
   static char buffer[8192];
+#endif
 
 #ifdef WINDOWS32
@@ -268,4 +573,5 @@
 #endif
 }
+#endif /* CONFIG_WITH_OUTPUT_IN_MEMORY */
 
 /* Obtain the lock for writing output.  */
@@ -294,4 +600,6 @@
     perror ("fcntl()");
 }
+
+#ifndef CONFIG_WITH_OUTPUT_IN_MEMORY
 
 /* Returns a file descriptor to a temporary file.  The file is automatically
@@ -362,4 +670,6 @@
 }
 
+#endif /* CONFIG_WITH_OUTPUT_IN_MEMORY */
+
 /* Synchronize the output of jobs in -j mode to keep the results of
    each job together. This is done by holding the results in temp files,
@@ -370,4 +680,7 @@
 output_dump (struct output *out)
 {
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+  membuf_dump (out);
+#else
   int outfd_not_empty = FD_NOT_EMPTY (out->out);
   int errfd_not_empty = FD_NOT_EMPTY (out->err);
@@ -412,4 +725,5 @@
         }
     }
+#endif
 }
 #endif /* NO_OUTPUT_SYNC */
@@ -523,5 +837,19 @@
   if (out)
     {
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+      out->out.head_seg  = NULL;
+      out->out.tail_seg  = NULL;
+      out->out.head_run  = NULL;
+      out->out.tail_run  = NULL;
+      out->err.head_seg  = NULL;
+      out->err.tail_seg  = NULL;
+      out->err.head_run  = NULL;
+      out->err.tail_run  = NULL;
+      out->err.total     = 0;
+      out->out.total     = 0;
+      out->seqno         = 0;
+#else
       out->out = out->err = OUTPUT_NONE;
+#endif
       out->syncout = !!output_sync;
       return;
@@ -566,8 +894,15 @@
 #endif
 
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+  assert (out->out.total == 0);
+  assert (out->out.head_seg == NULL);
+  assert (out->err.total == 0);
+  assert (out->err.head_seg == NULL);
+#else
   if (out->out >= 0)
     close (out->out);
   if (out->err >= 0 && out->err != out->out)
     close (out->err);
+#endif
 
   output_init (out);
@@ -578,4 +913,5 @@
 output_start (void)
 {
+#ifndef CONFIG_WITH_OUTPUT_IN_MEMORY
 #ifndef NO_OUTPUT_SYNC
   /* If we're syncing output make sure the temporary file is set up.  */
@@ -583,4 +919,5 @@
     if (! OUTPUT_ISSET(output_context))
       setup_tmpfile (output_context);
+#endif
 #endif
 
Index: /trunk/src/kmk/output.h
===================================================================
--- /trunk/src/kmk/output.h	(revision 3188)
+++ /trunk/src/kmk/output.h	(revision 3189)
@@ -15,8 +15,44 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+/*  Output run. */
+struct output_run
+{
+    unsigned int seqno;         /* For interleaving out/err output. */
+    unsigned int len;           /* The length of the output. */
+    struct output_run *next;    /* Pointer to the next run. */
+};
+
+/* Output segment. */
+struct output_segment
+{
+    struct output_segment *next;
+    size_t size;                 /* Segment size, everything included. */
+    struct output_run runs[1];
+};
+
+/* Output memory buffer. */
+struct output_membuf
+{
+    struct output_run     *head_run;
+    struct output_run     *tail_run; /* Always in tail_seg. */
+    struct output_segment *head_seg;
+    struct output_segment *tail_seg;
+    size_t left;                /* Number of bytes that can be appended to
+                                   the tail_run.  */
+    size_t total;               /* Total segment allocation size.  */
+};
+#endif /* CONFIG_WITH_OUTPUT_IN_MEMORY */
+
 struct output
   {
+#ifdef CONFIG_WITH_OUTPUT_IN_MEMORY
+    struct output_membuf out;
+    struct output_membuf err;
+    unsigned int seqno;         /* The current run sequence number. */
+#else
     int out;
     int err;
+#endif
     unsigned int syncout:1;     /* True if we want to synchronize output.  */
  };
