Fix bogus EXPLAIN output for Hash Aggregate
authorDavid Rowley <[email protected]>
Thu, 6 Aug 2020 22:22:18 +0000 (10:22 +1200)
committerDavid Rowley <[email protected]>
Thu, 6 Aug 2020 22:22:18 +0000 (10:22 +1200)
9bdb300de modified the EXPLAIN output for Hash Aggregate to show details
from parallel workers. However, it neglected to consider that a given
parallel worker may not have assisted with the given Hash Aggregate. This
can occur when workers fail to start or during Parallel Append with
enable_partitionwise_join enabled when only a single worker is working on
a non-parallel aware sub-plan. It could also happen if a worker simply
wasn't fast enough to get any work done before other processes went and
finished all the work.

The bogus output came from the fact that ExplainOpenWorker() skipped
showing any details for non-initialized workers but show_hashagg_info()
did show details from the worker.  This meant that the worker properties
that were shown were not properly attributed to the worker that they
belong to.

In passing, we also now don't show Hash Aggregate properties for the
leader process when it did not contribute any work to the Hash Aggregate.
This can occur either during Parallel Append when only a parallel worker
worked on a given sub plan or with parallel_leader_participation set to
off.  This aims to make the behavior of Hash Aggregate's EXPLAIN output
more similar to Sort's.

Reported-by: Justin Pryzby
Discussion: https://postgr.es/m/20200805012105.GZ28072%40telsasoft.com
Back-through: 13, where the original breakage was introduced

src/backend/commands/explain.c

index 1e565fd3375523e0680ed55b497d152d5fdfed8f..30e0a7ee7f2190f8c9fad0d1308df410bc0e91d8 100644 (file)
@@ -3063,15 +3063,19 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
            ExplainPropertyInteger("Planned Partitions", NULL,
                                   aggstate->hash_planned_partitions, es);
 
-       if (!es->analyze)
-           return;
-
-       /* EXPLAIN ANALYZE */
-       ExplainPropertyInteger("HashAgg Batches", NULL,
-                              aggstate->hash_batches_used, es);
-       ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
-       ExplainPropertyInteger("Disk Usage", "kB",
-                              aggstate->hash_disk_used, es);
+       /*
+        * During parallel query the leader may have not helped out.  We
+        * detect this by checking how much memory it used.  If we find it
+        * didn't do any work then we don't show its properties.
+        */
+       if (es->analyze && aggstate->hash_mem_peak > 0)
+       {
+           ExplainPropertyInteger("HashAgg Batches", NULL,
+                                  aggstate->hash_batches_used, es);
+           ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
+           ExplainPropertyInteger("Disk Usage", "kB",
+                                  aggstate->hash_disk_used, es);
+       }
    }
    else
    {
@@ -3085,26 +3089,32 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
            gotone = true;
        }
 
-       if (!es->analyze)
+       /*
+        * During parallel query the leader may have not helped out.  We
+        * detect this by checking how much memory it used.  If we find it
+        * didn't do any work then we don't show its properties.
+        */
+       if (es->analyze && aggstate->hash_mem_peak > 0)
        {
-           if (gotone)
-               appendStringInfoChar(es->str, '\n');
-           return;
-       }
+           if (!gotone)
+               ExplainIndentText(es);
+           else
+               appendStringInfoString(es->str, "  ");
 
-       if (!gotone)
-           ExplainIndentText(es);
-       else
-           appendStringInfoString(es->str, "  ");
+           appendStringInfo(es->str, "Batches: %d  Memory Usage: " INT64_FORMAT "kB",
+                            aggstate->hash_batches_used, memPeakKb);
+           gotone = true;
 
-       appendStringInfo(es->str, "Batches: %d  Memory Usage: " INT64_FORMAT "kB",
-                        aggstate->hash_batches_used, memPeakKb);
+           /* Only display disk usage if we spilled to disk */
+           if (aggstate->hash_batches_used > 1)
+           {
+               appendStringInfo(es->str, "  Disk Usage: " UINT64_FORMAT "kB",
+                   aggstate->hash_disk_used);
+           }
+       }
 
-       /* Only display disk usage if we spilled to disk */
-       if (aggstate->hash_batches_used > 1)
-           appendStringInfo(es->str, "  Disk Usage: " UINT64_FORMAT "kB",
-                            aggstate->hash_disk_used);
-       appendStringInfoChar(es->str, '\n');
+       if (gotone)
+           appendStringInfoChar(es->str, '\n');
    }
 
    /* Display stats for each parallel worker */
@@ -3117,6 +3127,9 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
            int         hash_batches_used;
 
            sinstrument = &aggstate->shared_info->sinstrument[n];
+           /* Skip workers that didn't do anything */
+           if (sinstrument->hash_mem_peak == 0)
+               continue;
            hash_disk_used = sinstrument->hash_disk_used;
            hash_batches_used = sinstrument->hash_batches_used;
            memPeakKb = (sinstrument->hash_mem_peak + 1023) / 1024;