/*
* INTERFACE ROUTINES
* RelationCacheInitialize - initialize relcache (to empty)
- * RelationCacheInitializePhase2 - finish initializing relcache
+ * RelationCacheInitializePhase2 - initialize shared-catalog entries
+ * RelationCacheInitializePhase3 - finish initializing relcache
* RelationIdGetRelation - get a reldesc by relation id
* RelationClose - close an open relation
*
#include <unistd.h>
#include "access/genam.h"
-#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/pg_attrdef.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_constraint.h"
+#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
+#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "miscadmin.h"
/*
- * name of relcache init file, used to speed up backend startup
+ * name of relcache init file(s), used to speed up backend startup
*/
#define RELCACHE_INIT_FILENAME "pg_internal.init"
-#define RELCACHE_INIT_FILEMAGIC 0x573264 /* version ID value */
+#define RELCACHE_INIT_FILEMAGIC 0x573265 /* version ID value */
/*
* hardcoded tuple descriptors. see include/catalog/pg_attribute.h
*/
-static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
-static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
-static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
-static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
-static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
+static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
+static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
+static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
+static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
+static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
+static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
/*
* Hash tables that index the relation cache
*/
bool criticalRelcachesBuilt = false;
+/*
+ * This flag is false until we have prepared the critical relcache entries
+ * for shared catalogs (specifically, pg_database and its indexes).
+ */
+bool criticalSharedRelcachesBuilt = false;
+
/*
* This counter counts relcache inval events received since backend startup
* (but only for rels that are actually in cache). Presently, we use it only
static long relcacheInvalsReceived = 0L;
/*
- * This list remembers the OIDs of the relations cached in the relcache
- * init file.
+ * This list remembers the OIDs of the non-shared relations cached in the
+ * database's local relcache init file. Note that there is no corresponding
+ * list for the shared relcache init file, for reasons explained in the
+ * comments for RelationCacheInitFileInvalidate.
*/
static List *initFileRelationIds = NIL;
static void RelationReloadIndexInfo(Relation relation);
static void RelationFlushRelation(Relation relation);
-static bool load_relcache_init_file(void);
-static void write_relcache_init_file(void);
+static bool load_relcache_init_file(bool shared);
+static void write_relcache_init_file(bool shared);
static void write_item(const void *data, Size len, FILE *fp);
-static void formrdesc(const char *relationName, Oid relationReltype,
- bool hasoids, int natts, FormData_pg_attribute *att);
+static void formrdesc(const char *relationName, bool isshared,
+ bool hasoids, int natts, const FormData_pg_attribute *attrs);
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
static void RelationBuildTupleDesc(Relation relation);
static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
static void RelationInitPhysicalAddr(Relation relation);
+static void load_critical_index(Oid indexoid);
static TupleDesc GetPgClassDescriptor(void);
static TupleDesc GetPgIndexDescriptor(void);
static void AttrDefaultFetch(Relation relation);
static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numStrats,
StrategyNumber numSupport);
+static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
+static void unlink_initfile(const char *initfilename);
/*
SysScanDesc pg_class_scan;
ScanKeyData key[1];
+ /*
+ * If something goes wrong during backend startup, we might find ourselves
+ * trying to read pg_class before we've selected a database. That ain't
+ * gonna work, so bail out with a useful error message. If this happens,
+ * it probably means a relcache entry that needs to be nailed isn't.
+ */
+ if (!OidIsValid(MyDatabaseId))
+ elog(FATAL, "cannot read pg_class without having selected a database");
+
/*
* form a scan key
*/
/*
* formrdesc
*
- * This is a special cut-down version of RelationBuildDesc()
- * used by RelationCacheInitializePhase2() in initializing the relcache.
+ * This is a special cut-down version of RelationBuildDesc(),
+ * used while initializing the relcache.
* The relation descriptor is built just from the supplied parameters,
* without actually looking at any system table entries. We cheat
* quite a lot since we only need to work for a few basic system
* catalogs.
*
- * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
- * and pg_type (see RelationCacheInitializePhase2).
+ * formrdesc is currently used for: pg_database, pg_class, pg_attribute,
+ * pg_proc, and pg_type (see RelationCacheInitializePhase2/3).
*
* Note that these catalogs can't have constraints (except attnotnull),
* default values, rules, or triggers, since we don't cope with any of that.
+ * (Well, actually, this only matters for properties that need to be valid
+ * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
+ * these properties matter then...)
*
* NOTE: we assume we are already switched into CacheMemoryContext.
*/
static void
-formrdesc(const char *relationName, Oid relationReltype,
- bool hasoids, int natts, FormData_pg_attribute *att)
+formrdesc(const char *relationName, bool isshared,
+ bool hasoids, int natts, const FormData_pg_attribute *attrs)
{
Relation relation;
int i;
* initialize relation tuple form
*
* The data we insert here is pretty incomplete/bogus, but it'll serve to
- * get us launched. RelationCacheInitializePhase2() will read the real
+ * get us launched. RelationCacheInitializePhase3() will read the real
* data from pg_class and replace what we've done here.
*/
relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
namestrcpy(&relation->rd_rel->relname, relationName);
relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
- relation->rd_rel->reltype = relationReltype;
/*
* It's important to distinguish between shared and non-shared relations,
- * even at bootstrap time, to make sure we know where they are stored. At
- * present, all relations that formrdesc is used for are not shared.
+ * even at bootstrap time, to make sure we know where they are stored.
*/
- relation->rd_rel->relisshared = false;
+ relation->rd_rel->relisshared = isshared;
+ if (isshared)
+ relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
/*
* Likewise, we must know if a relation is temp ... but formrdesc is not
relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
relation->rd_att->tdrefcount = 1; /* mark as refcounted */
- relation->rd_att->tdtypeid = relationReltype;
- relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
-
/*
* initialize tuple desc info
*/
for (i = 0; i < natts; i++)
{
memcpy(relation->rd_att->attrs[i],
- &att[i],
+ &attrs[i],
ATTRIBUTE_FIXED_PART_SIZE);
- has_not_null |= att[i].attnotnull;
+ has_not_null |= attrs[i].attnotnull;
/* make sure attcacheoff is valid */
relation->rd_att->attrs[i]->attcacheoff = -1;
}
/* Should be closed at smgr level */
Assert(relation->rd_smgr == NULL);
+ /*
+ * Must reset targblock, fsm_nblocks and vm_nblocks in case rel was
+ * truncated
+ */
+ relation->rd_targblock = InvalidBlockNumber;
+ relation->rd_fsm_nblocks = InvalidBlockNumber;
+ relation->rd_vm_nblocks = InvalidBlockNumber;
+ /* Must free any AM cached data, too */
+ if (relation->rd_amcache)
+ pfree(relation->rd_amcache);
+ relation->rd_amcache = NULL;
+
+ /*
+ * If it's a shared index, we might be called before backend startup
+ * has finished selecting a database, in which case we have no way to
+ * read pg_class yet. However, a shared index can never have any
+ * significant schema updates, so it's okay to ignore the invalidation
+ * signal. Just mark it valid and return without doing anything more.
+ */
+ if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
+ {
+ relation->rd_isvalid = true;
+ return;
+ }
+
/*
* Read the pg_class row
*
/* We must recalculate physical address in case it changed */
RelationInitPhysicalAddr(relation);
- /*
- * Must reset targblock, fsm_nblocks and vm_nblocks in case rel was
- * truncated
- */
- relation->rd_targblock = InvalidBlockNumber;
- relation->rd_fsm_nblocks = InvalidBlockNumber;
- relation->rd_vm_nblocks = InvalidBlockNumber;
- /* Must free any AM cached data, too */
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
- relation->rd_amcache = NULL;
-
/*
* For a non-system index, there are fields of the pg_index row that are
* allowed to change, so re-read that row and update the relcache entry.
/*
* check for creation of a rel that must be nailed in cache.
*
- * XXX this list had better match RelationCacheInitializePhase2's list.
+ * XXX this list had better match the relations specially handled in
+ * RelationCacheInitializePhase2/3.
*/
switch (relid)
{
+ case DatabaseRelationId:
case RelationRelationId:
case AttributeRelationId:
case ProcedureRelationId:
/*
* RelationCacheInitializePhase2
*
- * This is called as soon as the catcache and transaction system
- * are functional. At this point we can actually read data from
- * the system catalogs. We first try to read pre-computed relcache
- * entries from the pg_internal.init file. If that's missing or
- * broken, make phony entries for the minimum set of nailed-in-cache
- * relations. Then (unless bootstrapping) make sure we have entries
- * for the critical system indexes. Once we've done all this, we
- * have enough infrastructure to open any system catalog or use any
- * catcache. The last step is to rewrite pg_internal.init if needed.
+ * This is called to prepare for access to pg_database during startup.
+ * We must at least set up a nailed reldesc for pg_database. Ideally
+ * we'd like to have reldescs for its indexes, too. We attempt to
+ * load this information from the shared relcache init file. If that's
+ * missing or broken, just make a phony entry for pg_database.
+ * RelationCacheInitializePhase3 will clean up as needed.
+ *
+ * If the shared init file loads successfully, load_relcache_init_file sets
+ * criticalSharedRelcachesBuilt. RelationCacheInitializePhase3 tests that
+ * flag and, when it is still false, loads pg_database's indexes itself and
+ * arranges for the init files to be rewritten.
*/
void
RelationCacheInitializePhase2(void)
+{
+ MemoryContext oldcxt;
+
+ /*
+ * In bootstrap mode, pg_database isn't there yet anyway, so do nothing.
+ */
+ if (IsBootstrapProcessingMode())
+ return;
+
+ /*
+ * switch to cache memory context
+ * (formrdesc and load_relcache_init_file both assume we are already
+ * switched into CacheMemoryContext)
+ */
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+
+ /*
+ * Try to load the shared relcache cache file. If unsuccessful,
+ * bootstrap the cache with a pre-made descriptor for pg_database.
+ * The formrdesc arguments mark pg_database as shared (isshared = true)
+ * and as having OIDs (hasoids = true). NUM_CRITICAL_SHARED_RELS is
+ * cross-checked by load_relcache_init_file against the number of nailed
+ * rels found in the shared init file.
+ */
+ if (!load_relcache_init_file(true))
+ {
+ formrdesc("pg_database", true,
+ true, Natts_pg_database, Desc_pg_database);
+
+#define NUM_CRITICAL_SHARED_RELS 1 /* fix if you change list above */
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * RelationCacheInitializePhase3
+ *
+ * This is called as soon as the catcache and transaction system
+ * are functional and we have determined MyDatabaseId. At this point
+ * we can actually read data from the database's system catalogs.
+ * We first try to read pre-computed relcache entries from the local
+ * relcache init file. If that's missing or broken, make phony entries
+ * for the minimum set of nailed-in-cache relations. Then (unless
+ * bootstrapping) make sure we have entries for the critical system
+ * indexes. Once we've done all this, we have enough infrastructure to
+ * open any system catalog or use any catcache. The last step is to
+ * rewrite the cache files if needed.
+ */
+void
+RelationCacheInitializePhase3(void)
{
HASH_SEQ_STATUS status;
RelIdCacheEnt *idhentry;
MemoryContext oldcxt;
- bool needNewCacheFile = false;
+ bool needNewCacheFile = !criticalSharedRelcachesBuilt;
/*
* switch to cache memory context
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
- * Try to load the relcache cache file. If unsuccessful, bootstrap the
- * cache with pre-made descriptors for the critical "nailed-in" system
- * catalogs.
+ * Try to load the local relcache cache file. If unsuccessful,
+ * bootstrap the cache with pre-made descriptors for the critical
+ * "nailed-in" system catalogs.
*/
if (IsBootstrapProcessingMode() ||
- !load_relcache_init_file())
+ !load_relcache_init_file(false))
{
needNewCacheFile = true;
- formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
+ formrdesc("pg_class", false,
true, Natts_pg_class, Desc_pg_class);
- formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
+ formrdesc("pg_attribute", false,
false, Natts_pg_attribute, Desc_pg_attribute);
- formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
+ formrdesc("pg_proc", false,
true, Natts_pg_proc, Desc_pg_proc);
- formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
+ formrdesc("pg_type", false,
true, Natts_pg_type, Desc_pg_type);
-#define NUM_CRITICAL_RELS 4 /* fix if you change list above */
+#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
}
MemoryContextSwitchTo(oldcxt);
*/
if (!criticalRelcachesBuilt)
{
- Relation ird;
-
-#define LOAD_CRIT_INDEX(indexoid) \
- do { \
- LockRelationOid(indexoid, AccessShareLock); \
- ird = RelationBuildDesc(indexoid, NULL); \
- if (ird == NULL) \
- elog(PANIC, "could not open critical system index %u", \
- indexoid); \
- ird->rd_isnailed = true; \
- ird->rd_refcnt = 1; \
- UnlockRelationOid(indexoid, AccessShareLock); \
- } while (0)
-
- LOAD_CRIT_INDEX(ClassOidIndexId);
- LOAD_CRIT_INDEX(AttributeRelidNumIndexId);
- LOAD_CRIT_INDEX(IndexRelidIndexId);
- LOAD_CRIT_INDEX(OpclassOidIndexId);
- LOAD_CRIT_INDEX(AccessMethodStrategyIndexId);
- LOAD_CRIT_INDEX(AccessMethodProcedureIndexId);
- LOAD_CRIT_INDEX(OperatorOidIndexId);
- LOAD_CRIT_INDEX(RewriteRelRulenameIndexId);
- LOAD_CRIT_INDEX(TriggerRelidNameIndexId);
-
-#define NUM_CRITICAL_INDEXES 9 /* fix if you change list above */
+ load_critical_index(ClassOidIndexId);
+ load_critical_index(AttributeRelidNumIndexId);
+ load_critical_index(IndexRelidIndexId);
+ load_critical_index(OpclassOidIndexId);
+ load_critical_index(AccessMethodStrategyIndexId);
+ load_critical_index(AccessMethodProcedureIndexId);
+ load_critical_index(OperatorOidIndexId);
+ load_critical_index(RewriteRelRulenameIndexId);
+ load_critical_index(TriggerRelidNameIndexId);
+
+#define NUM_CRITICAL_LOCAL_INDEXES 9 /* fix if you change list above */
criticalRelcachesBuilt = true;
}
+ /*
+ * Process critical shared indexes too.
+ *
+ * DatabaseNameIndexId isn't critical for relcache loading, but rather
+ * for initial lookup of MyDatabaseId, without which we'll never find
+ * any non-shared catalogs at all. Autovacuum calls InitPostgres with
+ * a database OID, so it instead depends on DatabaseOidIndexId.
+ */
+ if (!criticalSharedRelcachesBuilt)
+ {
+ load_critical_index(DatabaseNameIndexId);
+ load_critical_index(DatabaseOidIndexId);
+
+#define NUM_CRITICAL_SHARED_INDEXES 2 /* fix if you change list above */
+
+ criticalSharedRelcachesBuilt = true;
+ }
+
/*
* Now, scan all the relcache entries and update anything that might be
* wrong in the results from formrdesc or the relcache cache file. If we
}
/*
- * Lastly, write out a new relcache cache file if one is needed.
+ * Lastly, write out new relcache cache files if needed. We don't bother
+ * to distinguish cases where only one of the two needs an update.
*/
if (needNewCacheFile)
{
* Force all the catcaches to finish initializing and thereby open the
* catalogs and indexes they use. This will preload the relcache with
* entries for all the most important system catalogs and indexes, so
- * that the init file will be most useful for future backends.
+ * that the init files will be most useful for future backends.
*/
InitCatalogCachePhase2();
- /* now write the file */
- write_relcache_init_file();
+ /* reset initFileRelationIds list; we'll fill it during write */
+ initFileRelationIds = NIL;
+
+ /* now write the files */
+ write_relcache_init_file(true);
+ write_relcache_init_file(false);
}
}
+/*
+ * Load one critical system index into the relcache
+ *
+ * The entry is built with RelationBuildDesc while AccessShareLock is held
+ * on the index OID; the lock is released again before returning. The new
+ * entry is marked nailed and given a reference count of 1. If the entry
+ * cannot be built (RelationBuildDesc returns NULL), this is reported with
+ * elog(PANIC).
+ *
+ * indexoid: OID of the index to load.
+ */
+static void
+load_critical_index(Oid indexoid)
+{
+ Relation ird;
+
+ LockRelationOid(indexoid, AccessShareLock);
+ ird = RelationBuildDesc(indexoid, NULL);
+ if (ird == NULL)
+ elog(PANIC, "could not open critical system index %u", indexoid);
+ ird->rd_isnailed = true;
+ ird->rd_refcnt = 1;
+ UnlockRelationOid(indexoid, AccessShareLock);
+}
+
/*
* GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
* GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
* extracting fields.
*/
static TupleDesc
-BuildHardcodedDescriptor(int natts, Form_pg_attribute attrs, bool hasoids)
+BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs,
+ bool hasoids)
{
TupleDesc result;
MemoryContext oldcxt;
return pgindexdesc;
}
+/*
+ * Load any default attribute value definitions for the relation.
+ */
static void
AttrDefaultFetch(Relation relation)
{
ndef - found, RelationGetRelationName(relation));
}
+/*
+ * Load any check constraints for the relation.
+ */
static void
CheckConstraintFetch(Relation relation)
{
* relation descriptors using sequential scans and write 'em to
* the initialization file for use by subsequent backends.
*
- * We could dispense with the initialization file and just build the
+ * As of Postgres 8.5, there is one local initialization file in each
+ * database, plus one shared initialization file for shared catalogs.
+ *
+ * We could dispense with the initialization files and just build the
* critical reldescs the hard way on every backend startup, but that
* slows down backend startup noticeably.
*
* just the ones that are absolutely critical; this allows us to speed
* up backend startup by not having to build such entries the hard way.
* Presently, all the catalog and index entries that are referred to
- * by catcaches are stored in the initialization file.
+ * by catcaches are stored in the initialization files.
*
* The same mechanism that detects when catcache and relcache entries
* need to be invalidated (due to catalog updates) also arranges to
- * unlink the initialization file when its contents may be out of date.
- * The file will then be rebuilt during the next backend startup.
+ * unlink the initialization files when the contents may be out of date.
+ * The files will then be rebuilt during the next backend startup.
*/
/*
- * load_relcache_init_file -- attempt to load cache from the init file
+ * load_relcache_init_file -- attempt to load cache from the shared
+ * or local cache init file
*
- * If successful, return TRUE and set criticalRelcachesBuilt to true.
+ * If successful, return TRUE and set criticalRelcachesBuilt or
+ * criticalSharedRelcachesBuilt to true.
* If not successful, return FALSE.
*
* NOTE: we assume we are already switched into CacheMemoryContext.
*/
static bool
-load_relcache_init_file(void)
+load_relcache_init_file(bool shared)
{
FILE *fp;
char initfilename[MAXPGPATH];
magic;
int i;
- snprintf(initfilename, sizeof(initfilename), "%s/%s",
- DatabasePath, RELCACHE_INIT_FILENAME);
+ if (shared)
+ snprintf(initfilename, sizeof(initfilename), "global/%s",
+ RELCACHE_INIT_FILENAME);
+ else
+ snprintf(initfilename, sizeof(initfilename), "%s/%s",
+ DatabasePath, RELCACHE_INIT_FILENAME);
fp = AllocateFile(initfilename, PG_BINARY_R);
if (fp == NULL)
rels = (Relation *) palloc(max_rels * sizeof(Relation));
num_rels = 0;
nailed_rels = nailed_indexes = 0;
- initFileRelationIds = NIL;
/* check for correct magic number (compatible version) */
if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
/*
* Rules and triggers are not saved (mainly because the internal
* format is complex and subject to change). They must be rebuilt if
- * needed by RelationCacheInitializePhase2. This is not expected to
+ * needed by RelationCacheInitializePhase3. This is not expected to
* be a big performance hit since few system catalogs have such. Ditto
* for index expressions and predicates.
*/
* get the right number of nailed items? (This is a useful crosscheck in
* case the set of critical rels or indexes changes.)
*/
- if (nailed_rels != NUM_CRITICAL_RELS ||
- nailed_indexes != NUM_CRITICAL_INDEXES)
- goto read_failed;
+ if (shared)
+ {
+ if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
+ nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
+ goto read_failed;
+ }
+ else
+ {
+ if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
+ nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
+ goto read_failed;
+ }
/*
* OK, all appears well.
{
RelationCacheInsert(rels[relno]);
/* also make a list of their OIDs, for RelationIdIsInInitFile */
- initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
- initFileRelationIds);
+ if (!shared)
+ initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
+ initFileRelationIds);
}
pfree(rels);
FreeFile(fp);
- criticalRelcachesBuilt = true;
+ if (shared)
+ criticalSharedRelcachesBuilt = true;
+ else
+ criticalRelcachesBuilt = true;
return true;
/*
/*
* Write out a new initialization file with the current contents
- * of the relcache.
+ * of the relcache (either shared rels or local rels, as indicated).
*/
static void
-write_relcache_init_file(void)
+write_relcache_init_file(bool shared)
{
FILE *fp;
char tempfilename[MAXPGPATH];
* another backend starting at about the same time might crash trying to
* read the partially-complete file.
*/
- snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
- DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
- snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
- DatabasePath, RELCACHE_INIT_FILENAME);
+ if (shared)
+ {
+ snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
+ RELCACHE_INIT_FILENAME, MyProcPid);
+ snprintf(finalfilename, sizeof(finalfilename), "global/%s",
+ RELCACHE_INIT_FILENAME);
+ }
+ else
+ {
+ snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
+ DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
+ snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
+ DatabasePath, RELCACHE_INIT_FILENAME);
+ }
unlink(tempfilename); /* in case it exists w/wrong permissions */
elog(FATAL, "could not write init file");
/*
- * Write all the reldescs (in no particular order).
+ * Write all the appropriate reldescs (in no particular order).
*/
hash_seq_init(&status, RelationIdCache);
- initFileRelationIds = NIL;
-
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
{
Relation rel = idhentry->reldesc;
Form_pg_class relform = rel->rd_rel;
+ /* ignore if not correct group */
+ if (relform->relisshared != shared)
+ continue;
+
/* first write the relcache entry proper */
write_item(rel, sizeof(RelationData), fp);
}
/* also make a list of their OIDs, for RelationIdIsInInitFile */
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
- initFileRelationIds = lcons_oid(RelationGetRelid(rel),
- initFileRelationIds);
- MemoryContextSwitchTo(oldcxt);
+ if (!shared)
+ {
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ initFileRelationIds = lcons_oid(RelationGetRelid(rel),
+ initFileRelationIds);
+ MemoryContextSwitchTo(oldcxt);
+ }
}
if (FreeFile(fp))
/*
* Detect whether a given relation (identified by OID) is one of the ones
- * we store in the init file.
+ * we store in the local relcache init file.
*
* Note that we effectively assume that all backends running in a database
* would choose to store the same set of relations in the init file;
/*
* Invalidate (remove) the init file during commit of a transaction that
* changed one or more of the relation cache entries that are kept in the
- * init file.
+ * local init file.
*
* We actually need to remove the init file twice: once just before sending
* the SI messages that include relcache inval for such relations, and once
*
* Ignore any failure to unlink the file, since it might not be there if
* no backend has been started since the last removal.
+ *
+ * Notice this deals only with the local init file, not the shared init file.
+ * The reason is that there can never be a "significant" change to the
+ * relcache entry of a shared relation; the most that could happen is
+ * updates of noncritical fields such as relpages/reltuples. So, while
+ * it's worth updating the shared init file from time to time, it can never
+ * be invalid enough to make it necessary to remove it.
*/
void
RelationCacheInitFileInvalidate(bool beforeSend)
}
/*
- * Remove the init file for a given database during postmaster startup.
+ * Remove the init files during postmaster startup.
*
- * We used to keep the init file across restarts, but that is unsafe in PITR
+ * We used to keep the init files across restarts, but that is unsafe in PITR
* scenarios, and even in simple crash-recovery cases there are windows for
- * the init file to become out-of-sync with the database. So now we just
- * remove it during startup and expect the first backend launch to rebuild it.
- * Of course, this has to happen in each database of the cluster. For
- * simplicity this is driven by flatfiles.c, which has to scan pg_database
- * anyway.
+ * the init files to become out-of-sync with the database. So now we just
+ * remove them during startup and expect the first backend launch to rebuild
+ * them. Of course, this has to happen in each database of the cluster.
+ *
+ * The shared init file under "global" is removed first. Then every
+ * all-digits subdirectory of "base", and of each all-digits entry in
+ * "pg_tblspc", is checked for a local init file to remove. Failure to
+ * open a directory is logged at LOG level and that directory is skipped.
*/
void
-RelationCacheInitFileRemove(const char *dbPath)
+RelationCacheInitFileRemove(void)
+{
+ const char *tblspcdir = "pg_tblspc";
+ DIR *dir;
+ struct dirent *de;
+ char path[MAXPGPATH];
+
+ /*
+ * We zap the shared cache file too. In theory it can't get out of sync
+ * enough to be a problem, but in data-corruption cases, who knows ...
+ */
+ snprintf(path, sizeof(path), "global/%s",
+ RELCACHE_INIT_FILENAME);
+ unlink_initfile(path);
+
+ /* Scan everything in the default tablespace */
+ RelationCacheInitFileRemoveInDir("base");
+
+ /* Scan the tablespace link directory to find non-default tablespaces */
+ dir = AllocateDir(tblspcdir);
+ if (dir == NULL)
+ {
+ elog(LOG, "could not open tablespace link directory \"%s\": %m",
+ tblspcdir);
+ return;
+ }
+
+ while ((de = ReadDir(dir, tblspcdir)) != NULL)
+ {
+ if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
+ {
+ /*
+ * The entry name consists only of digits, so treat it as a
+ * tablespace link. Scan the tablespace dir for per-database dirs
+ */
+ snprintf(path, sizeof(path), "%s/%s",
+ tblspcdir, de->d_name);
+ RelationCacheInitFileRemoveInDir(path);
+ }
+ }
+
+ FreeDir(dir);
+}
+
+/*
+ * Process one per-tablespace directory for RelationCacheInitFileRemove
+ *
+ * tblspcpath names the directory to scan. For each entry in it whose name
+ * consists only of digits, we try to remove the file
+ * <tblspcpath>/<entry>/RELCACHE_INIT_FILENAME via unlink_initfile. If the
+ * directory cannot be opened, that is logged at LOG level and nothing else
+ * is done.
+ */
+static void
+RelationCacheInitFileRemoveInDir(const char *tblspcpath)
{
+ DIR *dir;
+ struct dirent *de;
char initfilename[MAXPGPATH];
- snprintf(initfilename, sizeof(initfilename), "%s/%s",
- dbPath, RELCACHE_INIT_FILENAME);
- unlink(initfilename);
- /* ignore any error, since it might not be there at all */
+ /* Scan the tablespace directory to find per-database directories */
+ dir = AllocateDir(tblspcpath);
+ if (dir == NULL)
+ {
+ elog(LOG, "could not open tablespace directory \"%s\": %m",
+ tblspcpath);
+ return;
+ }
+
+ while ((de = ReadDir(dir, tblspcpath)) != NULL)
+ {
+ if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
+ {
+ /*
+ * All-digits name: presumably a per-database directory.
+ * Try to remove the init file in each database
+ */
+ snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
+ tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
+ unlink_initfile(initfilename);
+ }
+ }
+
+ FreeDir(dir);
+}
+
+static void
+unlink_initfile(const char *initfilename)
+{
+ if (unlink(initfilename) < 0)
+ {
+ /*
+ * It might not be there, but log any error other than ENOENT.
+ * The failure is only reported at LOG level; nothing is returned
+ * to the caller, so removal is strictly best-effort.
+ */
+ if (errno != ENOENT)
+ elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
+ }
}
#include <unistd.h>
#include "access/heapam.h"
+#include "access/sysattr.h"
#include "access/xact.h"
#include "catalog/catalog.h"
+#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_database.h"
#include "catalog/pg_tablespace.h"
-#include "libpq/hba.h"
#include "libpq/libpq-be.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/postmaster.h"
-#include "storage/backendid.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/sinvaladt.h"
#include "storage/smgr.h"
#include "utils/acl.h"
-#include "utils/flatfiles.h"
+#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/pg_locale.h"
-#include "utils/plancache.h"
#include "utils/portal.h"
-#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
-static bool FindMyDatabase(const char *name, Oid *db_id, Oid *db_tablespace);
-static bool FindMyDatabaseByOid(Oid dbid, char *dbname, Oid *db_tablespace);
+static HeapTuple GetDatabaseTuple(const char *dbname);
+static HeapTuple GetDatabaseTupleByOid(Oid dboid);
static void CheckMyDatabase(const char *name, bool am_superuser);
static void InitCommunication(void);
static void ShutdownPostgres(int code, Datum arg);
/*
- * FindMyDatabase -- get the critical info needed to locate my database
+ * GetDatabaseTuple -- fetch the pg_database row for a database
*
- * Find the named database in pg_database, return its database OID and the
- * OID of its default tablespace. Return TRUE if found, FALSE if not.
- *
- * Since we are not yet up and running as a backend, we cannot look directly
- * at pg_database (we can't obtain locks nor participate in transactions).
- * So to get the info we need before starting up, we must look at the "flat
- * file" copy of pg_database that is helpfully maintained by flatfiles.c.
- * This is subject to various race conditions, so after we have the
- * transaction infrastructure started, we have to recheck the information;
- * see InitPostgres.
+ * This is used during backend startup when we don't yet have any access to
+ * system catalogs in general. In the worst case, we can seqscan pg_database
+ * using nothing but the hard-wired descriptor that relcache.c creates for
+ * pg_database. In more typical cases, relcache.c was able to load
+ * descriptors for both pg_database and its indexes from the shared relcache
+ * cache file, and so we can do an indexscan. criticalSharedRelcachesBuilt
+ * tells whether we got the cached descriptors.
+ *
+ * Only the first row returned by the scan is examined. On a match the
+ * result is a heap_copytuple() copy of that row; otherwise the invalid
+ * result of systable_getnext() is returned unchanged, so callers should
+ * test the result with HeapTupleIsValid(). The scan is ended and
+ * pg_database is closed before returning in either case.
*/
-static bool
-FindMyDatabase(const char *name, Oid *db_id, Oid *db_tablespace)
+static HeapTuple
+GetDatabaseTuple(const char *dbname)
{
- bool result = false;
- char *filename;
- FILE *db_file;
- char thisname[NAMEDATALEN];
- TransactionId db_frozenxid;
-
- filename = database_getflatfilename();
- db_file = AllocateFile(filename, "r");
- if (db_file == NULL)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m", filename)));
+ HeapTuple tuple;
+ Relation relation;
+ SysScanDesc scan;
+ ScanKeyData key[1];
- while (read_pg_database_line(db_file, thisname, db_id,
- db_tablespace, &db_frozenxid))
- {
- if (strcmp(thisname, name) == 0)
- {
- result = true;
- break;
- }
- }
+ /*
+ * form a scan key: datname equal to the requested dbname
+ */
+ ScanKeyInit(&key[0],
+ Anum_pg_database_datname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(dbname));
- FreeFile(db_file);
- pfree(filename);
+ /*
+ * Open pg_database and fetch a tuple. Force heap scan if we haven't yet
+ * built the critical shared relcache entries (i.e., we're starting up
+ * without a shared relcache cache file).
+ */
+ relation = heap_open(DatabaseRelationId, AccessShareLock);
+ scan = systable_beginscan(relation, DatabaseNameIndexId,
+ criticalSharedRelcachesBuilt,
+ SnapshotNow,
+ 1, key);
- return result;
+ tuple = systable_getnext(scan);
+
+ /* Must copy tuple before releasing buffer */
+ if (HeapTupleIsValid(tuple))
+ tuple = heap_copytuple(tuple);
+
+ /* all done: end the scan and close pg_database */
+ systable_endscan(scan);
+ heap_close(relation, AccessShareLock);
+
+ return tuple;
}
/*
- * FindMyDatabaseByOid
- *
- * As above, but the actual database Id is known. Return its name and the
- * tablespace OID. Return TRUE if found, FALSE if not. The same restrictions
- * as FindMyDatabase apply.
+ * GetDatabaseTupleByOid -- as above, but search by database OID
+ *
+ * The scan key compares ObjectIdAttributeNumber with dboid, and the scan is
+ * given DatabaseOidIndexId as its index. Only the first row returned is
+ * examined. On a match the result is a heap_copytuple() copy of that row;
+ * otherwise the invalid result of systable_getnext() is returned unchanged,
+ * so callers should test the result with HeapTupleIsValid().
*/
-static bool
-FindMyDatabaseByOid(Oid dbid, char *dbname, Oid *db_tablespace)
+static HeapTuple
+GetDatabaseTupleByOid(Oid dboid)
{
- bool result = false;
- char *filename;
- FILE *db_file;
- Oid db_id;
- char thisname[NAMEDATALEN];
- TransactionId db_frozenxid;
-
- filename = database_getflatfilename();
- db_file = AllocateFile(filename, "r");
- if (db_file == NULL)
- ereport(FATAL,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m", filename)));
+ HeapTuple tuple;
+ Relation relation;
+ SysScanDesc scan;
+ ScanKeyData key[1];
- while (read_pg_database_line(db_file, thisname, &db_id,
- db_tablespace, &db_frozenxid))
- {
- if (dbid == db_id)
- {
- result = true;
- strlcpy(dbname, thisname, NAMEDATALEN);
- break;
- }
- }
+ /*
+ * form a scan key: row OID equal to the requested dboid
+ */
+ ScanKeyInit(&key[0],
+ ObjectIdAttributeNumber,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(dboid));
+
+ /*
+ * Open pg_database and fetch a tuple. Force heap scan if we haven't yet
+ * built the critical shared relcache entries (i.e., we're starting up
+ * without a shared relcache cache file).
+ */
+ relation = heap_open(DatabaseRelationId, AccessShareLock);
+ scan = systable_beginscan(relation, DatabaseOidIndexId,
+ criticalSharedRelcachesBuilt,
+ SnapshotNow,
+ 1, key);
- FreeFile(db_file);
- pfree(filename);
+ tuple = systable_getnext(scan);
- return result;
+ /* Must copy tuple before releasing buffer */
+ if (HeapTupleIsValid(tuple))
+ tuple = heap_copytuple(tuple);
+
+ /* all done: end the scan and close pg_database */
+ systable_endscan(scan);
+ heap_close(relation, AccessShareLock);
+
+ return tuple;
}
char *collate;
char *ctype;
- /* Fetch our real pg_database row */
+ /* Fetch our pg_database row normally, via syscache */
tup = SearchSysCache(DATABASEOID,
ObjectIdGetDatum(MyDatabaseId),
0, 0, 0);
* Initialize POSTGRES.
*
* The database can be specified by name, using the in_dbname parameter, or by
- * OID, using the dboid parameter. In the latter case, the computed database
- * name is passed out to the caller as a palloc'ed string in out_dbname.
+ * OID, using the dboid parameter. In the latter case, the actual database
+ * name can be returned to the caller in out_dbname. If out_dbname isn't
+ * NULL, it must point to a buffer of size NAMEDATALEN.
*
* In bootstrap mode no parameters are used.
*
* the startup transaction rather than doing a separate one in postgres.c.)
*
* As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
- * already have a PGPROC struct ... but it's not filled in yet.
+ * already have a PGPROC struct ... but it's not completely filled in yet.
*
* Note:
* Be very careful with the order of calls in the InitPostgres function.
*/
bool
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
- char **out_dbname)
+ char *out_dbname)
{
bool bootstrap = IsBootstrapProcessingMode();
bool autovacuum = IsAutoVacuumWorkerProcess();
char dbname[NAMEDATALEN];
/*
- * Set up the global variables holding database id and path. But note we
- * won't actually try to touch the database just yet.
- *
- * We take a shortcut in the bootstrap case, otherwise we have to look up
- * the db name in pg_database.
- */
- if (bootstrap)
- {
- MyDatabaseId = TemplateDbOid;
- MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
- }
- else
- {
- /*
- * Find tablespace of the database we're about to open. Since we're
- * not yet up and running we have to use one of the hackish
- * FindMyDatabase variants, which look in the flat-file copy of
- * pg_database.
- *
- * If the in_dbname param is NULL, lookup database by OID.
- */
- if (in_dbname == NULL)
- {
- if (!FindMyDatabaseByOid(dboid, dbname, &MyDatabaseTableSpace))
- ereport(FATAL,
- (errcode(ERRCODE_UNDEFINED_DATABASE),
- errmsg("database %u does not exist", dboid)));
- MyDatabaseId = dboid;
- /* pass the database name to the caller */
- *out_dbname = pstrdup(dbname);
- }
- else
- {
- if (!FindMyDatabase(in_dbname, &MyDatabaseId, &MyDatabaseTableSpace))
- ereport(FATAL,
- (errcode(ERRCODE_UNDEFINED_DATABASE),
- errmsg("database \"%s\" does not exist",
- in_dbname)));
- /* our database name is gotten from the caller */
- strlcpy(dbname, in_dbname, NAMEDATALEN);
- }
- }
-
- fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
-
- SetDatabasePath(fullpath);
-
- /*
- * Finish filling in the PGPROC struct, and add it to the ProcArray. (We
- * need to know MyDatabaseId before we can do this, since it's entered
- * into the PGPROC struct.)
+ * Add my PGPROC struct to the ProcArray.
*
* Once I have done this, I am visible to other backends!
*/
}
/*
- * Now that we have a transaction, we can take locks. Take a writer's
- * lock on the database we are trying to connect to. If there is a
- * concurrently running DROP DATABASE on that database, this will block us
- * until it finishes (and has updated the flat file copy of pg_database).
+ * Load relcache entries for the shared system catalogs. This must
+ * create at least an entry for pg_database.
+ */
+ RelationCacheInitializePhase2();
+
+ /*
+ * Set up the global variables holding database id and default tablespace.
+ * But note we won't actually try to touch the database just yet.
+ *
+ * We take a shortcut in the bootstrap case, otherwise we have to look up
+ * the db's entry in pg_database.
+ */
+ if (bootstrap)
+ {
+ MyDatabaseId = TemplateDbOid;
+ MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
+ }
+ else if (in_dbname != NULL)
+ {
+ HeapTuple tuple;
+ Form_pg_database dbform;
+
+ tuple = GetDatabaseTuple(in_dbname);
+ if (!HeapTupleIsValid(tuple))
+ ereport(FATAL,
+ (errcode(ERRCODE_UNDEFINED_DATABASE),
+ errmsg("database \"%s\" does not exist", in_dbname)));
+ dbform = (Form_pg_database) GETSTRUCT(tuple);
+ MyDatabaseId = HeapTupleGetOid(tuple);
+ MyDatabaseTableSpace = dbform->dattablespace;
+ /* take database name from the caller, just for paranoia */
+ strlcpy(dbname, in_dbname, sizeof(dbname));
+ }
+ else
+ {
+ /* caller specified database by OID */
+ HeapTuple tuple;
+ Form_pg_database dbform;
+
+ tuple = GetDatabaseTupleByOid(dboid);
+ if (!HeapTupleIsValid(tuple))
+ ereport(FATAL,
+ (errcode(ERRCODE_UNDEFINED_DATABASE),
+ errmsg("database %u does not exist", dboid)));
+ dbform = (Form_pg_database) GETSTRUCT(tuple);
+ MyDatabaseId = HeapTupleGetOid(tuple);
+ MyDatabaseTableSpace = dbform->dattablespace;
+ Assert(MyDatabaseId == dboid);
+ strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
+ /* pass the database name back to the caller */
+ if (out_dbname)
+ strcpy(out_dbname, dbname);
+ }
+
+ /* Now we can mark our PGPROC entry with the database ID */
+ /* (We assume this is an atomic store so no lock is needed) */
+ MyProc->databaseId = MyDatabaseId;
+
+ /*
+ * Now, take a writer's lock on the database we are trying to connect to.
+ * If there is a concurrently running DROP DATABASE on that database,
+ * this will block us until it finishes (and has committed its update of
+ * pg_database).
*
* Note that the lock is not held long, only until the end of this startup
* transaction. This is OK since we are already advertising our use of
RowExclusiveLock);
/*
- * Recheck the flat file copy of pg_database to make sure the target
- * database hasn't gone away. If there was a concurrent DROP DATABASE,
- * this ensures we will die cleanly without creating a mess.
+ * Recheck pg_database to make sure the target database hasn't gone away.
+ * If there was a concurrent DROP DATABASE, this ensures we will die
+ * cleanly without creating a mess.
*/
if (!bootstrap)
{
- Oid dbid2;
- Oid tsid2;
+ HeapTuple tuple;
- if (!FindMyDatabase(dbname, &dbid2, &tsid2) ||
- dbid2 != MyDatabaseId || tsid2 != MyDatabaseTableSpace)
+ tuple = GetDatabaseTuple(dbname);
+ if (!HeapTupleIsValid(tuple) ||
+ MyDatabaseId != HeapTupleGetOid(tuple) ||
+ MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
ereport(FATAL,
(errcode(ERRCODE_UNDEFINED_DATABASE),
- errmsg("database \"%s\" does not exist",
- dbname),
+ errmsg("database \"%s\" does not exist", dbname),
errdetail("It seems to have just been dropped or renamed.")));
}
* Now we should be able to access the database directory safely. Verify
* it's there and looks reasonable.
*/
+ fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
+
if (!bootstrap)
{
if (access(fullpath, F_OK) == -1)
ValidatePgVersion(fullpath);
}
+ SetDatabasePath(fullpath);
+
/*
* It's now possible to do real access to the system catalogs.
*
* Load relcache entries for the system catalogs. This must create at
* least the minimum set of "nailed-in" cache entries.
*/
- RelationCacheInitializePhase2();
+ RelationCacheInitializePhase3();
/*
* Figure out our postgres user id, and see if we are a superuser.
initialize_acl();
/*
- * Read the real pg_database row for our database, check permissions and
+ * Re-read the pg_database row for our database, check permissions and
* set up database-specific GUC settings. We can't do this until all the
* database-access infrastructure is up. (Also, it wants to know if the
* user is a superuser, so the above stuff has to happen first.)
DATA(insert ( 1259 cmax 29 0 0 4 -6 0 -1 -1 t p i t f f t 0 _null_));
DATA(insert ( 1259 tableoid 26 0 0 4 -7 0 -1 -1 t p i t f f t 0 _null_));
+/* ----------------
+ * pg_database
+ *
+ * pg_database is not bootstrapped in the same way as the other relations that
+ * have hardwired pg_attribute entries in this file. However, we do need
+ * a "Schema_xxx" macro for it --- see relcache.c.
+ *
+ * The macro expands to 13 brace-enclosed initializers, one per column from
+ * datname through datacl. Every entry starts with 1262 (presumably the
+ * relation OID of pg_database) and its seventh value runs from 1 to 13
+ * (presumably the attribute number).
+ *
+ * NOTE(review): the initializers are positional, so they presumably have to
+ * track both the field order of FormData_pg_attribute and the column order
+ * of pg_database --- confirm against those declarations when either changes.
+ * ----------------
+ */
+#define Schema_pg_database \
+{ 1262, {"datname"}, 19, -1, 0, NAMEDATALEN, 1, 0, -1, -1, false, 'p', 'c', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datdba"}, 26, -1, 0, 4, 2, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"encoding"}, 23, -1, 0, 4, 3, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datcollate"}, 19, -1, 0, NAMEDATALEN, 4, 0, -1, -1, false, 'p', 'c', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datctype"}, 19, -1, 0, NAMEDATALEN, 5, 0, -1, -1, false, 'p', 'c', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datistemplate"}, 16, -1, 0, 1, 6, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datallowconn"}, 16, -1, 0, 1, 7, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datconnlimit"}, 23, -1, 0, 4, 8, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datlastsysoid"}, 26, -1, 0, 4, 9, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datfrozenxid"}, 28, -1, 0, 4, 10, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"dattablespace"}, 26, -1, 0, 4, 11, 0, -1, -1, true, 'p', 'i', true, false, false, true, 0, { 0 } }, \
+{ 1262, {"datconfig"}, 1009, -1, 0, -1, 12, 1, -1, -1, false, 'x', 'i', false, false, false, true, 0, { 0 } }, \
+{ 1262, {"datacl"}, 1034, -1, 0, -1, 13, 1, -1, -1, false, 'x', 'i', false, false, false, true, 0, { 0 } }
+
/* ----------------
* pg_index
*