代码:
根据opername从pg_operator获取记录,并放到SysCache中
cpp
根据opername进行查询
/* List search of the OPERNAMENSP syscache, supplying opername as the only key value. */
catlist = SearchSysCacheList1(OPERNAMENSP, CStringGetDatum(opername));
/*
* SearchCatCacheList
*
* Generate a list of all tuples matching a partial key (that is,
* a key specifying just the first K of the cache's N key columns).
*
* It doesn't make any sense to specify all of the cache's key columns
* here: since the key is unique, there could be at most one match, so
* you ought to use SearchCatCache() instead. Hence this function takes
* one fewer Datum argument than SearchCatCache() does.
*
* The caller must not modify the list object or the pointed-to tuples,
* and must call ReleaseCatCacheList() when done with the list.
*/
CatCList *
SearchCatCacheList(CatCache *cache,
int nkeys,
Datum v1,
Datum v2,
Datum v3)
{
Datum v4 = 0; /* dummy last-column value */
Datum arguments[CATCACHE_MAXKEYS];
uint32 lHashValue;
Index lHashIndex;
dlist_iter iter;
dlist_head *lbucket;
CatCList *cl;
CatCTup *ct;
List *volatile ctlist;
ListCell *ctlist_item;
int nmembers;
bool ordered;
HeapTuple ntp;
MemoryContext oldcxt;
int i;
CatCInProgress *save_in_progress;
CatCInProgress in_progress_ent;
/*
* one-time startup overhead for each cache
*/
//如果cc_tupdesc为空,说明该cache尚未初始化,先调用CatalogCacheInitializeCache初始化,见代码2
if (unlikely(cache->cc_tupdesc == NULL))
CatalogCacheInitializeCache(cache);
Assert(nkeys > 0 && nkeys < cache->cc_nkeys);
#ifdef CATCACHE_STATS
cache->cc_lsearches++;
#endif
/* Initialize local parameter array */
arguments[0] = v1;
arguments[1] = v2;
arguments[2] = v3;
arguments[3] = v4;
/*
* If we haven't previously done a list search in this cache, create the
* bucket header array; otherwise, consider whether it's time to enlarge
* it.
*/
if (cache->cc_lbucket == NULL)
{
/* Arbitrary initial size --- must be a power of 2 */
int nbuckets = 16;
cache->cc_lbucket = (dlist_head *)
MemoryContextAllocZero(CacheMemoryContext,
nbuckets * sizeof(dlist_head));
/* Don't set cc_nlbuckets if we get OOM allocating cc_lbucket */
cache->cc_nlbuckets = nbuckets;
}
else
{
/*
* If the hash table has become too full, enlarge the buckets array.
* Quite arbitrarily, we enlarge when fill factor > 2.
*/
if (cache->cc_nlist > cache->cc_nlbuckets * 2)
RehashCatCacheLists(cache);
}
/*
* Find the hash bucket in which to look for the CatCList.
*/
lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4);
lHashIndex = HASH_INDEX(lHashValue, cache->cc_nlbuckets);
/*
* scan the items until we find a match or exhaust our list
*
* Note: it's okay to use dlist_foreach here, even though we modify the
* dlist within the loop, because we don't continue the loop afterwards.
*/
lbucket = &cache->cc_lbucket[lHashIndex];
dlist_foreach(iter, lbucket)
{
cl = dlist_container(CatCList, cache_elem, iter.cur);
if (cl->dead)
continue; /* ignore dead entries */
if (cl->hash_value != lHashValue)
continue; /* quickly skip entry if wrong hash val */
/*
* see if the cached list matches our key.
*/
if (cl->nkeys != nkeys)
continue;
if (!CatalogCacheCompareTuple(cache, nkeys, cl->keys, arguments))
continue;
/*
* We found a matching list. Move the list to the front of the list
* for its hashbucket, so as to speed subsequent searches. (We do not
* move the members to the fronts of their hashbucket lists, however,
* since there's no point in that unless they are searched for
* individually.)
*/
dlist_move_head(lbucket, &cl->cache_elem);
/* Bump the list's refcount and return it */
ResourceOwnerEnlarge(CurrentResourceOwner);
cl->refcount++;
ResourceOwnerRememberCatCacheListRef(CurrentResourceOwner, cl);
CACHE_elog(DEBUG2, "SearchCatCacheList(%s): found list",
cache->cc_relname);
#ifdef CATCACHE_STATS
cache->cc_lhits++;
#endif
return cl;
}
代码2 CatalogCacheInitializeCache
cpp
/*
 * CatalogCacheInitializeCache
 *
 * One-time setup of a catcache. Opens the catalog identified by
 * cache->cc_reloid, copies its tuple descriptor, relation name and
 * relisshared flag while CacheMemoryContext is current, and then fills in
 * the hash/equality support and scan-key fields for each of the
 * cache->cc_nkeys key columns. cc_tupdesc is assigned last; callers test
 * cc_tupdesc == NULL to decide whether this function still has to run.
 */
static void
CatalogCacheInitializeCache(CatCache *cache)
{
Relation relation;
MemoryContext oldcxt;
TupleDesc tupdesc;
int i;
CatalogCacheInitializeCache_DEBUG1;
/* Open the catalog relation behind this cache (see the table_open listing, "代码3") */
relation = table_open(cache->cc_reloid, AccessShareLock);
/*
* switch to the cache context so our allocations do not vanish at the end
* of a transaction
*/
Assert(CacheMemoryContext != NULL);
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
* copy the relcache's tuple descriptor to permanent cache storage
*/
tupdesc = CreateTupleDescCopyConstr(RelationGetDescr(relation));
/*
* save the relation's name and relisshared flag, too (cc_relname is used
* only for debugging purposes)
*/
cache->cc_relname = pstrdup(RelationGetRelationName(relation));
cache->cc_relisshared = RelationGetForm(relation)->relisshared;
/*
* return to the caller's memory context and close the rel
*/
MemoryContextSwitchTo(oldcxt);
table_close(relation, AccessShareLock);
CACHE_elog(DEBUG2, "CatalogCacheInitializeCache: %s, %d keys",
cache->cc_relname, cache->cc_nkeys);
/*
* initialize cache's key information
*/
for (i = 0; i < cache->cc_nkeys; ++i)
{
Oid keytype;
RegProcedure eqfunc;
CatalogCacheInitializeCache_DEBUG2;
/* A positive key number is a 1-based column number in the copied tupdesc */
if (cache->cc_keyno[i] > 0)
{
Form_pg_attribute attr = TupleDescAttr(tupdesc,
cache->cc_keyno[i] - 1);
keytype = attr->atttypid;
/* cache key columns should always be NOT NULL */
Assert(attr->attnotnull);
}
else
{
/* Negative key numbers are rejected; a key number of 0 falls through and is treated as an OID key */
if (cache->cc_keyno[i] < 0)
elog(FATAL, "sys attributes are not supported in caches");
keytype = OIDOID;
}
/* Fetch the hash function, equality function OID and fast-equality routine for this key type */
GetCCHashEqFuncs(keytype,
&cache->cc_hashfunc[i],
&eqfunc,
&cache->cc_fastequal[i]);
/*
* Do equality-function lookup (we assume this won't need a catalog
* lookup for any supported type)
*/
fmgr_info_cxt(eqfunc,
&cache->cc_skey[i].sk_func,
CacheMemoryContext);
/* Initialize sk_attno suitably for HeapKeyTest() and heap scans */
cache->cc_skey[i].sk_attno = cache->cc_keyno[i];
/* Fill in sk_strategy as well --- always standard equality */
cache->cc_skey[i].sk_strategy = BTEqualStrategyNumber;
cache->cc_skey[i].sk_subtype = InvalidOid;
/* If a catcache key requires a collation, it must be C collation */
cache->cc_skey[i].sk_collation = C_COLLATION_OID;
CACHE_elog(DEBUG2, "CatalogCacheInitializeCache %s %d %p",
cache->cc_relname, i, cache);
}
/*
* mark this cache fully initialized
*/
cache->cc_tupdesc = tupdesc;
}
代码3 table_open
cpp
/* ----------------
* table_open - open a table relation by relation OID
*
* This is essentially relation_open plus check that the relation
* is not an index nor a composite type. (The caller should also
* check that it's not a view or foreign table before assuming it has
* storage.)
* ----------------
*/
Relation
table_open(Oid relationId, LOCKMODE lockmode)
{
	/* Open (and, per lockmode, lock) the relation through the generic entry point. */
	Relation	rel = relation_open(relationId, lockmode);

	/* Apply the table-specific relkind check to what relation_open handed back. */
	validate_relation_kind(rel);

	return rel;
}
/* ----------------
* relation_open - open any relation by relation OID
*
* If lockmode is not "NoLock", the specified kind of lock is
* obtained on the relation. (Generally, NoLock should only be
* used if the caller knows it has some appropriate lock on the
* relation already.)
*
* An error is raised if the relation does not exist.
*
* NB: a "relation" is anything with a pg_class entry. The caller is
* expected to check whether the relkind is something it can handle.
* ----------------
*/
Relation
relation_open(Oid relationId, LOCKMODE lockmode)
{
Relation r;
Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
/* Get the lock before trying to open the relcache entry */
/* i.e. lock the relation first, unless the caller passed NoLock */
if (lockmode != NoLock)
LockRelationOid(relationId, lockmode);
/* The relcache does all the real work... */
/*
* Look relationId up in the relcache; when no entry exists, one is built
* and added. See the RelationIdGetRelation listing ("代码4") for details.
*/
r = RelationIdGetRelation(relationId);
if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId);
/*
* If we didn't get the lock ourselves, assert that caller holds one,
* except in bootstrap mode where no locks are used.
*/
Assert(lockmode != NoLock ||
IsBootstrapProcessingMode() ||
CheckRelationLockedByMe(r, AccessShareLock, true));
/* Make note that we've accessed a temporary relation */
if (RelationUsesLocalBuffers(r))
MyXactFlags |= XACT_FLAGS_ACCESSEDTEMPNAMESPACE;
pgstat_init_relation(r);
return r;
}
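代码4:RelationIdGetRelation(先列出它依赖的 dynahash 查找函数 hash_search / hash_initial_lookup / hash_search_with_hash_value 以及 RelationIdCacheLookup 宏,最后才是 RelationIdGetRelation 本身)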
cpp
/*
* hash_search -- look up key in table and perform action
* hash_search_with_hash_value -- same, with key's hash value already computed
*
* action is one of:
* HASH_FIND: look up key in table
* HASH_ENTER: look up key in table, creating entry if not present
* HASH_ENTER_NULL: same, but return NULL if out of memory
* HASH_REMOVE: look up key in table, remove entry if present
*
* Return value is a pointer to the element found/entered/removed if any,
* or NULL if no match was found. (NB: in the case of the REMOVE action,
* the result is a dangling pointer that shouldn't be dereferenced!)
*
* HASH_ENTER will normally ereport a generic "out of memory" error if
* it is unable to create a new entry. The HASH_ENTER_NULL operation is
* the same except it will return NULL if out of memory.
*
* If foundPtr isn't NULL, then *foundPtr is set true if we found an
* existing entry in the table, false otherwise. This is needed in the
* HASH_ENTER case, but is redundant with the return value otherwise.
*
* For hash_search_with_hash_value, the hashvalue parameter must have been
* calculated with get_hash_value().
*/
void *
hash_search(HTAB *hashp, /* table to search; RelationIdCache in the relcache lookup shown in this file */
const void *keyPtr, /* pointer to the key; the relcache lookup passes the address of its relation OID variable */
HASHACTION action,
bool *foundPtr)
{
/* Hash the key with the table's own hash function, then delegate to the variant that takes a precomputed hash. */
return hash_search_with_hash_value(hashp,
keyPtr,
hashp->hash(keyPtr, hashp->keysize),
action,
foundPtr);
}
/* True when the table header reports a non-zero number of partitions. */
#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
/* Freelist index for a hash code: hashcode modulo NUM_FREELISTS when partitioned, otherwise always 0. */
#define FREELIST_IDX(hctl, hashcode) \
(IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
/*
* Do initial lookup of a bucket for the given hash value, retrieving its
* bucket number and its hash bucket.
*/
static inline uint32
hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
{
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT segp;
long segment_num;
long segment_ndx;
uint32 bucket;
/*
* Buckets are reached through two levels: the directory (hashp->dir) holds
* segments, and each segment holds buckets. The author of these notes
* compares this to finding a file in a folder on a drive.
* NOTE(review): the notes attribute the two-level layout to efficiency;
* that rationale is not visible in this excerpt.
*/
/* Map the hash value to a bucket number */
bucket = calc_bucket(hctl, hashvalue);
/*
* Directory index of the segment containing that bucket.
* NOTE(review): the notes mention "256" here -- presumably the default
* number of buckets per segment; confirm against the dynahash defaults.
*/
segment_num = bucket >> hashp->sshift;
/* Position of the bucket inside its segment */
segment_ndx = MOD(bucket, hashp->ssize);
/* Fetch the segment from the directory; a missing segment means the table is corrupted */
segp = hashp->dir[segment_num];
if (segp == NULL)
hash_corrupted(hashp);
/* Hand back the address of the bucket slot; the caller follows the entry chain hanging off it */
*bucketptr = &segp[segment_ndx];
return bucket;
}
/*
 * Same as hash_search, but the caller supplies the key's hash value.
 * Locates the bucket, walks its collision chain for a matching key, and then
 * carries out the requested action (find / remove / enter).
 */
void *
hash_search_with_hash_value(HTAB *hashp,
const void *keyPtr,
uint32 hashvalue,
HASHACTION action,
bool *foundPtr)
{
HASHHDR *hctl = hashp->hctl;
/* Pick the freelist index that corresponds to this hash value */
int freelist_idx = FREELIST_IDX(hctl, hashvalue);
Size keysize;
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HashCompareFunc match;
#ifdef HASH_STATISTICS
hash_accesses++;
hctl->accesses++;
#endif
/*
* If inserting, check if it is time to split a bucket.
*
* NOTE: failure to expand table is not a fatal error, it just means we
* have to run at higher fill factor than we wanted. However, if we're
* using the palloc allocator then it will throw error anyway on
* out-of-memory, so we must do this before modifying the table.
*/
if (action == HASH_ENTER || action == HASH_ENTER_NULL)
{
/*
* Can't split if running in partitioned mode, nor if frozen, nor if
* table is the subject of any active hash_seq_search scans.
*/
if (hctl->freeList[0].nentries > (long) hctl->max_bucket &&
!IS_PARTITIONED(hctl) && !hashp->frozen &&
!has_seq_scans(hashp))
(void) expand_table(hashp);
}
/*
* Do the initial lookup
*/
/* hash_initial_lookup stores the address of the bucket slot in prevBucketPtr; its return value (the bucket number) is not needed here */
(void) hash_initial_lookup(hashp, hashvalue, &prevBucketPtr);
currBucket = *prevBucketPtr;
/*
* Follow collision chain looking for matching key
*/
match = hashp->match; /* save one fetch in inner loop */
keysize = hashp->keysize; /* ditto */
while (currBucket != NULL)
{
/*
* An entry matches when its stored hash value equals ours and the
* table's match function reports the keys equal (returns 0). In the
* RelationIdCache lookup shown in this file the key is a relation OID.
*/
if (currBucket->hashvalue == hashvalue &&
match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
break;
/* Remember the link we came through so a removal or insertion can patch it */
prevBucketPtr = &(currBucket->link);
currBucket = *prevBucketPtr;
#ifdef HASH_STATISTICS
hash_collisions++;
hctl->collisions++;
#endif
}
if (foundPtr)
*foundPtr = (bool) (currBucket != NULL);
/*
* OK, now what?
*/
switch (action)
{
case HASH_FIND:
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
return NULL;
case HASH_REMOVE:
if (currBucket != NULL)
{
/* if partitioned, must lock to touch nentries and freeList */
if (IS_PARTITIONED(hctl))
SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
/* delete the record from the appropriate nentries counter. */
Assert(hctl->freeList[freelist_idx].nentries > 0);
hctl->freeList[freelist_idx].nentries--;
/* remove record from hash bucket's chain. */
*prevBucketPtr = currBucket->link;
/* add the record to the appropriate freelist. */
currBucket->link = hctl->freeList[freelist_idx].freeList;
hctl->freeList[freelist_idx].freeList = currBucket;
if (IS_PARTITIONED(hctl))
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
/*
* better hope the caller is synchronizing access to this
* element, because someone else is going to reuse it the next
* time something is added to the table
*/
return (void *) ELEMENTKEY(currBucket);
}
return NULL;
case HASH_ENTER:
case HASH_ENTER_NULL:
/* Return existing element if found, else create one */
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
/* disallow inserts if frozen */
if (hashp->frozen)
elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
hashp->tabname);
currBucket = get_hash_entry(hashp, freelist_idx);
if (currBucket == NULL)
{
/* out of memory */
if (action == HASH_ENTER_NULL)
return NULL;
/* report a generic message */
if (hashp->isshared)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
else
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
/* link into hashbucket chain */
/* (prevBucketPtr points at the chain's terminating NULL link after an unsuccessful walk) */
*prevBucketPtr = currBucket;
currBucket->link = NULL;
/* copy key into record */
currBucket->hashvalue = hashvalue;
hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
/*
* Caller is expected to fill the data field on return. DO NOT
* insert any code that could possibly throw error here, as doing
* so would leave the table entry incomplete and hence corrupt the
* caller's data structure.
*/
return (void *) ELEMENTKEY(currBucket);
}
elog(ERROR, "unrecognized hash action code: %d", (int) action);
return NULL; /* keep compiler quiet */
}
/*
 * RelationIdCacheLookup
 *
 * Probe RelationIdCache for ID using HASH_FIND. RELATION is set to the
 * entry's reldesc on a hit and to NULL on a miss. ID must be an lvalue,
 * because its address is passed as the hash key.
 */
#define RelationIdCacheLookup(ID, RELATION) \
do { \
RelIdCacheEnt *hentry; \
hentry = (RelIdCacheEnt *) hash_search(RelationIdCache, \
&(ID), \
HASH_FIND, NULL); \
if (hentry) \
RELATION = hentry->reldesc; \
else \
RELATION = NULL; \
} while(0)
/* ----------------------------------------------------------------
* Relation Descriptor Lookup Interface
* ----------------------------------------------------------------
*/
/*
* RelationIdGetRelation
*
* Lookup a reldesc by OID; make one if not already in cache.
*
* Returns NULL if no pg_class row could be found for the given relid
* (suggesting we are trying to access a just-deleted relation).
* Any other error is reported via elog.
*
* NB: caller should already have at least AccessShareLock on the
* relation ID, else there are nasty race conditions.
*
* NB: relation ref count is incremented, or set to 1 if new entry.
* Caller should eventually decrement count. (Usually,
* that happens by calling RelationClose().)
*/
Relation
RelationIdGetRelation(Oid relationId)
{
Relation rd;
/* Make sure we're in an xact, even if this ends up being a cache hit */
Assert(IsTransactionState());
/*
* first try to find reldesc in the cache
*/
/* Probe the relcache hash table (RelationIdCache) for this OID */
RelationIdCacheLookup(relationId, rd);
if (RelationIsValid(rd))
{
/* return NULL for dropped relations */
if (rd->rd_droppedSubid != InvalidSubTransactionId)
{
Assert(!rd->rd_isvalid);
return NULL;
}
/* Cache hit: take a reference before any revalidation work */
RelationIncrementReferenceCount(rd);
/* revalidate cache entry if necessary */
if (!rd->rd_isvalid)
{
/*
* Indexes only have a limited number of possible schema changes,
* and we don't want to use the full-blown procedure because it's
* a headache for indexes that reload itself depends on.
*/
if (rd->rd_rel->relkind == RELKIND_INDEX ||
rd->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
RelationReloadIndexInfo(rd);
else
RelationClearRelation(rd, true);
/*
* Normally entries need to be valid here, but before the relcache
* has been initialized, not enough infrastructure exists to
* perform pg_class lookups. The structure of such entries doesn't
* change, but we still want to update the rd_rel entry. So
* rd_isvalid = false is left in place for a later lookup.
*/
Assert(rd->rd_isvalid ||
(rd->rd_isnailed && !criticalRelcachesBuilt));
}
return rd;
}
/*
* no reldesc in the cache, so have RelationBuildDesc() build one and add
* it.
*/
/* Cache miss: build a new entry (see the RelationBuildDesc listing, "代码5") */
rd = RelationBuildDesc(relationId, true);
if (RelationIsValid(rd))
RelationIncrementReferenceCount(rd);
return rd;
}
代码5: RelationBuildDesc
cpp
/*
* RelationBuildDesc
*
* Build a relation descriptor. The caller must hold at least
* AccessShareLock on the target relid.
*
* The new descriptor is inserted into the hash table if insertIt is true.
*
* Returns NULL if no pg_class row could be found for the given relid
* (suggesting we are trying to access a just-deleted relation).
* Any other error is reported via elog.
*/
static Relation
RelationBuildDesc(Oid targetRelId, bool insertIt)
{
int in_progress_offset;
Relation relation;
Oid relid;
HeapTuple pg_class_tuple;
Form_pg_class relp;
/*
* This function and its subroutines can allocate a good deal of transient
* data in CurrentMemoryContext. Traditionally we've just leaked that
* data, reasoning that the caller's context is at worst of transaction
* scope, and relcache loads shouldn't happen so often that it's essential
* to recover transient data before end of statement/transaction. However
* that's definitely not true when debug_discard_caches is active, and
* perhaps it's not true in other cases.
*
* When debug_discard_caches is active or when forced to by
* RECOVER_RELATION_BUILD_MEMORY=1, arrange to allocate the junk in a
* temporary context that we'll free before returning. Make it a child of
* caller's context so that it will get cleaned up appropriately if we
* error out partway through.
*/
#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
MemoryContext tmpcxt = NULL;
MemoryContext oldcxt = NULL;
if (RECOVER_RELATION_BUILD_MEMORY || debug_discard_caches > 0)
{
tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
"RelationBuildDesc workspace",
ALLOCSET_DEFAULT_SIZES);
oldcxt = MemoryContextSwitchTo(tmpcxt);
}
#endif
/* Register to catch invalidation messages */
/* (the in-progress array is doubled when it is full) */
if (in_progress_list_len >= in_progress_list_maxlen)
{
int allocsize;
allocsize = in_progress_list_maxlen * 2;
in_progress_list = repalloc(in_progress_list,
allocsize * sizeof(*in_progress_list));
in_progress_list_maxlen = allocsize;
}
in_progress_offset = in_progress_list_len++;
in_progress_list[in_progress_offset].reloid = targetRelId;
retry:
/* Each attempt starts with the flag cleared; it is re-checked after the build (see below) */
in_progress_list[in_progress_offset].invalidated = false;
/*
* find the tuple in pg_class corresponding to the given relation id
*/
/*
* Conceptually similar to:
*     SELECT * FROM pg_class WHERE oid = targetRelId
*/
pg_class_tuple = ScanPgRelation(targetRelId, true, false);
/*
* if no such tuple exists, return NULL
*/
if (!HeapTupleIsValid(pg_class_tuple))
{
#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
if (tmpcxt)
{
/* Return to caller's context, and blow away the temporary context */
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(tmpcxt);
}
#endif
/* Pop our in-progress registration, which must still be the last one */
Assert(in_progress_offset + 1 == in_progress_list_len);
in_progress_list_len--;
return NULL;
}
/*
* get information from the pg_class_tuple
*/
relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
relid = relp->oid;
Assert(relid == targetRelId);
/*
* allocate storage for the relation descriptor, and copy pg_class_tuple
* to relation->rd_rel.
*/
relation = AllocateRelationDesc(relp);
/*
* initialize the relation's relation id (relation->rd_id)
*/
RelationGetRelid(relation) = relid;
/*
* Normal relations are not nailed into the cache. Since we don't flush
* new relations, it won't be new. It could be temp though.
*/
relation->rd_refcnt = 0;
relation->rd_isnailed = false;
relation->rd_createSubid = InvalidSubTransactionId;
relation->rd_newRelfilelocatorSubid = InvalidSubTransactionId;
relation->rd_firstRelfilelocatorSubid = InvalidSubTransactionId;
relation->rd_droppedSubid = InvalidSubTransactionId;
switch (relation->rd_rel->relpersistence)
{
/* Unlogged and permanent relations: no owning backend, not a local temp relation */
case RELPERSISTENCE_UNLOGGED:
case RELPERSISTENCE_PERMANENT:
relation->rd_backend = INVALID_PROC_NUMBER;
relation->rd_islocaltemp = false;
break;
/* Temporary relations */
case RELPERSISTENCE_TEMP:
/* Does the relation live in one of this session's own temp namespaces? */
if (isTempOrTempToastNamespace(relation->rd_rel->relnamespace))
{
/* Identifier of the backend owning the temp relation (per the original notes, this is a proc number, not an OS pid) */
relation->rd_backend = ProcNumberForTempRelations();
relation->rd_islocaltemp = true;
}
else
{
/*
* If it's a temp table, but not one of ours, we have to use
* the slow, grotty method to figure out the owning backend.
*
* Note: it's possible that rd_backend gets set to
* MyProcNumber here, in case we are looking at a pg_class
* entry left over from a crashed backend that coincidentally
* had the same ProcNumber we're using. We should *not*
* consider such a table to be "ours"; this is why we need the
* separate rd_islocaltemp flag. The pg_class entry will get
* flushed if/when we clean out the corresponding temp table
* namespace in preparation for using it.
*/
relation->rd_backend =
GetTempNamespaceProcNumber(relation->rd_rel->relnamespace);
Assert(relation->rd_backend != INVALID_PROC_NUMBER);
relation->rd_islocaltemp = false;
}
break;
default:
elog(ERROR, "invalid relpersistence: %c",
relation->rd_rel->relpersistence);
break;
}
/*
* initialize the tuple descriptor (relation->rd_att).
*/
/* AllocateRelationDesc only allocated rd_att; per the original notes, this call fills in the per-attribute (pg_attribute) data */
RelationBuildTupleDesc(relation);
/* foreign key data is not loaded till asked for */
relation->rd_fkeylist = NIL;
relation->rd_fkeyvalid = false;
/* partitioning data is not loaded till asked for */
relation->rd_partkey = NULL;
relation->rd_partkeycxt = NULL;
relation->rd_partdesc = NULL;
relation->rd_partdesc_nodetached = NULL;
relation->rd_partdesc_nodetached_xmin = InvalidTransactionId;
relation->rd_pdcxt = NULL;
relation->rd_pddcxt = NULL;
relation->rd_partcheck = NIL;
relation->rd_partcheckvalid = false;
relation->rd_partcheckcxt = NULL;
/*
* initialize access method information
*/
if (relation->rd_rel->relkind == RELKIND_INDEX ||
relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
RelationInitIndexAccessInfo(relation);
else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) ||
relation->rd_rel->relkind == RELKIND_SEQUENCE)
/* Set up the table access method (see the RelationInitTableAccessMethod listing, "代码6") */
RelationInitTableAccessMethod(relation);
else if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/*
* Do nothing: access methods are a setting that partitions can
* inherit.
*/
}
else
Assert(relation->rd_rel->relam == InvalidOid);
/* extract reloptions if any */
RelationParseRelOptions(relation, pg_class_tuple);
/*
* Fetch rules and triggers that affect this relation.
*
* Note that RelationBuildRuleLock() relies on this being done after
* extracting the relation's reloptions.
*/
if (relation->rd_rel->relhasrules)
RelationBuildRuleLock(relation);
else
{
relation->rd_rules = NULL;
relation->rd_rulescxt = NULL;
}
if (relation->rd_rel->relhastriggers)
RelationBuildTriggers(relation);
else
relation->trigdesc = NULL;
if (relation->rd_rel->relrowsecurity)
RelationBuildRowSecurity(relation);
else
relation->rd_rsdesc = NULL;
/*
* initialize the relation lock manager information
*/
RelationInitLockInfo(relation); /* see lmgr.c */
/*
* initialize physical addressing information for the relation
*/
RelationInitPhysicalAddr(relation);
/* make sure relation is marked as having no open file yet */
relation->rd_smgr = NULL;
/*
* now we can free the memory allocated for pg_class_tuple
*/
heap_freetuple(pg_class_tuple);
/*
* If an invalidation arrived mid-build, start over. Between here and the
* end of this function, don't add code that does or reasonably could read
* system catalogs. That range must be free from invalidation processing
* for the !insertIt case. For the insertIt case, RelationCacheInsert()
* will enroll this relation in ordinary relcache invalidation processing,
*/
if (in_progress_list[in_progress_offset].invalidated)
{
RelationDestroyRelation(relation, false);
goto retry;
}
Assert(in_progress_offset + 1 == in_progress_list_len);
in_progress_list_len--;
/*
* Insert newly created relation into relcache hash table, if requested.
*
* There is one scenario in which we might find a hashtable entry already
* present, even though our caller failed to find it: if the relation is a
* system catalog or index that's used during relcache load, we might have
* recursively created the same relcache entry during the preceding steps.
* So allow RelationCacheInsert to delete any already-present relcache
* entry for the same OID. The already-present entry should have refcount
* zero (else somebody forgot to close it); in the event that it doesn't,
* we'll elog a WARNING and leak the already-present entry.
*/
if (insertIt)
RelationCacheInsert(relation, true);
/* It's fully valid */
relation->rd_isvalid = true;
#ifdef MAYBE_RECOVER_RELATION_BUILD_MEMORY
if (tmpcxt)
{
/* Return to caller's context, and blow away the temporary context */
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(tmpcxt);
}
#endif
return relation;
}
/*
* AllocateRelationDesc
*
* This is used to allocate memory for a new relation descriptor
* and initialize the rd_rel field from the given pg_class tuple.
*/
static Relation
AllocateRelationDesc(Form_pg_class relp)
{
Relation relation;
MemoryContext oldcxt;
Form_pg_class relationForm;
/* Relcache entries must live in CacheMemoryContext */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
* allocate and zero space for new relation descriptor
*/
relation = (Relation) palloc0(sizeof(RelationData));
/* make sure relation is marked as having no open file yet */
relation->rd_smgr = NULL;
/*
* Copy the relation tuple form
*
* We only allocate space for the fixed fields, ie, CLASS_TUPLE_SIZE. The
* variable-length fields (relacl, reloptions) are NOT stored in the
* relcache --- there'd be little point in it, since we don't copy the
* tuple's nulls bitmap and hence wouldn't know if the values are valid.
* Bottom line is that relacl *cannot* be retrieved from the relcache. Get
* it from the syscache if you need it. The same goes for the original
* form of reloptions (however, we do store the parsed form of reloptions
* in rd_options).
*/
/* Size of fixed part of pg_class tuples, not counting var-length fields */
/* (this macro definition was pasted into the function body by the author of these notes, for reference) */
#define CLASS_TUPLE_SIZE \
(offsetof(FormData_pg_class,relminmxid) + sizeof(TransactionId))
relationForm = (Form_pg_class) palloc(CLASS_TUPLE_SIZE);
/* Make a private copy of the fixed-size part of the caller's pg_class row */
memcpy(relationForm, relp, CLASS_TUPLE_SIZE);
/* initialize relation tuple form */
/* i.e. hang the copy off rd_rel */
relation->rd_rel = relationForm;
/* and allocate attribute tuple form storage */
relation->rd_att = CreateTemplateTupleDesc(relationForm->relnatts);
/* which we mark as a reference-counted tupdesc */
relation->rd_att->tdrefcount = 1;
MemoryContextSwitchTo(oldcxt);
return relation;
}
/*
* CreateTemplateTupleDesc
* This function allocates an empty tuple descriptor structure.
*
* Tuple type ID information is initially set for an anonymous record type;
* caller can overwrite this if needed.
*/
TupleDesc
CreateTemplateTupleDesc(int natts)
{
TupleDesc desc;
/*
* sanity checks
*/
Assert(natts >= 0);
/*
* Allocate enough memory for the tuple descriptor, including the
* attribute rows.
*
* Note: the attribute array stride is sizeof(FormData_pg_attribute),
* since we declare the array elements as FormData_pg_attribute for
* notational convenience. However, we only guarantee that the first
* ATTRIBUTE_FIXED_PART_SIZE bytes of each entry are valid; most code that
* copies tupdesc entries around copies just that much. In principle that
* could be less due to trailing padding, although with the current
* definition of pg_attribute there probably isn't any padding.
*/
/*
* This struct is passed around within the backend to describe the structure
* of tuples. For tuples coming from on-disk relations, the information is
* collected from the pg_attribute, pg_attrdef, and pg_constraint catalogs.
* Transient row types (such as the result of a join query) have anonymous
* TupleDesc structs that generally omit any constraint info; therefore the
* structure is designed to let the constraints be omitted efficiently.
*
* Note that only user attributes, not system attributes, are mentioned in
* TupleDesc.
*
* If the tupdesc is known to correspond to a named rowtype (such as a table's
* rowtype) then tdtypeid identifies that type and tdtypmod is -1. Otherwise
* tdtypeid is RECORDOID, and tdtypmod can be either -1 for a fully anonymous
* row type, or a value >= 0 to allow the rowtype to be looked up in the
* typcache.c type cache.
*
* Note that tdtypeid is never the OID of a domain over composite, even if
* we are dealing with values that are known (at some higher level) to be of
* a domain-over-composite type. This is because tdtypeid/tdtypmod need to
* match up with the type labeling of composite Datums, and those are never
* explicitly marked as being of a domain type, either.
*
* Tuple descriptors that live in caches (relcache or typcache, at present)
* are reference-counted: they can be deleted when their reference count goes
* to zero. Tuple descriptors created by the executor need no reference
* counting, however: they are simply created in the appropriate memory
* context and go away when the context is freed. We set the tdrefcount
* field of such a descriptor to -1, while reference-counted descriptors
* always have tdrefcount >= 0.
*/
typedef struct TupleDescData
{
int natts; /* number of attributes in the tuple */
Oid tdtypeid; /* composite type ID for tuple type */
int32 tdtypmod; /* typmod for tuple type */
int tdrefcount; /* reference count, or -1 if not counting */
TupleConstr *constr; /* constraints, or NULL if none */
/* attrs[N] is the description of Attribute Number N+1 */
FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER];
} TupleDescData;
*/
desc = (TupleDesc) palloc(offsetof(struct TupleDescData, attrs) +
natts * sizeof(FormData_pg_attribute));
/*
* Initialize other fields of the tupdesc.
attrs并没有填充,还需要冲pg_attribute中获取后再填充
*/
desc->natts = natts;
desc->constr = NULL;
desc->tdtypeid = RECORDOID;
desc->tdtypmod = -1;
desc->tdrefcount = -1; /* assume not reference-counted */
return desc;
}
代码6:RelationInitTableAccessMethod
cpp
/*
* Initialize table access method support for a table like relation
*/
void
RelationInitTableAccessMethod(Relation relation)
{
HeapTuple tuple;
Form_pg_am aform;
if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
{
/*
* Sequences are currently accessed like heap tables, but it doesn't
* seem prudent to show that in the catalog. So just overwrite it
* here.
*/
Assert(relation->rd_rel->relam == InvalidOid);
relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
}
else if (IsCatalogRelation(relation))
{
/*
* Avoid doing a syscache lookup for catalog tables.
*/
Assert(relation->rd_rel->relam == HEAP_TABLE_AM_OID);
/* Catalog (system) relation: hard-wire the heap AM handler */
relation->rd_amhandler = F_HEAP_TABLEAM_HANDLER;
}
else
{
/*
* Look up the table access method, save the OID of its handler
* function.
*/
Assert(relation->rd_rel->relam != InvalidOid);
/*
* Fetch the access method's pg_am row through the AMOID syscache,
* roughly: SELECT * FROM pg_am WHERE oid = <relam>.
* relam is taken from rd_rel, i.e. the relation's pg_class row.
* NOTE(review): the original note used 'poly100w'::regclass as the
* OID, which would be a pg_class OID rather than a pg_am OID.
*/
tuple = SearchSysCache1(AMOID,
ObjectIdGetDatum(relation->rd_rel->relam));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for access method %u",
relation->rd_rel->relam);
aform = (Form_pg_am) GETSTRUCT(tuple);
relation->rd_amhandler = aform->amhandler;
ReleaseSysCache(tuple);
}
/*
* Now we can fetch the table AM's API struct
*/
InitTableAmRoutine(relation);
}
* Fill in the TableAmRoutine for a relation
*
* relation's rd_amhandler must be valid already.
*/
static void
InitTableAmRoutine(Relation relation)
{
relation->rd_tableam = GetTableAmRoutine(relation->rd_amhandler);
}
/*
 * GetTableAmRoutine
 *
 * Call the table AM handler function with OID amhandler (no arguments) and
 * return the TableAmRoutine it hands back. Raises an error if the handler
 * returns NULL or something that is not a TableAmRoutine node. In
 * assert-enabled builds, it also checks that the required callbacks are set.
 */
const TableAmRoutine *
GetTableAmRoutine(Oid amhandler)
{
Datum datum;
const TableAmRoutine *routine;
/* Invoke the handler; its Datum result is interpreted as a pointer to the routine table */
datum = OidFunctionCall0(amhandler);
routine = (TableAmRoutine *) DatumGetPointer(datum);
if (routine == NULL || !IsA(routine, TableAmRoutine))
elog(ERROR, "table access method handler %u did not return a TableAmRoutine struct",
amhandler);
/*
* Assert that all required callbacks are present. That makes it a bit
* easier to keep AMs up to date, e.g. when forward porting them to a new
* major version.
*/
Assert(routine->scan_begin != NULL);
Assert(routine->scan_end != NULL);
Assert(routine->scan_rescan != NULL);
Assert(routine->scan_getnextslot != NULL);
Assert(routine->parallelscan_estimate != NULL);
Assert(routine->parallelscan_initialize != NULL);
Assert(routine->parallelscan_reinitialize != NULL);
Assert(routine->index_fetch_begin != NULL);
Assert(routine->index_fetch_reset != NULL);
Assert(routine->index_fetch_end != NULL);
Assert(routine->index_fetch_tuple != NULL);
Assert(routine->tuple_fetch_row_version != NULL);
Assert(routine->tuple_tid_valid != NULL);
Assert(routine->tuple_get_latest_tid != NULL);
Assert(routine->tuple_satisfies_snapshot != NULL);
Assert(routine->index_delete_tuples != NULL);
Assert(routine->tuple_insert != NULL);
/*
* Could be made optional, but would require throwing error during
* parse-analysis.
*/
Assert(routine->tuple_insert_speculative != NULL);
Assert(routine->tuple_complete_speculative != NULL);
Assert(routine->multi_insert != NULL);
Assert(routine->tuple_delete != NULL);
Assert(routine->tuple_update != NULL);
Assert(routine->tuple_lock != NULL);
Assert(routine->relation_set_new_filelocator != NULL);
Assert(routine->relation_nontransactional_truncate != NULL);
Assert(routine->relation_copy_data != NULL);
Assert(routine->relation_copy_for_cluster != NULL);
Assert(routine->relation_vacuum != NULL);
Assert(routine->scan_analyze_next_block != NULL);
Assert(routine->scan_analyze_next_tuple != NULL);
Assert(routine->index_build_range_scan != NULL);
Assert(routine->index_validate_scan != NULL);
Assert(routine->relation_size != NULL);
Assert(routine->relation_needs_toast_table != NULL);
Assert(routine->relation_estimate_size != NULL);
/* optional, but one callback implies presence of the other */
Assert((routine->scan_bitmap_next_block == NULL) ==
(routine->scan_bitmap_next_tuple == NULL));
Assert(routine->scan_sample_next_block != NULL);
Assert(routine->scan_sample_next_tuple != NULL);
return routine;
}
/*
 * Handler function for the heap table access method: returns a pointer to
 * the heapam_methods routine table.
 */
Datum
heap_tableam_handler(PG_FUNCTION_ARGS)
{
PG_RETURN_POINTER(&heapam_methods);
}