• R/O
  • HTTP
  • SSH
  • HTTPS

pg_hint_plan: Commit

first release


Commit MetaInfo

Révision 8edf1db866961ff432fe9b89ff708e38267b4505 (tree)
l'heure 2016-01-15 14:12:14
Auteur Kyotaro Horiguchi <horiguchi.kyotaro@lab....>
Commiter Kyotaro Horiguchi

Message de Log

Support PostgreSQL 9.5.0.

This branch PG95 was started a bit too early, so it needs an additional
merge from the master branch to complete support for PostgreSQL 9.5.

Change Summary

Modification

--- a/COPYRIGHT.postgresql
+++ b/COPYRIGHT.postgresql
@@ -2,7 +2,7 @@ core.c and make_join_rel.c are parts of PostgreSQL Database Management System.
22 (formerly known as Postgres, then as Postgres95)
33 Copyright holders of those files are following organizations:
44
5-Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
5+Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
66
77 Portions Copyright (c) 1994, The Regents of the University of California
88
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,11 @@
11 #
22 # pg_hint_plan: Makefile
33 #
4-# Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
4+# Copyright (c) 2012-2015, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
55 #
66
77 MODULES = pg_hint_plan
8-HINTPLANVER = 1.1.2
8+HINTPLANVER = 1.1.3
99
1010 REGRESS = init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-fini
1111
@@ -14,7 +14,7 @@ REGRESSION_EXPECTED = expected/init.out expected/base_plan.out expected/pg_hint_
1414 REGRESS_OPTS = --encoding=UTF8
1515
1616 EXTENSION = pg_hint_plan
17-DATA = pg_hint_plan--1.1.2.sql
17+DATA = pg_hint_plan--1.1.3.sql
1818
1919 EXTRA_CLEAN = sql/ut-fdw.sql expected/ut-fdw.out
2020
@@ -23,8 +23,8 @@ PGXS := $(shell $(PG_CONFIG) --pgxs)
2323 include $(PGXS)
2424
2525 STARBALL = pg_dbms_stats-$(DBMSSTATSVER).tar.gz
26-STARBALL94 = pg_hint_plan94-$(HINTPLANVER).tar.gz
27-STARBALLS = $(STARBALL) $(STARBALL94)
26+STARBALL95 = pg_hint_plan95-$(HINTPLANVER).tar.gz
27+STARBALLS = $(STARBALL) $(STARBALL95)
2828
2929 TARSOURCES = Makefile *.c *.h \
3030 pg_hint_plan--*.sql \
@@ -34,7 +34,7 @@ TARSOURCES = Makefile *.c *.h \
3434
3535 installcheck: $(REGRESSION_EXPECTED)
3636
37-rpms: rpm94
37+rpms: rpm95
3838
3939 # pg_hint_plan.c includes core.c and make_join_rel.c
4040 pg_hint_plan.o: core.c make_join_rel.c # pg_stat_statements.c
@@ -49,7 +49,7 @@ $(STARBALLS): $(TARSOURCES)
4949 tar -chzf $@ $(addprefix $(subst .tar.gz,,$@)/, $^)
5050 rm $(subst .tar.gz,,$@)
5151
52-rpm94: $(STARBALL94)
53- MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan94.spec
52+rpm95: $(STARBALL95)
53+ MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan95.spec
5454
5555
--- a/SPECS/pg_hint_plan94.spec
+++ /dev/null
@@ -1,84 +0,0 @@
1-# SPEC file for pg_hint_plan
2-# Copyright(C) 2012-2014 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
3-
4-%define _pgdir /usr/pgsql-9.4
5-%define _bindir %{_pgdir}/bin
6-%define _libdir %{_pgdir}/lib
7-%define _datadir %{_pgdir}/share
8-%if "%(echo ${MAKE_ROOT})" != ""
9- %define _rpmdir %(echo ${MAKE_ROOT})/RPMS
10- %define _sourcedir %(echo ${MAKE_ROOT})
11-%endif
12-
13-## Set general information for pg_hint_plan.
14-Summary: Optimizer hint for PostgreSQL 9.4
15-Name: pg_hint_plan94
16-Version: 1.1.2
17-Release: 1%{?dist}
18-License: BSD
19-Group: Applications/Databases
20-Source0: %{name}-%{version}.tar.gz
21-#URL: http://example.com/pg_hint_plan/
22-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-%(%{__id_u} -n)
23-Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION
24-
25-## We use postgresql-devel package
26-BuildRequires: postgresql94-devel
27-Requires: postgresql94-libs
28-
29-## Description for "pg_hint_plan"
30-%description
31-pg_hint_plan provides capability to force arbitrary plan to PostgreSQL' planner
32-to optimize queries by hand directly.
33-
34-If you have query plan better than which PostgreSQL chooses, you can force your
35-plan by adding special comment block with optimizer hint before the query you
36-want to optimize. You can control scan method, join method, join order, and
37-planner-related GUC parameters during planning.
38-
39-Note that this package is available for only PostgreSQL 9.4.
40-
41-## pre work for build pg_hint_plan
42-%prep
43-PATH=/usr/pgsql-9.4/bin:$PATH
44-if [ "${MAKE_ROOT}" != "" ]; then
45- pushd ${MAKE_ROOT}
46- make clean %{name}-%{version}.tar.gz
47- popd
48-fi
49-if [ ! -d %{_rpmdir} ]; then mkdir -p %{_rpmdir}; fi
50-%setup -q
51-
52-## Set variables for build environment
53-%build
54-PATH=/usr/pgsql-9.4/bin:$PATH
55-make USE_PGXS=1 %{?_smp_mflags}
56-
57-## Set variables for install
58-%install
59-rm -rf %{buildroot}
60-install -d %{buildroot}%{_libdir}
61-install pg_hint_plan.so %{buildroot}%{_libdir}/pg_hint_plan.so
62-install -d %{buildroot}%{_datadir}/extension
63-install -m 644 pg_hint_plan--1.1.2.sql %{buildroot}%{_datadir}/extension/pg_hint_plan--1.1.2.sql
64-install -m 644 pg_hint_plan.control %{buildroot}%{_datadir}/extension/pg_hint_plan.control
65-
66-%clean
67-rm -rf %{buildroot}
68-
69-%files
70-%defattr(0755,root,root)
71-%{_libdir}/pg_hint_plan.so
72-%defattr(0644,root,root)
73-%{_datadir}/extension/pg_hint_plan--1.1.2.sql
74-%{_datadir}/extension/pg_hint_plan.control
75-
76-# History of pg_hint_plan.
77-%changelog
78-* Thu Dec 17 2014 Kyotaro Horiguchi
79-- Support 9.4. New rev 1.1.2.
80-* Mon Sep 02 2013 Takashi Suzuki
81-- Initial cut for 1.1.0
82-* Mon Sep 24 2012 Shigeru Hanada <shigeru.hanada@gmail.com>
83-- Initial cut for 1.0.0
84-
--- a/core.c
+++ b/core.c
@@ -20,7 +20,7 @@
2020 * mark_dummy_rel()
2121 * restriction_is_constant_false()
2222 *
23- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
23+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
2424 * Portions Copyright (c) 1994, Regents of the University of California
2525 *
2626 *-------------------------------------------------------------------------
@@ -215,9 +215,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
215215 add_path(rel, (Path *)
216216 create_append_path(rel, subpaths, required_outer));
217217 }
218-
219- /* Select cheapest paths */
220- set_cheapest(rel);
221218 }
222219
223220 /*
@@ -720,7 +717,7 @@ join_search_one_level(PlannerInfo *root, int level)
720717 */
721718 if (joinrels[level] == NIL &&
722719 root->join_info_list == NIL &&
723- root->lateral_info_list == NIL)
720+ !root->hasLateralRTEs)
724721 elog(ERROR, "failed to build any %d-way joins", level);
725722 }
726723 }
@@ -819,9 +816,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
819816 SpecialJoinInfo *match_sjinfo;
820817 bool reversed;
821818 bool unique_ified;
822- bool is_valid_inner;
823- bool lateral_fwd;
824- bool lateral_rev;
819+ bool must_be_leftjoin;
825820 ListCell *l;
826821
827822 /*
@@ -834,12 +829,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
834829 /*
835830 * If we have any special joins, the proposed join might be illegal; and
836831 * in any case we have to determine its join type. Scan the join info
837- * list for conflicts.
832+ * list for matches and conflicts.
838833 */
839834 match_sjinfo = NULL;
840835 reversed = false;
841836 unique_ified = false;
842- is_valid_inner = true;
837+ must_be_leftjoin = false;
843838
844839 foreach(l, root->join_info_list)
845840 {
@@ -890,7 +885,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
890885 * If one input contains min_lefthand and the other contains
891886 * min_righthand, then we can perform the SJ at this join.
892887 *
893- * Barf if we get matches to more than one SJ (is that possible?)
888+ * Reject if we get matches to more than one SJ; that implies we're
889+ * considering something that's not really valid.
894890 */
895891 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
896892 bms_is_subset(sjinfo->min_righthand, rel2->relids))
@@ -955,90 +951,168 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
955951 }
956952 else
957953 {
958- /*----------
959- * Otherwise, the proposed join overlaps the RHS but isn't
960- * a valid implementation of this SJ. It might still be
961- * a legal join, however. If both inputs overlap the RHS,
962- * assume that it's OK. Since the inputs presumably got past
963- * this function's checks previously, they can't overlap the
964- * LHS and their violations of the RHS boundary must represent
965- * SJs that have been determined to commute with this one.
966- * We have to allow this to work correctly in cases like
967- * (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
968- * when the c/d join has been determined to commute with the join
969- * to a, and hence d is not part of min_righthand for the upper
970- * join. It should be legal to join b to c/d but this will appear
971- * as a violation of the upper join's RHS.
972- * Furthermore, if one input overlaps the RHS and the other does
973- * not, we should still allow the join if it is a valid
974- * implementation of some other SJ. We have to allow this to
975- * support the associative identity
976- * (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
977- * since joining B directly to C violates the lower SJ's RHS.
978- * We assume that make_outerjoininfo() set things up correctly
979- * so that we'll only match to some SJ if the join is valid.
980- * Set flag here to check at bottom of loop.
981- *----------
954+ /*
955+ * Otherwise, the proposed join overlaps the RHS but isn't a valid
956+ * implementation of this SJ. But don't panic quite yet: the RHS
957+ * violation might have occurred previously, in one or both input
958+ * relations, in which case we must have previously decided that
959+ * it was OK to commute some other SJ with this one. If we need
960+ * to perform this join to finish building up the RHS, rejecting
961+ * it could lead to not finding any plan at all. (This can occur
962+ * because of the heuristics elsewhere in this file that postpone
963+ * clauseless joins: we might not consider doing a clauseless join
964+ * within the RHS until after we've performed other, validly
965+ * commutable SJs with one or both sides of the clauseless join.)
966+ * This consideration boils down to the rule that if both inputs
967+ * overlap the RHS, we can allow the join --- they are either
968+ * fully within the RHS, or represent previously-allowed joins to
969+ * rels outside it.
982970 */
983- if (sjinfo->jointype != JOIN_SEMI &&
984- bms_overlap(rel1->relids, sjinfo->min_righthand) &&
971+ if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
985972 bms_overlap(rel2->relids, sjinfo->min_righthand))
986- {
987- /* seems OK */
988- Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
989- }
990- else
991- is_valid_inner = false;
973+ continue; /* assume valid previous violation of RHS */
974+
975+ /*
976+ * The proposed join could still be legal, but only if we're
977+ * allowed to associate it into the RHS of this SJ. That means
978+ * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
979+ * not FULL) and the proposed join must not overlap the LHS.
980+ */
981+ if (sjinfo->jointype != JOIN_LEFT ||
982+ bms_overlap(joinrelids, sjinfo->min_lefthand))
983+ return false; /* invalid join path */
984+
985+ /*
986+ * To be valid, the proposed join must be a LEFT join; otherwise
987+ * it can't associate into this SJ's RHS. But we may not yet have
988+ * found the SpecialJoinInfo matching the proposed join, so we
989+ * can't test that yet. Remember the requirement for later.
990+ */
991+ must_be_leftjoin = true;
992992 }
993993 }
994994
995995 /*
996- * Fail if violated some SJ's RHS and didn't match to another SJ. However,
997- * "matching" to a semijoin we are implementing by unique-ification
998- * doesn't count (think: it's really an inner join).
996+ * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
997+ * proposed join can't associate into an SJ's RHS.
998+ *
999+ * Also, fail if the proposed join's predicate isn't strict; we're
1000+ * essentially checking to see if we can apply outer-join identity 3, and
1001+ * that's a requirement. (This check may be redundant with checks in
1002+ * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
9991003 */
1000- if (!is_valid_inner &&
1001- (match_sjinfo == NULL || unique_ified))
1004+ if (must_be_leftjoin &&
1005+ (match_sjinfo == NULL ||
1006+ match_sjinfo->jointype != JOIN_LEFT ||
1007+ !match_sjinfo->lhs_strict))
10021008 return false; /* invalid join path */
10031009
10041010 /*
10051011 * We also have to check for constraints imposed by LATERAL references.
1006- * The proposed rels could each contain lateral references to the other,
1007- * in which case the join is impossible. If there are lateral references
1008- * in just one direction, then the join has to be done with a nestloop
1009- * with the lateral referencer on the inside. If the join matches an SJ
1010- * that cannot be implemented by such a nestloop, the join is impossible.
10111012 */
1012- lateral_fwd = lateral_rev = false;
1013- foreach(l, root->lateral_info_list)
1013+ if (root->hasLateralRTEs)
10141014 {
1015- LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
1015+ bool lateral_fwd;
1016+ bool lateral_rev;
1017+ Relids join_lateral_rels;
10161018
1017- if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
1018- bms_overlap(ljinfo->lateral_lhs, rel1->relids))
1019+ /*
1020+ * The proposed rels could each contain lateral references to the
1021+ * other, in which case the join is impossible. If there are lateral
1022+ * references in just one direction, then the join has to be done with
1023+ * a nestloop with the lateral referencer on the inside. If the join
1024+ * matches an SJ that cannot be implemented by such a nestloop, the
1025+ * join is impossible.
1026+ *
1027+ * Also, if the lateral reference is only indirect, we should reject
1028+ * the join; whatever rel(s) the reference chain goes through must be
1029+ * joined to first.
1030+ *
1031+ * Another case that might keep us from building a valid plan is the
1032+ * implementation restriction described by have_dangerous_phv().
1033+ */
1034+ lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
1035+ lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
1036+ if (lateral_fwd && lateral_rev)
1037+ return false; /* have lateral refs in both directions */
1038+ if (lateral_fwd)
10191039 {
10201040 /* has to be implemented as nestloop with rel1 on left */
1021- if (lateral_rev)
1022- return false; /* have lateral refs in both directions */
1023- lateral_fwd = true;
1024- if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
1025- return false; /* rel1 can't compute the required parameter */
10261041 if (match_sjinfo &&
1027- (reversed || match_sjinfo->jointype == JOIN_FULL))
1042+ (reversed ||
1043+ unique_ified ||
1044+ match_sjinfo->jointype == JOIN_FULL))
10281045 return false; /* not implementable as nestloop */
1046+ /* check there is a direct reference from rel2 to rel1 */
1047+ if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
1048+ return false; /* only indirect refs, so reject */
1049+ /* check we won't have a dangerous PHV */
1050+ if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
1051+ return false; /* might be unable to handle required PHV */
10291052 }
1030- if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
1031- bms_overlap(ljinfo->lateral_lhs, rel2->relids))
1053+ else if (lateral_rev)
10321054 {
10331055 /* has to be implemented as nestloop with rel2 on left */
1034- if (lateral_fwd)
1035- return false; /* have lateral refs in both directions */
1036- lateral_rev = true;
1037- if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
1038- return false; /* rel2 can't compute the required parameter */
10391056 if (match_sjinfo &&
1040- (!reversed || match_sjinfo->jointype == JOIN_FULL))
1057+ (!reversed ||
1058+ unique_ified ||
1059+ match_sjinfo->jointype == JOIN_FULL))
10411060 return false; /* not implementable as nestloop */
1061+ /* check there is a direct reference from rel1 to rel2 */
1062+ if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
1063+ return false; /* only indirect refs, so reject */
1064+ /* check we won't have a dangerous PHV */
1065+ if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
1066+ return false; /* might be unable to handle required PHV */
1067+ }
1068+
1069+ /*
1070+ * LATERAL references could also cause problems later on if we accept
1071+ * this join: if the join's minimum parameterization includes any rels
1072+ * that would have to be on the inside of an outer join with this join
1073+ * rel, then it's never going to be possible to build the complete
1074+ * query using this join. We should reject this join not only because
1075+ * it'll save work, but because if we don't, the clauseless-join
1076+ * heuristics might think that legality of this join means that some
1077+ * other join rel need not be formed, and that could lead to failure
1078+ * to find any plan at all. We have to consider not only rels that
1079+ * are directly on the inner side of an OJ with the joinrel, but also
1080+ * ones that are indirectly so, so search to find all such rels.
1081+ */
1082+ join_lateral_rels = min_join_parameterization(root, joinrelids,
1083+ rel1, rel2);
1084+ if (join_lateral_rels)
1085+ {
1086+ Relids join_plus_rhs = bms_copy(joinrelids);
1087+ bool more;
1088+
1089+ do
1090+ {
1091+ more = false;
1092+ foreach(l, root->join_info_list)
1093+ {
1094+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
1095+
1096+ if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
1097+ !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
1098+ {
1099+ join_plus_rhs = bms_add_members(join_plus_rhs,
1100+ sjinfo->min_righthand);
1101+ more = true;
1102+ }
1103+ /* full joins constrain both sides symmetrically */
1104+ if (sjinfo->jointype == JOIN_FULL &&
1105+ bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
1106+ !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
1107+ {
1108+ join_plus_rhs = bms_add_members(join_plus_rhs,
1109+ sjinfo->min_lefthand);
1110+ more = true;
1111+ }
1112+ }
1113+ } while (more);
1114+ if (bms_overlap(join_plus_rhs, join_lateral_rels))
1115+ return false; /* will not be able to join to some RHS rel */
10421116 }
10431117 }
10441118
@@ -1052,7 +1126,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
10521126 * has_join_restriction
10531127 * Detect whether the specified relation has join-order restrictions,
10541128 * due to being inside an outer join or an IN (sub-SELECT),
1055- * or participating in any LATERAL references.
1129+ * or participating in any LATERAL references or multi-rel PHVs.
10561130 *
10571131 * Essentially, this tests whether have_join_order_restriction() could
10581132 * succeed with this rel and some other one. It's OK if we sometimes
@@ -1064,12 +1138,15 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
10641138 {
10651139 ListCell *l;
10661140
1067- foreach(l, root->lateral_info_list)
1141+ if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
1142+ return true;
1143+
1144+ foreach(l, root->placeholder_list)
10681145 {
1069- LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
1146+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
10701147
1071- if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) ||
1072- bms_overlap(ljinfo->lateral_lhs, rel->relids))
1148+ if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
1149+ !bms_equal(rel->relids, phinfo->ph_eval_at))
10731150 return true;
10741151 }
10751152
--- a/expected/pg_hint_plan.out
+++ b/expected/pg_hint_plan.out
@@ -3016,7 +3016,7 @@ error hint:
30163016 -> Seq Scan on t2
30173017 (5 rows)
30183018
3019--- inherite table test
3019+-- inheritance tables test
30203020 SET constraint_exclusion TO off;
30213021 EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)';
30223022 QUERY PLAN
@@ -7983,7 +7983,7 @@ duplication hint:
79837983 error hint:
79847984
79857985 CONTEXT: SQL statement "/*+ SeqScan(t1) */ SELECT * FROM t1"
7986-PL/pgSQL function testfunc() line 3 at EXECUTE statement
7986+PL/pgSQL function testfunc() line 3 at EXECUTE
79877987 testfunc
79887988 ----------
79897989
--- a/pg_hint_plan--1.1.2.sql
+++ b/pg_hint_plan--1.1.3.sql
@@ -1,4 +1,4 @@
1-/* pg_hint_plan/pg_hint_plan--1.1.2.sql */
1+/* pg_hint_plan/pg_hint_plan--1.1.3.sql */
22
33 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
44 \echo Use "CREATE EXTENSION pg_hint_plan" to load this file. \quit
--- a/pg_hint_plan.c
+++ b/pg_hint_plan.c
@@ -3785,6 +3785,8 @@ rebuild_scan_path(HintState *hstate, PlannerInfo *root, int level,
37853785 {
37863786 set_plain_rel_pathlist(root, rel, rte);
37873787 }
3788+
3789+ set_cheapest(rel);
37883790 }
37893791
37903792 /*
--- a/pg_hint_plan.control
+++ b/pg_hint_plan.control
@@ -1,6 +1,6 @@
11 # pg_hint_plan extension
22
33 comment = ''
4-default_version = '1.1.2'
4+default_version = '1.1.3'
55 relocatable = false
66 schema = hint_plan
--- a/pg_stat_statements.c
+++ b/pg_stat_statements.c
@@ -1,2148 +1,28 @@
11 /*-------------------------------------------------------------------------
22 *
33 * pg_stat_statements.c
4- * Track statement execution times across a whole database cluster.
4+ *
5+ * Part of pg_stat_statements.c in PostgreSQL 9.5.
56 *
6- * Execution costs are totalled for each distinct source query, and kept in
7- * a shared hashtable. (We track only as many distinct queries as will fit
8- * in the designated amount of shared memory.)
7+ * Copyright (c) 2008-2015, PostgreSQL Global Development Group
98 *
10- * As of Postgres 9.2, this module normalizes query entries. Normalization
11- * is a process whereby similar queries, typically differing only in their
12- * constants (though the exact rules are somewhat more subtle than that) are
13- * recognized as equivalent, and are tracked as a single entry. This is
14- * particularly useful for non-prepared queries.
15- *
16- * Normalization is implemented by fingerprinting queries, selectively
17- * serializing those fields of each query tree's nodes that are judged to be
18- * essential to the query. This is referred to as a query jumble. This is
19- * distinct from a regular serialization in that various extraneous
20- * information is ignored as irrelevant or not essential to the query, such
21- * as the collations of Vars and, most notably, the values of constants.
22- *
23- * This jumble is acquired at the end of parse analysis of each query, and
24- * a 32-bit hash of it is stored into the query's Query.queryId field.
25- * The server then copies this value around, making it available in plan
26- * tree(s) generated from the query. The executor can then use this value
27- * to blame query costs on the proper queryId.
28- *
29- * To facilitate presenting entries to users, we create "representative" query
30- * strings in which constants are replaced with '?' characters, to make it
31- * clearer what a normalized entry can represent. To save on shared memory,
32- * and to avoid having to truncate oversized query strings, we store these
33- * strings in a temporary external query-texts file. Offsets into this
34- * file are kept in shared memory.
35- *
36- * Note about locking issues: to create or delete an entry in the shared
37- * hashtable, one must hold pgss->lock exclusively. Modifying any field
38- * in an entry except the counters requires the same. To look up an entry,
39- * one must hold the lock shared. To read or update the counters within
40- * an entry, one must hold the lock shared or exclusive (so the entry doesn't
41- * disappear!) and also take the entry's mutex spinlock.
42- * The shared state variable pgss->extent (the next free spot in the external
43- * query-text file) should be accessed only while holding either the
44- * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
45- * allow reserving file space while holding only shared lock on pgss->lock.
46- * Rewriting the entire external query-text file, eg for garbage collection,
47- * requires holding pgss->lock exclusively; this allows individual entries
48- * in the file to be read or written while holding only shared lock.
49- *
50- *
51- * Copyright (c) 2008-2014, PostgreSQL Global Development Group
52- *
53- * IDENTIFICATION
54- * contrib/pg_stat_statements/pg_stat_statements.c
55- *
56- *-------------------------------------------------------------------------
57- */
58-#include "postgres.h"
59-
60-#include <sys/stat.h>
61-
62-#ifdef NOT_USED
63-#include <unistd.h>
64-#endif
65-
66-#include "access/hash.h"
67-#ifdef NOT_USED
68-#include "executor/instrument.h"
69-#include "funcapi.h"
70-#include "mb/pg_wchar.h"
71-#include "miscadmin.h"
72-#include "parser/analyze.h"
73-#include "parser/parsetree.h"
74-#endif
75-#include "parser/scanner.h"
76-#ifdef NOT_USED
77-#include "pgstat.h"
78-#include "storage/fd.h"
79-#include "storage/ipc.h"
80-#include "storage/spin.h"
81-#include "tcop/utility.h"
82-#include "utils/builtins.h"
83-#include "utils/memutils.h"
84-
85-PG_MODULE_MAGIC;
86-
87-/* Location of permanent stats file (valid when database is shut down) */
88-#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
89-
90-/*
91- * Location of external query text file. We don't keep it in the core
92- * system's stats_temp_directory. The core system can safely use that GUC
93- * setting, because the statistics collector temp file paths are set only once
94- * as part of changing the GUC, but pg_stat_statements has no way of avoiding
95- * race conditions. Besides, we only expect modest, infrequent I/O for query
96- * strings, so placing the file on a faster filesystem is not compelling.
97- */
98-#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
99-
100-/* Magic number identifying the stats file format */
101-static const uint32 PGSS_FILE_HEADER = 0x20140125;
102-
103-/* PostgreSQL major version number, changes in which invalidate all entries */
104-static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
105-
106-/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
107-#define USAGE_EXEC(duration) (1.0)
108-#define USAGE_INIT (1.0) /* including initial planning */
109-#define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
110-#define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */
111-#define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
112-#define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
113-#define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
114-
115-#define JUMBLE_SIZE 1024 /* query serialization buffer size */
116-
117-/*
118- * Extension version number, for supporting older extension versions' objects
119- */
120-typedef enum pgssVersion
121-{
122- PGSS_V1_0 = 0,
123- PGSS_V1_1,
124- PGSS_V1_2
125-} pgssVersion;
126-
127-/*
128- * Hashtable key that defines the identity of a hashtable entry. We separate
129- * queries by user and by database even if they are otherwise identical.
130- */
131-typedef struct pgssHashKey
132-{
133- Oid userid; /* user OID */
134- Oid dbid; /* database OID */
135- uint32 queryid; /* query identifier */
136-} pgssHashKey;
137-
138-/*
139- * The actual stats counters kept within pgssEntry.
140- */
141-typedef struct Counters
142-{
143- int64 calls; /* # of times executed */
144- double total_time; /* total execution time, in msec */
145- int64 rows; /* total # of retrieved or affected rows */
146- int64 shared_blks_hit; /* # of shared buffer hits */
147- int64 shared_blks_read; /* # of shared disk blocks read */
148- int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
149- int64 shared_blks_written; /* # of shared disk blocks written */
150- int64 local_blks_hit; /* # of local buffer hits */
151- int64 local_blks_read; /* # of local disk blocks read */
152- int64 local_blks_dirtied; /* # of local disk blocks dirtied */
153- int64 local_blks_written; /* # of local disk blocks written */
154- int64 temp_blks_read; /* # of temp blocks read */
155- int64 temp_blks_written; /* # of temp blocks written */
156- double blk_read_time; /* time spent reading, in msec */
157- double blk_write_time; /* time spent writing, in msec */
158- double usage; /* usage factor */
159-} Counters;
160-
161-/*
162- * Statistics per statement
163- *
164- * Note: in event of a failure in garbage collection of the query text file,
165- * we reset query_offset to zero and query_len to -1. This will be seen as
166- * an invalid state by qtext_fetch().
167- */
168-typedef struct pgssEntry
169-{
170- pgssHashKey key; /* hash key of entry - MUST BE FIRST */
171- Counters counters; /* the statistics for this query */
172- Size query_offset; /* query text offset in external file */
173- int query_len; /* # of valid bytes in query string */
174- int encoding; /* query text encoding */
175- slock_t mutex; /* protects the counters only */
176-} pgssEntry;
177-
178-/*
179- * Global shared state
180- */
181-typedef struct pgssSharedState
182-{
183- LWLock *lock; /* protects hashtable search/modification */
184- double cur_median_usage; /* current median usage in hashtable */
185- Size mean_query_len; /* current mean entry text length */
186- slock_t mutex; /* protects following fields only: */
187- Size extent; /* current extent of query file */
188- int n_writers; /* number of active writers to query file */
189- int gc_count; /* query file garbage collection cycle count */
190-} pgssSharedState;
191-
192-/*
193- * Struct for tracking locations/lengths of constants during normalization
194- */
195-typedef struct pgssLocationLen
196-{
197- int location; /* start offset in query text */
198- int length; /* length in bytes, or -1 to ignore */
199-} pgssLocationLen;
200-
201-/*
202- * Working state for computing a query jumble and producing a normalized
203- * query string
204- */
205-typedef struct pgssJumbleState
206-{
207- /* Jumble of current query tree */
208- unsigned char *jumble;
209-
210- /* Number of bytes used in jumble[] */
211- Size jumble_len;
212-
213- /* Array of locations of constants that should be removed */
214- pgssLocationLen *clocations;
215-
216- /* Allocated length of clocations array */
217- int clocations_buf_size;
218-
219- /* Current number of valid entries in clocations array */
220- int clocations_count;
221-} pgssJumbleState;
222-
223-/*---- Local variables ----*/
224-
225-/* Current nesting depth of ExecutorRun+ProcessUtility calls */
226-static int nested_level = 0;
227-
228-/* Saved hook values in case of unload */
229-static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
230-static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
231-static ExecutorStart_hook_type prev_ExecutorStart = NULL;
232-static ExecutorRun_hook_type prev_ExecutorRun = NULL;
233-static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
234-static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
235-static ProcessUtility_hook_type prev_ProcessUtility = NULL;
236-
237-/* Links to shared memory state */
238-static pgssSharedState *pgss = NULL;
239-static HTAB *pgss_hash = NULL;
240-
/*---- GUC variables ----*/

/* Values for the pg_stat_statements.track GUC */
typedef enum
{
	PGSS_TRACK_NONE,			/* track no statements */
	PGSS_TRACK_TOP,				/* only top level statements */
	PGSS_TRACK_ALL				/* all statements, including nested ones */
} PGSSTrackLevel;

/* Enum-GUC mapping for pg_stat_statements.track */
static const struct config_enum_entry track_options[] =
{
	{"none", PGSS_TRACK_NONE, false},
	{"top", PGSS_TRACK_TOP, false},
	{"all", PGSS_TRACK_ALL, false},
	{NULL, 0, false}
};

static int	pgss_max;			/* max # statements to track */
static int	pgss_track;			/* tracking level */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_save;			/* whether to save stats across shutdown */


/* True if the current statement should be tracked at this nesting depth */
#define pgss_enabled() \
	(pgss_track == PGSS_TRACK_ALL || \
	 (pgss_track == PGSS_TRACK_TOP && nested_level == 0))

/*
 * Bump the shared garbage-collection counter under the spinlock; readers
 * compare gc_count values to detect that a GC ran while they were unlocked.
 */
#define record_gc_qtexts() \
	do { \
		volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
		SpinLockAcquire(&s->mutex); \
		s->gc_count++; \
		SpinLockRelease(&s->mutex); \
	} while(0)
275-
/*---- Function declarations ----*/

void		_PG_init(void);
void		_PG_fini(void);

PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
PG_FUNCTION_INFO_V1(pg_stat_statements);

/* Hook implementations and internal helpers */
static void pgss_shmem_startup(void);
static void pgss_shmem_shutdown(int code, Datum arg);
static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
static void pgss_ExecutorRun(QueryDesc *queryDesc,
				 ScanDirection direction,
				 long count);
static void pgss_ExecutorFinish(QueryDesc *queryDesc);
static void pgss_ExecutorEnd(QueryDesc *queryDesc);
static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
					ProcessUtilityContext context, ParamListInfo params,
					DestReceiver *dest, char *completionTag);
static uint32 pgss_hash_fn(const void *key, Size keysize);
static int	pgss_match_fn(const void *key1, const void *key2, Size keysize);
static uint32 pgss_hash_string(const char *str);
static void pgss_store(const char *query, uint32 queryId,
		   double total_time, uint64 rows,
		   const BufferUsage *bufusage,
		   pgssJumbleState *jstate);
static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
							pgssVersion api_version,
							bool showtext);
static Size pgss_memsize(void);
static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
			int encoding, bool sticky);
static void entry_dealloc(void);
static bool qtext_store(const char *query, int query_len,
			Size *query_offset, int *gc_count);
static char *qtext_load_file(Size *buffer_size);
static char *qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size);
static bool need_gc_qtexts(void);
static void gc_qtexts(void);
static void entry_reset(void);
#endif	/* NOTE(review): closes a conditional opened earlier in the file — confirm */
/* Query-jumbling machinery (always compiled) */
static void AppendJumble(pgssJumbleState *jstate,
			 const unsigned char *item, Size size);
static void JumbleQuery(pgssJumbleState *jstate, Query *query);
static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
static void JumbleExpr(pgssJumbleState *jstate, Node *node);
static void RecordConstLocation(pgssJumbleState *jstate, int location);
#ifdef NOT_USED
static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
						  int *query_len_p, int encoding);
#endif
static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
static int	comp_location(const void *a, const void *b);
332-
333-
#ifdef NOT_USED
/*
 * Module load callback: register GUCs, reserve shared memory, install hooks.
 * (Compiled out via NOT_USED in this bundled copy.)
 */
void
_PG_init(void)
{
	/*
	 * In order to create our shared memory area, we have to be loaded via
	 * shared_preload_libraries.  If not, fall out without hooking into any of
	 * the main system.  (We don't throw error here because it seems useful to
	 * allow the pg_stat_statements functions to be created even when the
	 * module isn't active.  The functions must protect themselves against
	 * being called then, however.)
	 */
	if (!process_shared_preload_libraries_in_progress)
		return;

	/*
	 * Define (or redefine) custom GUC variables.
	 */
	DefineCustomIntVariable("pg_stat_statements.max",
	  "Sets the maximum number of statements tracked by pg_stat_statements.",
							NULL,
							&pgss_max,
							5000,		/* default */
							100,		/* minimum */
							INT_MAX,
							PGC_POSTMASTER,		/* fixed at server start */
							0,
							NULL,
							NULL,
							NULL);

	DefineCustomEnumVariable("pg_stat_statements.track",
			   "Selects which statements are tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track,
							 PGSS_TRACK_TOP,	/* default: top-level only */
							 track_options,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.track_utility",
	   "Selects whether utility commands are tracked by pg_stat_statements.",
							 NULL,
							 &pgss_track_utility,
							 true,
							 PGC_SUSET,
							 0,
							 NULL,
							 NULL,
							 NULL);

	DefineCustomBoolVariable("pg_stat_statements.save",
			   "Save pg_stat_statements statistics across server shutdowns.",
							 NULL,
							 &pgss_save,
							 true,
							 PGC_SIGHUP,
							 0,
							 NULL,
							 NULL,
							 NULL);

	EmitWarningsOnPlaceholders("pg_stat_statements");

	/*
	 * Request additional shared resources.  (These are no-ops if we're not in
	 * the postmaster process.)  We'll allocate or attach to the shared
	 * resources in pgss_shmem_startup().
	 */
	RequestAddinShmemSpace(pgss_memsize());
	RequestAddinLWLocks(1);

	/*
	 * Install hooks, saving the previous values so _PG_fini() can restore
	 * them and so our hooks can chain to any pre-existing ones.
	 */
	prev_shmem_startup_hook = shmem_startup_hook;
	shmem_startup_hook = pgss_shmem_startup;
	prev_post_parse_analyze_hook = post_parse_analyze_hook;
	post_parse_analyze_hook = pgss_post_parse_analyze;
	prev_ExecutorStart = ExecutorStart_hook;
	ExecutorStart_hook = pgss_ExecutorStart;
	prev_ExecutorRun = ExecutorRun_hook;
	ExecutorRun_hook = pgss_ExecutorRun;
	prev_ExecutorFinish = ExecutorFinish_hook;
	ExecutorFinish_hook = pgss_ExecutorFinish;
	prev_ExecutorEnd = ExecutorEnd_hook;
	ExecutorEnd_hook = pgss_ExecutorEnd;
	prev_ProcessUtility = ProcessUtility_hook;
	ProcessUtility_hook = pgss_ProcessUtility;
}
430-
431-/*
432- * Module unload callback
433- */
434-void
435-_PG_fini(void)
436-{
437- /* Uninstall hooks. */
438- shmem_startup_hook = prev_shmem_startup_hook;
439- post_parse_analyze_hook = prev_post_parse_analyze_hook;
440- ExecutorStart_hook = prev_ExecutorStart;
441- ExecutorRun_hook = prev_ExecutorRun;
442- ExecutorFinish_hook = prev_ExecutorFinish;
443- ExecutorEnd_hook = prev_ExecutorEnd;
444- ProcessUtility_hook = prev_ProcessUtility;
445-}
446-
/*
 * shmem_startup hook: allocate or attach to shared memory,
 * then load any pre-existing statistics from file.
 * Also create and load the query-texts file, which is expected to exist
 * (even if empty) while the module is enabled.
 */
static void
pgss_shmem_startup(void)
{
	bool		found;
	HASHCTL		info;
	FILE	   *file = NULL;
	FILE	   *qfile = NULL;
	uint32		header;
	int32		num;
	int32		pgver;
	int32		i;
	int			buffer_size;
	char	   *buffer = NULL;

	/* Chain to any earlier shmem_startup hook first */
	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/* reset in case this is a restart within the postmaster */
	pgss = NULL;
	pgss_hash = NULL;

	/*
	 * Create or attach to the shared memory state, including hash table
	 */
	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	pgss = ShmemInitStruct("pg_stat_statements",
						   sizeof(pgssSharedState),
						   &found);

	if (!found)
	{
		/* First time through ... initialize the shared state */
		pgss->lock = LWLockAssign();
		pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
		pgss->mean_query_len = ASSUMED_LENGTH_INIT;
		SpinLockInit(&pgss->mutex);
		pgss->extent = 0;
		pgss->n_writers = 0;
		pgss->gc_count = 0;
	}

	/* Create or attach to the per-statement hash table */
	memset(&info, 0, sizeof(info));
	info.keysize = sizeof(pgssHashKey);
	info.entrysize = sizeof(pgssEntry);
	info.hash = pgss_hash_fn;
	info.match = pgss_match_fn;
	pgss_hash = ShmemInitHash("pg_stat_statements hash",
							  pgss_max, pgss_max,
							  &info,
							  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);

	LWLockRelease(AddinShmemInitLock);

	/*
	 * If we're in the postmaster (or a standalone backend...), set up a shmem
	 * exit hook to dump the statistics to disk.
	 */
	if (!IsUnderPostmaster)
		on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);

	/*
	 * Done if some other process already completed our initialization.
	 */
	if (found)
		return;

	/*
	 * Note: we don't bother with locks here, because there should be no other
	 * processes running when this code is reached.
	 */

	/* Unlink query text file possibly left over from crash */
	unlink(PGSS_TEXT_FILE);

	/* Allocate new query text temp file */
	qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
	if (qfile == NULL)
		goto write_error;

	/*
	 * If we were told not to load old statistics, we're done.  (Note we do
	 * not try to unlink any old dump file in this case.  This seems a bit
	 * questionable but it's the historical behavior.)
	 */
	if (!pgss_save)
	{
		FreeFile(qfile);
		return;
	}

	/*
	 * Attempt to load old statistics from the dump file.
	 */
	file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
	if (file == NULL)
	{
		if (errno != ENOENT)
			goto read_error;
		/* No existing persisted stats file, so we're done */
		FreeFile(qfile);
		return;
	}

	buffer_size = 2048;
	buffer = (char *) palloc(buffer_size);

	/* Dump file layout: header magic, PG major version, entry count */
	if (fread(&header, sizeof(uint32), 1, file) != 1 ||
		fread(&pgver, sizeof(uint32), 1, file) != 1 ||
		fread(&num, sizeof(int32), 1, file) != 1)
		goto read_error;

	/* Refuse files written by another format or server version */
	if (header != PGSS_FILE_HEADER ||
		pgver != PGSS_PG_MAJOR_VERSION)
		goto data_error;

	for (i = 0; i < num; i++)
	{
		pgssEntry	temp;
		pgssEntry  *entry;
		Size		query_offset;

		if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
			goto read_error;

		/* Encoding is the only field we can easily sanity-check */
		if (!PG_VALID_BE_ENCODING(temp.encoding))
			goto data_error;

		/* Resize buffer as needed */
		if (temp.query_len >= buffer_size)
		{
			buffer_size = Max(buffer_size * 2, temp.query_len + 1);
			buffer = repalloc(buffer, buffer_size);
		}

		if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
			goto read_error;

		/* Should have a trailing null, but let's make sure */
		buffer[temp.query_len] = '\0';

		/* Skip loading "sticky" entries */
		if (temp.counters.calls == 0)
			continue;

		/* Store the query text */
		query_offset = pgss->extent;
		if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
			goto write_error;
		pgss->extent += temp.query_len + 1;

		/* make the hashtable entry (discards old entries if too many) */
		entry = entry_alloc(&temp.key, query_offset, temp.query_len,
							temp.encoding,
							false);

		/* copy in the actual stats */
		entry->counters = temp.counters;
	}

	pfree(buffer);
	FreeFile(file);
	FreeFile(qfile);

	/*
	 * Remove the persisted stats file so it's not included in
	 * backups/replication slaves, etc.  A new file will be written on next
	 * shutdown.
	 *
	 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
	 * because we remove that file on startup; it acts inversely to
	 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
	 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
	 * when the server is not running.  Leaving the file creates no danger of
	 * a newly restored database having a spurious record of execution costs,
	 * which is what we're really concerned about here.
	 */
	unlink(PGSS_DUMP_FILE);

	return;

read_error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not read pg_stat_statement file \"%s\": %m",
					PGSS_DUMP_FILE)));
	goto fail;
data_error:
	ereport(LOG,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("ignoring invalid data in pg_stat_statement file \"%s\"",
					PGSS_DUMP_FILE)));
	goto fail;
write_error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write pg_stat_statement file \"%s\": %m",
					PGSS_TEXT_FILE)));
fail:
	/* Common cleanup for all error exits */
	if (buffer)
		pfree(buffer);
	if (file)
		FreeFile(file);
	if (qfile)
		FreeFile(qfile);
	/* If possible, throw away the bogus file; ignore any error */
	unlink(PGSS_DUMP_FILE);

	/*
	 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
	 * server is running with pg_stat_statements enabled
	 */
}
667-
/*
 * shmem_shutdown hook: Dump statistics into file.
 *
 * Note: we don't bother with acquiring lock, because there should be no
 * other processes running when this is called.
 */
static void
pgss_shmem_shutdown(int code, Datum arg)
{
	FILE	   *file;
	char	   *qbuffer = NULL;
	Size		qbuffer_size = 0;
	HASH_SEQ_STATUS hash_seq;
	int32		num_entries;
	pgssEntry  *entry;

	/* Don't try to dump during a crash. */
	if (code)
		return;

	/* Safety check ... shouldn't get here unless shmem is set up. */
	if (!pgss || !pgss_hash)
		return;

	/* Don't dump if told not to. */
	if (!pgss_save)
		return;

	/* Write to a temp file first, then atomically rename into place below */
	file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
	if (file == NULL)
		goto error;

	/* File layout: header magic, PG major version, entry count, entries */
	if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
		goto error;
	if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
		goto error;
	num_entries = hash_get_num_entries(pgss_hash);
	if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
		goto error;

	/* Slurp the whole external query-text file into memory */
	qbuffer = qtext_load_file(&qbuffer_size);
	if (qbuffer == NULL)
		goto error;

	/*
	 * When serializing to disk, we store query texts immediately after their
	 * entry data.  Any orphaned query texts are thereby excluded.
	 */
	hash_seq_init(&hash_seq, pgss_hash);
	while ((entry = hash_seq_search(&hash_seq)) != NULL)
	{
		int			len = entry->query_len;
		char	   *qstr = qtext_fetch(entry->query_offset, len,
									   qbuffer, qbuffer_size);

		if (qstr == NULL)
			continue;			/* Ignore any entries with bogus texts */

		if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
			fwrite(qstr, 1, len + 1, file) != len + 1)
		{
			/* note: we assume hash_seq_term won't change errno */
			hash_seq_term(&hash_seq);
			goto error;
		}
	}

	/* qbuffer came from qtext_load_file, hence free() not pfree() */
	free(qbuffer);
	qbuffer = NULL;

	if (FreeFile(file))
	{
		file = NULL;
		goto error;
	}

	/*
	 * Rename file into place, so we atomically replace any old one.
	 */
	if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not rename pg_stat_statement file \"%s\": %m",
						PGSS_DUMP_FILE ".tmp")));

	/* Unlink query-texts file; it's not needed while shutdown */
	unlink(PGSS_TEXT_FILE);

	return;

error:
	ereport(LOG,
			(errcode_for_file_access(),
			 errmsg("could not write pg_stat_statement file \"%s\": %m",
					PGSS_DUMP_FILE ".tmp")));
	if (qbuffer)
		free(qbuffer);
	if (file)
		FreeFile(file);
	unlink(PGSS_DUMP_FILE ".tmp");
	unlink(PGSS_TEXT_FILE);
}
770-
/*
 * Post-parse-analysis hook: mark query with a queryId
 *
 * Computes a fingerprint of the analyzed Query tree (ignoring constants)
 * and stores it in query->queryId.  Utility statements get queryId 0;
 * a real hash that collides with 0 is forced to 1.
 */
static void
pgss_post_parse_analyze(ParseState *pstate, Query *query)
{
	pgssJumbleState jstate;

	/* Chain to any earlier hook first */
	if (prev_post_parse_analyze_hook)
		prev_post_parse_analyze_hook(pstate, query);

	/* Assert we didn't do this already */
	Assert(query->queryId == 0);

	/* Safety check... */
	if (!pgss || !pgss_hash)
		return;

	/*
	 * Utility statements get queryId zero.  We do this even in cases where
	 * the statement contains an optimizable statement for which a queryId
	 * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
	 * runtime control will first go through ProcessUtility and then the
	 * executor, and we don't want the executor hooks to do anything, since we
	 * are already measuring the statement's costs at the utility level.
	 */
	if (query->utilityStmt)
	{
		query->queryId = 0;
		return;
	}

	/* Set up workspace for query jumbling */
	jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
	jstate.jumble_len = 0;
	jstate.clocations_buf_size = 32;
	jstate.clocations = (pgssLocationLen *)
		palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
	jstate.clocations_count = 0;

	/* Compute query ID and mark the Query node with it */
	JumbleQuery(&jstate, query);
	query->queryId = hash_any(jstate.jumble, jstate.jumble_len);

	/*
	 * If we are unlucky enough to get a hash of zero, use 1 instead, to
	 * prevent confusion with the utility-statement case.
	 */
	if (query->queryId == 0)
		query->queryId = 1;

	/*
	 * If we were able to identify any ignorable constants, we immediately
	 * create a hash table entry for the query, so that we can record the
	 * normalized form of the query string.  If there were no such constants,
	 * the normalized string would be the same as the query text anyway, so
	 * there's no need for an early entry.
	 */
	if (jstate.clocations_count > 0)
		pgss_store(pstate->p_sourcetext,
				   query->queryId,
				   0,			/* no timing yet */
				   0,			/* no rows yet */
				   NULL,		/* no buffer usage yet */
				   &jstate);
}
837-
838-/*
839- * ExecutorStart hook: start up tracking if needed
840- */
841-static void
842-pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
843-{
844- if (prev_ExecutorStart)
845- prev_ExecutorStart(queryDesc, eflags);
846- else
847- standard_ExecutorStart(queryDesc, eflags);
848-
849- /*
850- * If query has queryId zero, don't track it. This prevents double
851- * counting of optimizable statements that are directly contained in
852- * utility statements.
853- */
854- if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
855- {
856- /*
857- * Set up to track total elapsed time in ExecutorRun. Make sure the
858- * space is allocated in the per-query context so it will go away at
859- * ExecutorEnd.
860- */
861- if (queryDesc->totaltime == NULL)
862- {
863- MemoryContext oldcxt;
864-
865- oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
866- queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
867- MemoryContextSwitchTo(oldcxt);
868- }
869- }
870-}
871-
/*
 * ExecutorRun hook: all we need do is track nesting depth
 *
 * The depth counter feeds pgss_enabled(); PG_TRY/PG_CATCH guarantees it is
 * decremented even when the executor throws an error.
 */
static void
pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
{
	nested_level++;
	PG_TRY();
	{
		if (prev_ExecutorRun)
			prev_ExecutorRun(queryDesc, direction, count);
		else
			standard_ExecutorRun(queryDesc, direction, count);
		nested_level--;
	}
	PG_CATCH();
	{
		nested_level--;
		PG_RE_THROW();
	}
	PG_END_TRY();
}
894-
/*
 * ExecutorFinish hook: all we need do is track nesting depth
 *
 * Same error-safe increment/decrement pattern as pgss_ExecutorRun.
 */
static void
pgss_ExecutorFinish(QueryDesc *queryDesc)
{
	nested_level++;
	PG_TRY();
	{
		if (prev_ExecutorFinish)
			prev_ExecutorFinish(queryDesc);
		else
			standard_ExecutorFinish(queryDesc);
		nested_level--;
	}
	PG_CATCH();
	{
		nested_level--;
		PG_RE_THROW();
	}
	PG_END_TRY();
}
917-
918-/*
919- * ExecutorEnd hook: store results if needed
920- */
921-static void
922-pgss_ExecutorEnd(QueryDesc *queryDesc)
923-{
924- uint32 queryId = queryDesc->plannedstmt->queryId;
925-
926- if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
927- {
928- /*
929- * Make sure stats accumulation is done. (Note: it's okay if several
930- * levels of hook all do this.)
931- */
932- InstrEndLoop(queryDesc->totaltime);
933-
934- pgss_store(queryDesc->sourceText,
935- queryId,
936- queryDesc->totaltime->total * 1000.0, /* convert to msec */
937- queryDesc->estate->es_processed,
938- &queryDesc->totaltime->bufusage,
939- NULL);
940- }
941-
942- if (prev_ExecutorEnd)
943- prev_ExecutorEnd(queryDesc);
944- else
945- standard_ExecutorEnd(queryDesc);
946-}
947-
/*
 * ProcessUtility hook
 *
 * Times tracked utility statements, derives row counts from the COPY
 * command tag, and records buffer-usage deltas around execution.
 */
static void
pgss_ProcessUtility(Node *parsetree, const char *queryString,
					ProcessUtilityContext context, ParamListInfo params,
					DestReceiver *dest, char *completionTag)
{
	/*
	 * If it's an EXECUTE statement, we don't track it and don't increment the
	 * nesting level.  This allows the cycles to be charged to the underlying
	 * PREPARE instead (by the Executor hooks), which is much more useful.
	 *
	 * We also don't track execution of PREPARE.  If we did, we would get one
	 * hash table entry for the PREPARE (with hash calculated from the query
	 * string), and then a different one with the same query string (but hash
	 * calculated from the query tree) would be used to accumulate costs of
	 * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
	 * cases where planning time is not included at all.
	 *
	 * Likewise, we don't track execution of DEALLOCATE.
	 */
	if (pgss_track_utility && pgss_enabled() &&
		!IsA(parsetree, ExecuteStmt) &&
		!IsA(parsetree, PrepareStmt) &&
		!IsA(parsetree, DeallocateStmt))
	{
		instr_time	start;
		instr_time	duration;
		uint64		rows;
		BufferUsage bufusage_start,
					bufusage;
		uint32		queryId;

		/* Snapshot buffer counters and clock before execution */
		bufusage_start = pgBufferUsage;
		INSTR_TIME_SET_CURRENT(start);

		nested_level++;
		PG_TRY();
		{
			if (prev_ProcessUtility)
				prev_ProcessUtility(parsetree, queryString,
									context, params,
									dest, completionTag);
			else
				standard_ProcessUtility(parsetree, queryString,
										context, params,
										dest, completionTag);
			nested_level--;
		}
		PG_CATCH();
		{
			nested_level--;
			PG_RE_THROW();
		}
		PG_END_TRY();

		INSTR_TIME_SET_CURRENT(duration);
		INSTR_TIME_SUBTRACT(duration, start);

		/* parse command tag to retrieve the number of affected rows. */
		if (completionTag &&
			strncmp(completionTag, "COPY ", 5) == 0)
		{
#ifdef HAVE_STRTOULL
			rows = strtoull(completionTag + 5, NULL, 10);
#else
			rows = strtoul(completionTag + 5, NULL, 10);
#endif
		}
		else
			rows = 0;

		/* calc differences of buffer counters. */
		bufusage.shared_blks_hit =
			pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
		bufusage.shared_blks_read =
			pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
		bufusage.shared_blks_dirtied =
			pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
		bufusage.shared_blks_written =
			pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
		bufusage.local_blks_hit =
			pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
		bufusage.local_blks_read =
			pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
		bufusage.local_blks_dirtied =
			pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
		bufusage.local_blks_written =
			pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
		bufusage.temp_blks_read =
			pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
		bufusage.temp_blks_written =
			pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
		bufusage.blk_read_time = pgBufferUsage.blk_read_time;
		INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
		bufusage.blk_write_time = pgBufferUsage.blk_write_time;
		INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);

		/* For utility statements, we just hash the query string directly */
		queryId = pgss_hash_string(queryString);

		pgss_store(queryString,
				   queryId,
				   INSTR_TIME_GET_MILLISEC(duration),
				   rows,
				   &bufusage,
				   NULL);
	}
	else
	{
		/* Untracked: just pass through without touching nested_level */
		if (prev_ProcessUtility)
			prev_ProcessUtility(parsetree, queryString,
								context, params,
								dest, completionTag);
		else
			standard_ProcessUtility(parsetree, queryString,
									context, params,
									dest, completionTag);
	}
}
1069-
1070-/*
1071- * Calculate hash value for a key
1072- */
1073-static uint32
1074-pgss_hash_fn(const void *key, Size keysize)
1075-{
1076- const pgssHashKey *k = (const pgssHashKey *) key;
1077-
1078- return hash_uint32((uint32) k->userid) ^
1079- hash_uint32((uint32) k->dbid) ^
1080- hash_uint32((uint32) k->queryid);
1081-}
1082-
1083-/*
1084- * Compare two keys - zero means match
1085- */
1086-static int
1087-pgss_match_fn(const void *key1, const void *key2, Size keysize)
1088-{
1089- const pgssHashKey *k1 = (const pgssHashKey *) key1;
1090- const pgssHashKey *k2 = (const pgssHashKey *) key2;
1091-
1092- if (k1->userid == k2->userid &&
1093- k1->dbid == k2->dbid &&
1094- k1->queryid == k2->queryid)
1095- return 0;
1096- else
1097- return 1;
1098-}
1099-
1100-/*
1101- * Given an arbitrarily long query string, produce a hash for the purposes of
1102- * identifying the query, without normalizing constants. Used when hashing
1103- * utility statements.
1104- */
1105-static uint32
1106-pgss_hash_string(const char *str)
1107-{
1108- return hash_any((const unsigned char *) str, strlen(str));
1109-}
1110-
/*
 * Store some statistics for a statement.
 *
 * If jstate is not NULL then we're trying to create an entry for which
 * we have no statistics as yet; we just want to record the normalized
 * query string.  total_time, rows, bufusage are ignored in this case.
 *
 * Locking protocol: take pgss->lock shared for lookup; promote to
 * exclusive only if a new hash entry must be created; per-entry counters
 * are protected by the entry's own spinlock.
 */
static void
pgss_store(const char *query, uint32 queryId,
		   double total_time, uint64 rows,
		   const BufferUsage *bufusage,
		   pgssJumbleState *jstate)
{
	pgssHashKey key;
	pgssEntry  *entry;
	char	   *norm_query = NULL;
	int			encoding = GetDatabaseEncoding();
	int			query_len;

	Assert(query != NULL);

	/* Safety check... */
	if (!pgss || !pgss_hash)
		return;

	query_len = strlen(query);

	/* Set up key for hashtable search */
	key.userid = GetUserId();
	key.dbid = MyDatabaseId;
	key.queryid = queryId;

	/* Lookup the hash table entry with shared lock. */
	LWLockAcquire(pgss->lock, LW_SHARED);

	entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);

	/* Create new entry, if not present */
	if (!entry)
	{
		Size		query_offset;
		int			gc_count;
		bool		stored;
		bool		do_gc;

		/*
		 * Create a new, normalized query string if caller asked.  We don't
		 * need to hold the lock while doing this work.  (Note: in any case,
		 * it's possible that someone else creates a duplicate hashtable entry
		 * in the interval where we don't hold the lock below.  That case is
		 * handled by entry_alloc.)
		 */
		if (jstate)
		{
			LWLockRelease(pgss->lock);
			norm_query = generate_normalized_query(jstate, query,
												   &query_len,
												   encoding);
			LWLockAcquire(pgss->lock, LW_SHARED);
		}

		/* Append new query text to file with only shared lock held */
		stored = qtext_store(norm_query ? norm_query : query, query_len,
							 &query_offset, &gc_count);

		/*
		 * Determine whether we need to garbage collect external query texts
		 * while the shared lock is still held.  This micro-optimization
		 * avoids taking the time to decide this while holding exclusive lock.
		 */
		do_gc = need_gc_qtexts();

		/* Need exclusive lock to make a new hashtable entry - promote */
		LWLockRelease(pgss->lock);
		LWLockAcquire(pgss->lock, LW_EXCLUSIVE);

		/*
		 * A garbage collection may have occurred while we weren't holding the
		 * lock.  In the unlikely event that this happens, the query text we
		 * stored above will have been garbage collected, so write it again.
		 * This should be infrequent enough that doing it while holding
		 * exclusive lock isn't a performance problem.
		 */
		if (!stored || pgss->gc_count != gc_count)
			stored = qtext_store(norm_query ? norm_query : query, query_len,
								 &query_offset, NULL);

		/* If we failed to write to the text file, give up */
		if (!stored)
			goto done;

		/* OK to create a new hashtable entry */
		entry = entry_alloc(&key, query_offset, query_len, encoding,
							jstate != NULL);

		/* If needed, perform garbage collection while exclusive lock held */
		if (do_gc)
			gc_qtexts();
	}

	/* Increment the counts, except when jstate is not NULL */
	if (!jstate)
	{
		/*
		 * Grab the spinlock while updating the counters (see comment about
		 * locking rules at the head of the file)
		 */
		volatile pgssEntry *e = (volatile pgssEntry *) entry;

		SpinLockAcquire(&e->mutex);

		/* "Unstick" entry if it was previously sticky */
		if (e->counters.calls == 0)
			e->counters.usage = USAGE_INIT;

		e->counters.calls += 1;
		e->counters.total_time += total_time;
		e->counters.rows += rows;
		e->counters.shared_blks_hit += bufusage->shared_blks_hit;
		e->counters.shared_blks_read += bufusage->shared_blks_read;
		e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
		e->counters.shared_blks_written += bufusage->shared_blks_written;
		e->counters.local_blks_hit += bufusage->local_blks_hit;
		e->counters.local_blks_read += bufusage->local_blks_read;
		e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
		e->counters.local_blks_written += bufusage->local_blks_written;
		e->counters.temp_blks_read += bufusage->temp_blks_read;
		e->counters.temp_blks_written += bufusage->temp_blks_written;
		e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
		e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
		e->counters.usage += USAGE_EXEC(total_time);

		SpinLockRelease(&e->mutex);
	}

done:
	LWLockRelease(pgss->lock);

	/* We postpone this clean-up until we're out of the lock */
	if (norm_query)
		pfree(norm_query);
}
1253-
1254-/*
1255- * Reset all statement statistics.
1256- */
1257-Datum
1258-pg_stat_statements_reset(PG_FUNCTION_ARGS)
1259-{
1260- if (!pgss || !pgss_hash)
1261- ereport(ERROR,
1262- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1263- errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1264- entry_reset();
1265- PG_RETURN_VOID();
1266-}
1267-
/* Number of output arguments (columns) for various API versions */
#define PG_STAT_STATEMENTS_COLS_V1_0	14
#define PG_STAT_STATEMENTS_COLS_V1_1	18
#define PG_STAT_STATEMENTS_COLS_V1_2	19
#define PG_STAT_STATEMENTS_COLS			19		/* maximum of above */
1273-
1274-/*
1275- * Retrieve statement statistics.
1276- *
1277- * The SQL API of this function has changed multiple times, and will likely
1278- * do so again in future. To support the case where a newer version of this
1279- * loadable module is being used with an old SQL declaration of the function,
1280- * we continue to support the older API versions. For 1.2 and later, the
1281- * expected API version is identified by embedding it in the C name of the
1282- * function. Unfortunately we weren't bright enough to do that for 1.1.
1283- */
1284-Datum
1285-pg_stat_statements_1_2(PG_FUNCTION_ARGS)
1286-{
1287- bool showtext = PG_GETARG_BOOL(0);
1288-
1289- pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1290-
1291- return (Datum) 0;
1292-}
1293-
/*
 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
 * This can be removed someday, perhaps.
 *
 * Takes no arguments; query texts are always shown for the legacy API.
 */
Datum
pg_stat_statements(PG_FUNCTION_ARGS)
{
	/* If it's really API 1.1, we'll figure that out below */
	pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);

	return (Datum) 0;
}
1306-
1307-/* Common code for all versions of pg_stat_statements() */
1308-static void
1309-pg_stat_statements_internal(FunctionCallInfo fcinfo,
1310- pgssVersion api_version,
1311- bool showtext)
1312-{
1313- ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1314- TupleDesc tupdesc;
1315- Tuplestorestate *tupstore;
1316- MemoryContext per_query_ctx;
1317- MemoryContext oldcontext;
1318- Oid userid = GetUserId();
1319- bool is_superuser = superuser();
1320- char *qbuffer = NULL;
1321- Size qbuffer_size = 0;
1322- Size extent = 0;
1323- int gc_count = 0;
1324- HASH_SEQ_STATUS hash_seq;
1325- pgssEntry *entry;
1326-
1327- /* hash table must exist already */
1328- if (!pgss || !pgss_hash)
1329- ereport(ERROR,
1330- (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1331- errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1332-
1333- /* check to see if caller supports us returning a tuplestore */
1334- if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1335- ereport(ERROR,
1336- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1337- errmsg("set-valued function called in context that cannot accept a set")));
1338- if (!(rsinfo->allowedModes & SFRM_Materialize))
1339- ereport(ERROR,
1340- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1341- errmsg("materialize mode required, but it is not " \
1342- "allowed in this context")));
1343-
1344- /* Switch into long-lived context to construct returned data structures */
1345- per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1346- oldcontext = MemoryContextSwitchTo(per_query_ctx);
1347-
1348- /* Build a tuple descriptor for our result type */
1349- if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1350- elog(ERROR, "return type must be a row type");
1351-
1352- /*
1353- * Check we have the expected number of output arguments. Aside from
1354- * being a good safety check, we need a kluge here to detect API version
1355- * 1.1, which was wedged into the code in an ill-considered way.
1356- */
1357- switch (tupdesc->natts)
1358- {
1359- case PG_STAT_STATEMENTS_COLS_V1_0:
1360- if (api_version != PGSS_V1_0)
1361- elog(ERROR, "incorrect number of output arguments");
1362- break;
1363- case PG_STAT_STATEMENTS_COLS_V1_1:
1364- /* pg_stat_statements() should have told us 1.0 */
1365- if (api_version != PGSS_V1_0)
1366- elog(ERROR, "incorrect number of output arguments");
1367- api_version = PGSS_V1_1;
1368- break;
1369- case PG_STAT_STATEMENTS_COLS_V1_2:
1370- if (api_version != PGSS_V1_2)
1371- elog(ERROR, "incorrect number of output arguments");
1372- break;
1373- default:
1374- elog(ERROR, "incorrect number of output arguments");
1375- }
1376-
1377- tupstore = tuplestore_begin_heap(true, false, work_mem);
1378- rsinfo->returnMode = SFRM_Materialize;
1379- rsinfo->setResult = tupstore;
1380- rsinfo->setDesc = tupdesc;
1381-
1382- MemoryContextSwitchTo(oldcontext);
1383-
1384- /*
1385- * We'd like to load the query text file (if needed) while not holding any
1386- * lock on pgss->lock. In the worst case we'll have to do this again
1387- * after we have the lock, but it's unlikely enough to make this a win
1388- * despite occasional duplicated work. We need to reload if anybody
1389- * writes to the file (either a retail qtext_store(), or a garbage
1390- * collection) between this point and where we've gotten shared lock. If
1391- * a qtext_store is actually in progress when we look, we might as well
1392- * skip the speculative load entirely.
1393- */
1394- if (showtext)
1395- {
1396- int n_writers;
1397-
1398- /* Take the mutex so we can examine variables */
1399- {
1400- volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1401-
1402- SpinLockAcquire(&s->mutex);
1403- extent = s->extent;
1404- n_writers = s->n_writers;
1405- gc_count = s->gc_count;
1406- SpinLockRelease(&s->mutex);
1407- }
1408-
1409- /* No point in loading file now if there are active writers */
1410- if (n_writers == 0)
1411- qbuffer = qtext_load_file(&qbuffer_size);
1412- }
1413-
1414- /*
1415- * Get shared lock, load or reload the query text file if we must, and
1416- * iterate over the hashtable entries.
1417- *
1418- * With a large hash table, we might be holding the lock rather longer
1419- * than one could wish. However, this only blocks creation of new hash
1420- * table entries, and the larger the hash table the less likely that is to
1421- * be needed. So we can hope this is okay. Perhaps someday we'll decide
1422- * we need to partition the hash table to limit the time spent holding any
1423- * one lock.
1424- */
1425- LWLockAcquire(pgss->lock, LW_SHARED);
1426-
1427- if (showtext)
1428- {
1429- /*
1430- * Here it is safe to examine extent and gc_count without taking the
1431- * mutex. Note that although other processes might change
1432- * pgss->extent just after we look at it, the strings they then write
1433- * into the file cannot yet be referenced in the hashtable, so we
1434- * don't care whether we see them or not.
1435- *
1436- * If qtext_load_file fails, we just press on; we'll return NULL for
1437- * every query text.
1438- */
1439- if (qbuffer == NULL ||
1440- pgss->extent != extent ||
1441- pgss->gc_count != gc_count)
1442- {
1443- if (qbuffer)
1444- free(qbuffer);
1445- qbuffer = qtext_load_file(&qbuffer_size);
1446- }
1447- }
1448-
1449- hash_seq_init(&hash_seq, pgss_hash);
1450- while ((entry = hash_seq_search(&hash_seq)) != NULL)
1451- {
1452- Datum values[PG_STAT_STATEMENTS_COLS];
1453- bool nulls[PG_STAT_STATEMENTS_COLS];
1454- int i = 0;
1455- Counters tmp;
1456- int64 queryid = entry->key.queryid;
1457-
1458- memset(values, 0, sizeof(values));
1459- memset(nulls, 0, sizeof(nulls));
1460-
1461- values[i++] = ObjectIdGetDatum(entry->key.userid);
1462- values[i++] = ObjectIdGetDatum(entry->key.dbid);
1463-
1464- if (is_superuser || entry->key.userid == userid)
1465- {
1466- if (api_version >= PGSS_V1_2)
1467- values[i++] = Int64GetDatumFast(queryid);
1468-
1469- if (showtext)
1470- {
1471- char *qstr = qtext_fetch(entry->query_offset,
1472- entry->query_len,
1473- qbuffer,
1474- qbuffer_size);
1475-
1476- if (qstr)
1477- {
1478- char *enc;
1479-
1480- enc = pg_any_to_server(qstr,
1481- entry->query_len,
1482- entry->encoding);
1483-
1484- values[i++] = CStringGetTextDatum(enc);
1485-
1486- if (enc != qstr)
1487- pfree(enc);
1488- }
1489- else
1490- {
1491- /* Just return a null if we fail to find the text */
1492- nulls[i++] = true;
1493- }
1494- }
1495- else
1496- {
1497- /* Query text not requested */
1498- nulls[i++] = true;
1499- }
1500- }
1501- else
1502- {
1503- /* Don't show queryid */
1504- if (api_version >= PGSS_V1_2)
1505- nulls[i++] = true;
1506-
1507- /*
1508- * Don't show query text, but hint as to the reason for not doing
1509- * so if it was requested
1510- */
1511- if (showtext)
1512- values[i++] = CStringGetTextDatum("<insufficient privilege>");
1513- else
1514- nulls[i++] = true;
1515- }
1516-
1517- /* copy counters to a local variable to keep locking time short */
1518- {
1519- volatile pgssEntry *e = (volatile pgssEntry *) entry;
1520-
1521- SpinLockAcquire(&e->mutex);
1522- tmp = e->counters;
1523- SpinLockRelease(&e->mutex);
1524- }
1525-
1526- /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1527- if (tmp.calls == 0)
1528- continue;
1529-
1530- values[i++] = Int64GetDatumFast(tmp.calls);
1531- values[i++] = Float8GetDatumFast(tmp.total_time);
1532- values[i++] = Int64GetDatumFast(tmp.rows);
1533- values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1534- values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1535- if (api_version >= PGSS_V1_1)
1536- values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1537- values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1538- values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1539- values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1540- if (api_version >= PGSS_V1_1)
1541- values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1542- values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1543- values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1544- values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1545- if (api_version >= PGSS_V1_1)
1546- {
1547- values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1548- values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1549- }
1550-
1551- Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
1552- api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
1553- api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
1554- -1 /* fail if you forget to update this assert */ ));
1555-
1556- tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1557- }
1558-
1559- /* clean up and return the tuplestore */
1560- LWLockRelease(pgss->lock);
1561-
1562- if (qbuffer)
1563- free(qbuffer);
1564-
1565- tuplestore_donestoring(tupstore);
1566-}
1567-
1568-/*
1569- * Estimate shared memory space needed.
1570- */
1571-static Size
1572-pgss_memsize(void)
1573-{
1574- Size size;
1575-
1576- size = MAXALIGN(sizeof(pgssSharedState));
1577- size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
1578-
1579- return size;
1580-}
1581-
1582-/*
1583- * Allocate a new hashtable entry.
1584- * caller must hold an exclusive lock on pgss->lock
1585- *
1586- * "query" need not be null-terminated; we rely on query_len instead
1587- *
1588- * If "sticky" is true, make the new entry artificially sticky so that it will
1589- * probably still be there when the query finishes execution. We do this by
1590- * giving it a median usage value rather than the normal value. (Strictly
1591- * speaking, query strings are normalized on a best effort basis, though it
1592- * would be difficult to demonstrate this even under artificial conditions.)
1593- *
1594- * Note: despite needing exclusive lock, it's not an error for the target
1595- * entry to already exist. This is because pgss_store releases and
1596- * reacquires lock after failing to find a match; so someone else could
1597- * have made the entry while we waited to get exclusive lock.
1598- */
1599-static pgssEntry *
1600-entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
1601- bool sticky)
1602-{
1603- pgssEntry *entry;
1604- bool found;
1605-
1606- /* Make space if needed */
1607- while (hash_get_num_entries(pgss_hash) >= pgss_max)
1608- entry_dealloc();
1609-
1610- /* Find or create an entry with desired hash code */
1611- entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1612-
1613- if (!found)
1614- {
1615- /* New entry, initialize it */
1616-
1617- /* reset the statistics */
1618- memset(&entry->counters, 0, sizeof(Counters));
1619- /* set the appropriate initial usage count */
1620- entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1621- /* re-initialize the mutex each time ... we assume no one using it */
1622- SpinLockInit(&entry->mutex);
1623- /* ... and don't forget the query text metadata */
1624- Assert(query_len >= 0);
1625- entry->query_offset = query_offset;
1626- entry->query_len = query_len;
1627- entry->encoding = encoding;
1628- }
1629-
1630- return entry;
1631-}
1632-
1633-/*
1634- * qsort comparator for sorting into increasing usage order
1635- */
1636-static int
1637-entry_cmp(const void *lhs, const void *rhs)
1638-{
1639- double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1640- double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1641-
1642- if (l_usage < r_usage)
1643- return -1;
1644- else if (l_usage > r_usage)
1645- return +1;
1646- else
1647- return 0;
1648-}
1649-
1650-/*
1651- * Deallocate least used entries.
1652- * Caller must hold an exclusive lock on pgss->lock.
1653- */
1654-static void
1655-entry_dealloc(void)
1656-{
1657- HASH_SEQ_STATUS hash_seq;
1658- pgssEntry **entries;
1659- pgssEntry *entry;
1660- int nvictims;
1661- int i;
1662- Size totlen = 0;
1663-
1664- /*
1665- * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1666- * While we're scanning the table, apply the decay factor to the usage
1667- * values.
1668- */
1669-
1670- entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1671-
1672- i = 0;
1673- hash_seq_init(&hash_seq, pgss_hash);
1674- while ((entry = hash_seq_search(&hash_seq)) != NULL)
1675- {
1676- entries[i++] = entry;
1677- /* "Sticky" entries get a different usage decay rate. */
1678- if (entry->counters.calls == 0)
1679- entry->counters.usage *= STICKY_DECREASE_FACTOR;
1680- else
1681- entry->counters.usage *= USAGE_DECREASE_FACTOR;
1682- /* Accumulate total size, too. */
1683- totlen += entry->query_len + 1;
1684- }
1685-
1686- qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1687-
1688- if (i > 0)
1689- {
1690- /* Record the (approximate) median usage */
1691- pgss->cur_median_usage = entries[i / 2]->counters.usage;
1692- /* Record the mean query length */
1693- pgss->mean_query_len = totlen / i;
1694- }
1695-
1696- nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1697- nvictims = Min(nvictims, i);
1698-
1699- for (i = 0; i < nvictims; i++)
1700- {
1701- hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
1702- }
1703-
1704- pfree(entries);
1705-}
1706-
1707-/*
1708- * Given a null-terminated string, allocate a new entry in the external query
1709- * text file and store the string there.
1710- *
1711- * Although we could compute the string length via strlen(), callers already
1712- * have it handy, so we require them to pass it too.
1713- *
1714- * If successful, returns true, and stores the new entry's offset in the file
1715- * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
1716- * number of garbage collections that have occurred so far.
1717- *
1718- * On failure, returns false.
1719- *
1720- * At least a shared lock on pgss->lock must be held by the caller, so as
1721- * to prevent a concurrent garbage collection. Share-lock-holding callers
1722- * should pass a gc_count pointer to obtain the number of garbage collections,
1723- * so that they can recheck the count after obtaining exclusive lock to
1724- * detect whether a garbage collection occurred (and removed this entry).
1725- */
1726-static bool
1727-qtext_store(const char *query, int query_len,
1728- Size *query_offset, int *gc_count)
1729-{
1730- Size off;
1731- int fd;
1732-
1733- /*
1734- * We use a spinlock to protect extent/n_writers/gc_count, so that
1735- * multiple processes may execute this function concurrently.
1736- */
1737- {
1738- volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1739-
1740- SpinLockAcquire(&s->mutex);
1741- off = s->extent;
1742- s->extent += query_len + 1;
1743- s->n_writers++;
1744- if (gc_count)
1745- *gc_count = s->gc_count;
1746- SpinLockRelease(&s->mutex);
1747- }
1748-
1749- *query_offset = off;
1750-
1751- /* Now write the data into the successfully-reserved part of the file */
1752- fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY,
1753- S_IRUSR | S_IWUSR);
1754- if (fd < 0)
1755- goto error;
1756-
1757- if (lseek(fd, off, SEEK_SET) != off)
1758- goto error;
1759-
1760- if (write(fd, query, query_len + 1) != query_len + 1)
1761- goto error;
1762-
1763- CloseTransientFile(fd);
1764-
1765- /* Mark our write complete */
1766- {
1767- volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1768-
1769- SpinLockAcquire(&s->mutex);
1770- s->n_writers--;
1771- SpinLockRelease(&s->mutex);
1772- }
1773-
1774- return true;
1775-
1776-error:
1777- ereport(LOG,
1778- (errcode_for_file_access(),
1779- errmsg("could not write pg_stat_statement file \"%s\": %m",
1780- PGSS_TEXT_FILE)));
1781-
1782- if (fd >= 0)
1783- CloseTransientFile(fd);
1784-
1785- /* Mark our write complete */
1786- {
1787- volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1788-
1789- SpinLockAcquire(&s->mutex);
1790- s->n_writers--;
1791- SpinLockRelease(&s->mutex);
1792- }
1793-
1794- return false;
1795-}
1796-
1797-/*
1798- * Read the external query text file into a malloc'd buffer.
1799- *
1800- * Returns NULL (without throwing an error) if unable to read, eg
1801- * file not there or insufficient memory.
1802- *
1803- * On success, the buffer size is also returned into *buffer_size.
1804- *
1805- * This can be called without any lock on pgss->lock, but in that case
1806- * the caller is responsible for verifying that the result is sane.
1807- */
1808-static char *
1809-qtext_load_file(Size *buffer_size)
1810-{
1811- char *buf;
1812- int fd;
1813- struct stat stat;
1814-
1815- fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY, 0);
1816- if (fd < 0)
1817- {
1818- if (errno != ENOENT)
1819- ereport(LOG,
1820- (errcode_for_file_access(),
1821- errmsg("could not read pg_stat_statement file \"%s\": %m",
1822- PGSS_TEXT_FILE)));
1823- return NULL;
1824- }
1825-
1826- /* Get file length */
1827- if (fstat(fd, &stat))
1828- {
1829- ereport(LOG,
1830- (errcode_for_file_access(),
1831- errmsg("could not stat pg_stat_statement file \"%s\": %m",
1832- PGSS_TEXT_FILE)));
1833- CloseTransientFile(fd);
1834- return NULL;
1835- }
1836-
1837- /* Allocate buffer; beware that off_t might be wider than size_t */
1838- if (stat.st_size <= MaxAllocSize)
1839- buf = (char *) malloc(stat.st_size);
1840- else
1841- buf = NULL;
1842- if (buf == NULL)
1843- {
1844- ereport(LOG,
1845- (errcode(ERRCODE_OUT_OF_MEMORY),
1846- errmsg("out of memory")));
1847- CloseTransientFile(fd);
1848- return NULL;
1849- }
1850-
1851- /*
1852- * OK, slurp in the file. If we get a short read and errno doesn't get
1853- * set, the reason is probably that garbage collection truncated the file
1854- * since we did the fstat(), so we don't log a complaint --- but we don't
1855- * return the data, either, since it's most likely corrupt due to
1856- * concurrent writes from garbage collection.
1857- */
1858- errno = 0;
1859- if (read(fd, buf, stat.st_size) != stat.st_size)
1860- {
1861- if (errno)
1862- ereport(LOG,
1863- (errcode_for_file_access(),
1864- errmsg("could not read pg_stat_statement file \"%s\": %m",
1865- PGSS_TEXT_FILE)));
1866- free(buf);
1867- CloseTransientFile(fd);
1868- return NULL;
1869- }
1870-
1871- CloseTransientFile(fd);
1872-
1873- *buffer_size = stat.st_size;
1874- return buf;
1875-}
1876-
1877-/*
1878- * Locate a query text in the file image previously read by qtext_load_file().
1879- *
1880- * We validate the given offset/length, and return NULL if bogus. Otherwise,
1881- * the result points to a null-terminated string within the buffer.
1882- */
1883-static char *
1884-qtext_fetch(Size query_offset, int query_len,
1885- char *buffer, Size buffer_size)
1886-{
1887- /* File read failed? */
1888- if (buffer == NULL)
1889- return NULL;
1890- /* Bogus offset/length? */
1891- if (query_len < 0 ||
1892- query_offset + query_len >= buffer_size)
1893- return NULL;
1894- /* As a further sanity check, make sure there's a trailing null */
1895- if (buffer[query_offset + query_len] != '\0')
1896- return NULL;
1897- /* Looks OK */
1898- return buffer + query_offset;
1899-}
1900-
1901-/*
1902- * Do we need to garbage-collect the external query text file?
1903- *
1904- * Caller should hold at least a shared lock on pgss->lock.
1905- */
1906-static bool
1907-need_gc_qtexts(void)
1908-{
1909- Size extent;
1910-
1911- /* Read shared extent pointer */
1912- {
1913- volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
1914-
1915- SpinLockAcquire(&s->mutex);
1916- extent = s->extent;
1917- SpinLockRelease(&s->mutex);
1918- }
1919-
1920- /* Don't proceed if file does not exceed 512 bytes per possible entry */
1921- if (extent < 512 * pgss_max)
1922- return false;
1923-
1924- /*
1925- * Don't proceed if file is less than about 50% bloat. Nothing can or
1926- * should be done in the event of unusually large query texts accounting
1927- * for file's large size. We go to the trouble of maintaining the mean
1928- * query length in order to prevent garbage collection from thrashing
1929- * uselessly.
1930- */
1931- if (extent < pgss->mean_query_len * pgss_max * 2)
1932- return false;
1933-
1934- return true;
1935-}
1936-
1937-/*
1938- * Garbage-collect orphaned query texts in external file.
1939- *
1940- * This won't be called often in the typical case, since it's likely that
1941- * there won't be too much churn, and besides, a similar compaction process
1942- * occurs when serializing to disk at shutdown or as part of resetting.
1943- * Despite this, it seems prudent to plan for the edge case where the file
1944- * becomes unreasonably large, with no other method of compaction likely to
1945- * occur in the foreseeable future.
1946- *
1947- * The caller must hold an exclusive lock on pgss->lock.
1948- */
1949-static void
1950-gc_qtexts(void)
1951-{
1952- char *qbuffer;
1953- Size qbuffer_size;
1954- FILE *qfile;
1955- HASH_SEQ_STATUS hash_seq;
1956- pgssEntry *entry;
1957- Size extent;
1958- int nentries;
1959-
1960- /*
1961- * When called from pgss_store, some other session might have proceeded
1962- * with garbage collection in the no-lock-held interim of lock strength
1963- * escalation. Check once more that this is actually necessary.
1964- */
1965- if (!need_gc_qtexts())
1966- return;
1967-
1968- /*
1969- * Load the old texts file. If we fail (out of memory, for instance) just
1970- * skip the garbage collection.
1971- */
1972- qbuffer = qtext_load_file(&qbuffer_size);
1973- if (qbuffer == NULL)
1974- return;
1975-
1976- /*
1977- * We overwrite the query texts file in place, so as to reduce the risk of
1978- * an out-of-disk-space failure. Since the file is guaranteed not to get
1979- * larger, this should always work on traditional filesystems; though we
1980- * could still lose on copy-on-write filesystems.
1981- */
1982- qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
1983- if (qfile == NULL)
1984- {
1985- ereport(LOG,
1986- (errcode_for_file_access(),
1987- errmsg("could not write pg_stat_statement file \"%s\": %m",
1988- PGSS_TEXT_FILE)));
1989- goto gc_fail;
1990- }
1991-
1992- extent = 0;
1993- nentries = 0;
1994-
1995- hash_seq_init(&hash_seq, pgss_hash);
1996- while ((entry = hash_seq_search(&hash_seq)) != NULL)
1997- {
1998- int query_len = entry->query_len;
1999- char *qry = qtext_fetch(entry->query_offset,
2000- query_len,
2001- qbuffer,
2002- qbuffer_size);
2003-
2004- if (qry == NULL)
2005- {
2006- /* Trouble ... drop the text */
2007- entry->query_offset = 0;
2008- entry->query_len = -1;
2009- continue;
2010- }
2011-
2012- if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2013- {
2014- ereport(LOG,
2015- (errcode_for_file_access(),
2016- errmsg("could not write pg_stat_statement file \"%s\": %m",
2017- PGSS_TEXT_FILE)));
2018- hash_seq_term(&hash_seq);
2019- goto gc_fail;
2020- }
2021-
2022- entry->query_offset = extent;
2023- extent += query_len + 1;
2024- nentries++;
2025- }
2026-
2027- /*
2028- * Truncate away any now-unused space. If this fails for some odd reason,
2029- * we log it, but there's no need to fail.
2030- */
2031- if (ftruncate(fileno(qfile), extent) != 0)
2032- ereport(LOG,
2033- (errcode_for_file_access(),
2034- errmsg("could not truncate pg_stat_statement file \"%s\": %m",
2035- PGSS_TEXT_FILE)));
2036-
2037- if (FreeFile(qfile))
2038- {
2039- ereport(LOG,
2040- (errcode_for_file_access(),
2041- errmsg("could not write pg_stat_statement file \"%s\": %m",
2042- PGSS_TEXT_FILE)));
2043- qfile = NULL;
2044- goto gc_fail;
2045- }
2046-
2047- elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2048- pgss->extent, extent);
2049-
2050- /* Reset the shared extent pointer */
2051- pgss->extent = extent;
2052-
2053- /*
2054- * Also update the mean query length, to be sure that need_gc_qtexts()
2055- * won't still think we have a problem.
2056- */
2057- if (nentries > 0)
2058- pgss->mean_query_len = extent / nentries;
2059- else
2060- pgss->mean_query_len = ASSUMED_LENGTH_INIT;
2061-
2062- free(qbuffer);
2063-
2064- /*
2065- * OK, count a garbage collection cycle. (Note: even though we have
2066- * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2067- * other processes may examine gc_count while holding only the mutex.
2068- * Also, we have to advance the count *after* we've rewritten the file,
2069- * else other processes might not realize they read a stale file.)
2070- */
2071- record_gc_qtexts();
2072-
2073- return;
2074-
2075-gc_fail:
2076- /* clean up resources */
2077- if (qfile)
2078- FreeFile(qfile);
2079- if (qbuffer)
2080- free(qbuffer);
2081-
2082- /*
2083- * Since the contents of the external file are now uncertain, mark all
2084- * hashtable entries as having invalid texts.
2085- */
2086- hash_seq_init(&hash_seq, pgss_hash);
2087- while ((entry = hash_seq_search(&hash_seq)) != NULL)
2088- {
2089- entry->query_offset = 0;
2090- entry->query_len = -1;
2091- }
2092-
2093- /* Seems like a good idea to bump the GC count even though we failed */
2094- record_gc_qtexts();
2095-}
2096-
2097-/*
2098- * Release all entries.
9+ *-------------------------------------------------------------------------
209910 */
2100-static void
2101-entry_reset(void)
2102-{
2103- HASH_SEQ_STATUS hash_seq;
2104- pgssEntry *entry;
2105- FILE *qfile;
2106-
2107- LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
2108-
2109- hash_seq_init(&hash_seq, pgss_hash);
2110- while ((entry = hash_seq_search(&hash_seq)) != NULL)
2111- {
2112- hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
2113- }
2114-
2115- /*
2116- * Write new empty query file, perhaps even creating a new one to recover
2117- * if the file was missing.
2118- */
2119- qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
2120- if (qfile == NULL)
2121- {
2122- ereport(LOG,
2123- (errcode_for_file_access(),
2124- errmsg("could not create pg_stat_statement file \"%s\": %m",
2125- PGSS_TEXT_FILE)));
2126- goto done;
2127- }
2128-
2129- /* If ftruncate fails, log it, but it's not a fatal problem */
2130- if (ftruncate(fileno(qfile), 0) != 0)
2131- ereport(LOG,
2132- (errcode_for_file_access(),
2133- errmsg("could not truncate pg_stat_statement file \"%s\": %m",
2134- PGSS_TEXT_FILE)));
11+#include "postgres.h"
213512
2136- FreeFile(qfile);
13+#include <sys/stat.h>
213714
2138-done:
2139- pgss->extent = 0;
2140- /* This counts as a query text garbage collection for our purposes */
2141- record_gc_qtexts();
15+#include "access/hash.h"
16+#include "parser/scanner.h"
214217
2143- LWLockRelease(pgss->lock);
2144-}
2145-#endif
18+static void AppendJumble(pgssJumbleState *jstate,
19+ const unsigned char *item, Size size);
20+static void JumbleQuery(pgssJumbleState *jstate, Query *query);
21+static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
22+static void JumbleExpr(pgssJumbleState *jstate, Node *node);
23+static void RecordConstLocation(pgssJumbleState *jstate, int location);
24+static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
25+static int comp_location(const void *a, const void *b);
214626
214727 /*
214828 * AppendJumble: Append a value that is substantive in a given query to
@@ -2209,8 +89,10 @@ JumbleQuery(pgssJumbleState *jstate, Query *query)
220989 JumbleRangeTable(jstate, query->rtable);
221090 JumbleExpr(jstate, (Node *) query->jointree);
221191 JumbleExpr(jstate, (Node *) query->targetList);
92+ JumbleExpr(jstate, (Node *) query->onConflict);
221293 JumbleExpr(jstate, (Node *) query->returningList);
221394 JumbleExpr(jstate, (Node *) query->groupClause);
95+ JumbleExpr(jstate, (Node *) query->groupingSets);
221496 JumbleExpr(jstate, query->havingQual);
221597 JumbleExpr(jstate, (Node *) query->windowClause);
221698 JumbleExpr(jstate, (Node *) query->distinctClause);
@@ -2239,6 +121,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
2239121 {
2240122 case RTE_RELATION:
2241123 APP_JUMB(rte->relid);
124+ JumbleExpr(jstate, (Node *) rte->tablesample);
2242125 break;
2243126 case RTE_SUBQUERY:
2244127 JumbleQuery(jstate, rte->subquery);
@@ -2341,6 +224,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
2341224 JumbleExpr(jstate, (Node *) expr->aggfilter);
2342225 }
2343226 break;
227+ case T_GroupingFunc:
228+ {
229+ GroupingFunc *grpnode = (GroupingFunc *) node;
230+
231+ JumbleExpr(jstate, (Node *) grpnode->refs);
232+ }
233+ break;
2344234 case T_WindowFunc:
2345235 {
2346236 WindowFunc *expr = (WindowFunc *) node;
@@ -2576,6 +466,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
2576466 APP_JUMB(ce->cursor_param);
2577467 }
2578468 break;
469+ case T_InferenceElem:
470+ {
471+ InferenceElem *ie = (InferenceElem *) node;
472+
473+ APP_JUMB(ie->infercollid);
474+ APP_JUMB(ie->inferopclass);
475+ JumbleExpr(jstate, ie->expr);
476+ }
477+ break;
2579478 case T_TargetEntry:
2580479 {
2581480 TargetEntry *tle = (TargetEntry *) node;
@@ -2612,12 +511,32 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
2612511 JumbleExpr(jstate, from->quals);
2613512 }
2614513 break;
514+ case T_OnConflictExpr:
515+ {
516+ OnConflictExpr *conf = (OnConflictExpr *) node;
517+
518+ APP_JUMB(conf->action);
519+ JumbleExpr(jstate, (Node *) conf->arbiterElems);
520+ JumbleExpr(jstate, conf->arbiterWhere);
521+ JumbleExpr(jstate, (Node *) conf->onConflictSet);
522+ JumbleExpr(jstate, conf->onConflictWhere);
523+ APP_JUMB(conf->constraint);
524+ APP_JUMB(conf->exclRelIndex);
525+ JumbleExpr(jstate, (Node *) conf->exclRelTlist);
526+ }
527+ break;
2615528 case T_List:
2616529 foreach(temp, (List *) node)
2617530 {
2618531 JumbleExpr(jstate, (Node *) lfirst(temp));
2619532 }
2620533 break;
534+ case T_IntList:
535+ foreach(temp, (List *) node)
536+ {
537+ APP_JUMB(lfirst_int(temp));
538+ }
539+ break;
2621540 case T_SortGroupClause:
2622541 {
2623542 SortGroupClause *sgc = (SortGroupClause *) node;
@@ -2628,6 +547,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
2628547 APP_JUMB(sgc->nulls_first);
2629548 }
2630549 break;
550+ case T_GroupingSet:
551+ {
552+ GroupingSet *gsnode = (GroupingSet *) node;
553+
554+ JumbleExpr(jstate, (Node *) gsnode->content);
555+ }
556+ break;
2631557 case T_WindowClause:
2632558 {
2633559 WindowClause *wc = (WindowClause *) node;
@@ -2666,6 +592,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
2666592 JumbleExpr(jstate, rtfunc->funcexpr);
2667593 }
2668594 break;
595+ case T_TableSampleClause:
596+ {
597+ TableSampleClause *tsc = (TableSampleClause *) node;
598+
599+ APP_JUMB(tsc->tsmhandler);
600+ JumbleExpr(jstate, (Node *) tsc->args);
601+ JumbleExpr(jstate, (Node *) tsc->repeatable);
602+ }
603+ break;
2669604 default:
2670605 /* Only a warning, since we can stumble along anyway */
2671606 elog(WARNING, "unrecognized node type: %d",
@@ -2827,6 +762,9 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
2827762 ScanKeywords,
2828763 NumScanKeywords);
2829764
765+ /* we don't want to re-emit any escape string warnings */
766+ yyextra.escape_string_warning = false;
767+
2830768 /* Search for each constant, in sequence */
2831769 for (i = 0; i < jstate->clocations_count; i++)
2832770 {
--- a/sql/pg_hint_plan.sql
+++ b/sql/pg_hint_plan.sql
@@ -338,7 +338,7 @@ EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.id = t2.id);
338338 /*+NestLoop(t1 t2)*/
339339 EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.id = t2.id);
340340
341--- inherite table test
341+-- inheritance tables test
342342 SET constraint_exclusion TO off;
343343 EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)';
344344 SET constraint_exclusion TO on;
Afficher sur ancien navigateur de dépôt.