# 2013 August 3 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file contains automated tests used to verify that the sqlite_stat4 # functionality is working. # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix analyze9 ifcapable !stat4 { finish_test return } proc s {blob} { set ret "" binary scan $blob c* bytes foreach b $bytes { set t [binary format c $b] if {[string is print $t]} { append ret $t } else { append ret . } } return $ret } db function s s do_execsql_test 1.0 { CREATE TABLE t1(a TEXT, b TEXT); INSERT INTO t1 VALUES('(0)', '(0)'); INSERT INTO t1 VALUES('(1)', '(1)'); INSERT INTO t1 VALUES('(2)', '(2)'); INSERT INTO t1 VALUES('(3)', '(3)'); INSERT INTO t1 VALUES('(4)', '(4)'); CREATE INDEX i1 ON t1(a, b); } {} do_execsql_test 1.1 { ANALYZE; } {} do_execsql_test 1.2 { SELECT tbl,idx,nEq,nLt,nDLt,test_decode(sample) FROM sqlite_stat4; } { t1 i1 {1 1 1} {0 0 0} {0 0 0} {(0) (0) 1} t1 i1 {1 1 1} {1 1 1} {1 1 1} {(1) (1) 2} t1 i1 {1 1 1} {2 2 2} {2 2 2} {(2) (2) 3} t1 i1 {1 1 1} {3 3 3} {3 3 3} {(3) (3) 4} t1 i1 {1 1 1} {4 4 4} {4 4 4} {(4) (4) 5} } if {[permutation] != "utf16"} { do_execsql_test 1.3 { SELECT tbl,idx,nEq,nLt,nDLt,s(sample) FROM sqlite_stat4; } { t1 i1 {1 1 1} {0 0 0} {0 0 0} ....(0)(0) t1 i1 {1 1 1} {1 1 1} {1 1 1} ....(1)(1). t1 i1 {1 1 1} {2 2 2} {2 2 2} ....(2)(2). t1 i1 {1 1 1} {3 3 3} {3 3 3} ....(3)(3). t1 i1 {1 1 1} {4 4 4} {4 4 4} ....(4)(4). } } #------------------------------------------------------------------------- # This is really just to test SQL user function "test_decode". # reset_db do_execsql_test 2.1 { CREATE TABLE t1(a, b, c); INSERT INTO t1 VALUES('some text', 14, NULL); INSERT INTO t1 VALUES(22.0, NULL, x'656667'); CREATE INDEX i1 ON t1(a, b, c); ANALYZE; SELECT test_decode(sample) FROM sqlite_stat4; } { {22.0 NULL x'656667' 2} {{some text} 14 NULL 1} } #------------------------------------------------------------------------- # reset_db do_execsql_test 3.1 { CREATE TABLE t2(a, b); CREATE INDEX i2 ON t2(a, b); BEGIN; } do_test 3.2 { for {set i 0} {$i < 1000} {incr i} { set a [expr $i / 10] set b [expr int(rand() * 15.0)] execsql { INSERT INTO t2 VALUES($a, $b) } } execsql COMMIT } {} db func lindex lindex # Each value of "a" occurs exactly 10 times in the table. # do_execsql_test 3.3.1 { SELECT count(*) FROM t2 GROUP BY a; } [lrange [string repeat "10 " 100] 0 99] # The first element in the "nEq" list of all samples should therefore be 10. # do_execsql_test 3.3.2 { ANALYZE; SELECT lindex(nEq, 0) FROM sqlite_stat4; } [lrange [string repeat "10 " 100] 0 23] #------------------------------------------------------------------------- # do_execsql_test 3.4 { DROP TABLE IF EXISTS t1; CREATE TABLE t1(a INTEGER PRIMARY KEY, b, c); INSERT INTO t1 VALUES(1, 1, 'one-a'); INSERT INTO t1 VALUES(11, 1, 'one-b'); INSERT INTO t1 VALUES(21, 1, 'one-c'); INSERT INTO t1 VALUES(31, 1, 'one-d'); INSERT INTO t1 VALUES(41, 1, 'one-e'); INSERT INTO t1 VALUES(51, 1, 'one-f'); INSERT INTO t1 VALUES(61, 1, 'one-g'); INSERT INTO t1 VALUES(71, 1, 'one-h'); INSERT INTO t1 VALUES(81, 1, 'one-i'); INSERT INTO t1 VALUES(91, 1, 'one-j'); INSERT INTO t1 SELECT a+1,2,'two' || substr(c,4) FROM t1; INSERT INTO t1 SELECT a+2,3,'three'||substr(c,4) FROM t1 WHERE c GLOB 'one-*'; INSERT INTO t1 SELECT a+3,4,'four'||substr(c,4) FROM t1 WHERE c GLOB 'one-*'; INSERT INTO t1 SELECT a+4,5,'five'||substr(c,4) FROM t1 WHERE c GLOB 'one-*'; INSERT INTO t1 SELECT a+5,6,'six'||substr(c,4) FROM t1 WHERE c GLOB 'one-*'; CREATE INDEX t1b ON t1(b); ANALYZE; SELECT c FROM t1 WHERE b=3 AND a BETWEEN 30 AND 60; } {three-d three-e three-f} #------------------------------------------------------------------------- # These tests verify that the sample selection for stat4 appears to be # working as designed. # reset_db db func lindex lindex db func lrange lrange do_execsql_test 4.0 { DROP TABLE IF EXISTS t1; CREATE TABLE t1(a, b, c); CREATE INDEX i1 ON t1(c, b, a); } proc insert_filler_rows_n {iStart args} { set A(-ncopy) 1 set A(-nval) 1 foreach {k v} $args { if {[info exists A($k)]==0} { error "no such option: $k" } set A($k) $v } if {[llength $args] % 2} { error "option requires an argument: [lindex $args end]" } for {set i 0} {$i < $A(-nval)} {incr i} { set iVal [expr $iStart+$i] for {set j 0} {$j < $A(-ncopy)} {incr j} { execsql { INSERT INTO t1 VALUES($iVal, $iVal, $iVal) } } } } do_test 4.1 { execsql { BEGIN } insert_filler_rows_n 0 -ncopy 10 -nval 19 insert_filler_rows_n 20 -ncopy 1 -nval 100 execsql { INSERT INTO t1(c, b, a) VALUES(200, 1, 'a'); INSERT INTO t1(c, b, a) VALUES(200, 1, 'b'); INSERT INTO t1(c, b, a) VALUES(200, 1, 'c'); INSERT INTO t1(c, b, a) VALUES(200, 2, 'e'); INSERT INTO t1(c, b, a) VALUES(200, 2, 'f'); INSERT INTO t1(c, b, a) VALUES(201, 3, 'g'); INSERT INTO t1(c, b, a) VALUES(201, 4, 'h'); ANALYZE; SELECT count(*) FROM sqlite_stat4; SELECT count(*) FROM t1; } } {24 297} do_execsql_test 4.2 { SELECT neq, lrange(nlt, 0, 2), lrange(ndlt, 0, 2), lrange(test_decode(sample), 0, 2) FROM sqlite_stat4 ORDER BY rowid LIMIT 16; } { {10 10 10 1} {0 0 0} {0 0 0} {0 0 0} {10 10 10 1} {10 10 10} {1 1 1} {1 1 1} {10 10 10 1} {20 20 20} {2 2 2} {2 2 2} {10 10 10 1} {30 30 30} {3 3 3} {3 3 3} {10 10 10 1} {40 40 40} {4 4 4} {4 4 4} {10 10 10 1} {50 50 50} {5 5 5} {5 5 5} {10 10 10 1} {60 60 60} {6 6 6} {6 6 6} {10 10 10 1} {70 70 70} {7 7 7} {7 7 7} {10 10 10 1} {80 80 80} {8 8 8} {8 8 8} {10 10 10 1} {90 90 90} {9 9 9} {9 9 9} {10 10 10 1} {100 100 100} {10 10 10} {10 10 10} {10 10 10 1} {110 110 110} {11 11 11} {11 11 11} {10 10 10 1} {120 120 120} {12 12 12} {12 12 12} {10 10 10 1} {130 130 130} {13 13 13} {13 13 13} {10 10 10 1} {140 140 140} {14 14 14} {14 14 14} {10 10 10 1} {150 150 150} {15 15 15} {15 15 15} } do_execsql_test 4.3 { SELECT neq, lrange(nlt, 0, 2), lrange(ndlt, 0, 2), lrange(test_decode(sample), 0, 1) FROM sqlite_stat4 ORDER BY rowid DESC LIMIT 2; } { {2 1 1 1} {295 296 296} {120 122 125} {201 4} {5 3 1 1} {290 290 290} {119 119 119} {200 1} } do_execsql_test 4.4 { SELECT count(DISTINCT c) FROM t1 WHERE c<201 } 120 do_execsql_test 4.5 { SELECT count(DISTINCT c) FROM t1 WHERE c<200 } 119 # Check that the perioidic samples are present. do_execsql_test 4.6 { SELECT count(*) FROM sqlite_stat4 WHERE lindex(test_decode(sample), 3) IN ('34', '68', '102', '136', '170', '204', '238', '272') } {8} reset_db do_test 4.7 { execsql { BEGIN; CREATE TABLE t1(o,t INTEGER PRIMARY KEY); CREATE INDEX i1 ON t1(o); } for {set i 0} {$i<10000} {incr i [expr (($i<1000)?1:10)]} { execsql { INSERT INTO t1 VALUES('x', $i) } } execsql { COMMIT; ANALYZE; SELECT count(*) FROM sqlite_stat4; } } {8} do_execsql_test 4.8 { SELECT test_decode(sample) FROM sqlite_stat4; } { {x 211} {x 423} {x 635} {x 847} {x 1590} {x 3710} {x 5830} {x 7950} } #------------------------------------------------------------------------- # The following would cause a crash at one point. # reset_db do_execsql_test 5.1 { PRAGMA encoding = 'utf-16'; CREATE TABLE t0(v); ANALYZE; } #------------------------------------------------------------------------- # This was also crashing (corrupt sqlite_stat4 table). # reset_db do_execsql_test 6.1 { CREATE TABLE t1(a, b); CREATE INDEX i1 ON t1(a); CREATE INDEX i2 ON t1(b); INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 VALUES(2, 2); INSERT INTO t1 VALUES(3, 3); INSERT INTO t1 VALUES(4, 4); INSERT INTO t1 VALUES(5, 5); ANALYZE; PRAGMA writable_schema = 1; CREATE TEMP TABLE x1 AS SELECT tbl,idx,neq,nlt,ndlt,sample FROM sqlite_stat4 ORDER BY (rowid%5), rowid; DELETE FROM sqlite_stat4; INSERT INTO sqlite_stat4 SELECT * FROM x1; PRAGMA writable_schema = 0; ANALYZE sqlite_master; } do_execsql_test 6.2 { SELECT * FROM t1 WHERE a = 'abc'; } #------------------------------------------------------------------------- # The following tests experiment with adding corrupted records to the # 'sample' column of the sqlite_stat4 table. # reset_db sqlite3_db_config_lookaside db 0 0 0 database_may_be_corrupt do_execsql_test 7.1 { CREATE TABLE t1(a, b); CREATE INDEX i1 ON t1(a, b); INSERT INTO t1 VALUES(1, 1); INSERT INTO t1 VALUES(2, 2); INSERT INTO t1 VALUES(3, 3); INSERT INTO t1 VALUES(4, 4); INSERT INTO t1 VALUES(5, 5); ANALYZE; UPDATE sqlite_stat4 SET sample = X'' WHERE rowid = 1; ANALYZE sqlite_master; } do_execsql_test 7.2 { UPDATE sqlite_stat4 SET sample = X'FFFF'; ANALYZE sqlite_master; SELECT * FROM t1 WHERE a = 1; } {1 1} do_execsql_test 7.3 { ANALYZE; UPDATE sqlite_stat4 SET neq = '0 0 0'; ANALYZE sqlite_master; SELECT * FROM t1 WHERE a = 1; } {1 1} do_execsql_test 7.4 { ANALYZE; UPDATE sqlite_stat4 SET ndlt = '0 0 0'; ANALYZE sqlite_master; SELECT * FROM t1 WHERE a = 3; } {3 3} do_execsql_test 7.5 { ANALYZE; UPDATE sqlite_stat4 SET nlt = '0 0 0'; ANALYZE sqlite_master; SELECT * FROM t1 WHERE a = 5; } {5 5} database_never_corrupt #------------------------------------------------------------------------- # reset_db do_execsql_test 8.1 { CREATE TABLE t1(x TEXT); CREATE INDEX i1 ON t1(x); INSERT INTO t1 VALUES('1'); INSERT INTO t1 VALUES('2'); INSERT INTO t1 VALUES('3'); INSERT INTO t1 VALUES('4'); ANALYZE; } do_execsql_test 8.2 { SELECT * FROM t1 WHERE x = 3; } {3} #------------------------------------------------------------------------- # Check that the bug fixed by [91733bc485] really is fixed. # reset_db do_execsql_test 9.1 { CREATE TABLE t1(a, b, c, d, e); CREATE INDEX i1 ON t1(a, b, c, d); CREATE INDEX i2 ON t1(e); } do_test 9.2 { execsql BEGIN; for {set i 0} {$i < 100} {incr i} { execsql "INSERT INTO t1 VALUES('x', 'y', 'z', $i, [expr $i/2])" } for {set i 0} {$i < 21} {incr i} { execsql "INSERT INTO t1 VALUES('x', 'y', 'z', 101, $i)" } for {set i 102} {$i < 200} {incr i} { execsql "INSERT INTO t1 VALUES('x', 'y', 'z', $i, [expr $i/2])" } execsql COMMIT execsql ANALYZE } {} do_eqp_test 9.3.1 { SELECT * FROM t1 WHERE a='x' AND b='y' AND c='z' AND d=101 AND e=5; } {/t1 USING INDEX i2/} do_eqp_test 9.3.2 { SELECT * FROM t1 WHERE a='x' AND b='y' AND c='z' AND d=99 AND e=5; } {/t1 USING INDEX i1/} set value_d [expr 101] do_eqp_test 9.4.1 { SELECT * FROM t1 WHERE a='x' AND b='y' AND c='z' AND d=$value_d AND e=5 } {/t1 USING INDEX i2/} set value_d [expr 99] do_eqp_test 9.4.2 { SELECT * FROM t1 WHERE a='x' AND b='y' AND c='z' AND d=$value_d AND e=5 } {/t1 USING INDEX i1/} #------------------------------------------------------------------------- # Check that the planner takes stat4 data into account when considering # "IS NULL" and "IS NOT NULL" constraints. # do_execsql_test 10.1.1 { DROP TABLE IF EXISTS t3; CREATE TABLE t3(a, b); CREATE INDEX t3a ON t3(a); CREATE INDEX t3b ON t3(b); } do_test 10.1.2 { for {set i 1} {$i < 100} {incr i} { if {$i>90} { set a $i } else { set a NULL } set b [expr $i % 5] execsql "INSERT INTO t3 VALUES($a, $b)" } execsql ANALYZE } {} do_eqp_test 10.1.3 { SELECT * FROM t3 WHERE a IS NULL AND b = 2 } {/t3 USING INDEX t3b/} do_eqp_test 10.1.4 { SELECT * FROM t3 WHERE a IS NOT NULL AND b = 2 } {/t3 USING INDEX t3a/} do_execsql_test 10.2.1 { DROP TABLE IF EXISTS t3; CREATE TABLE t3(x, a, b); CREATE INDEX t3a ON t3(x, a); CREATE INDEX t3b ON t3(x, b); } do_test 10.2.2 { for {set i 1} {$i < 100} {incr i} { if {$i>90} { set a $i } else { set a NULL } set b [expr $i % 5] execsql "INSERT INTO t3 VALUES('xyz', $a, $b)" } execsql ANALYZE } {} do_eqp_test 10.2.3 { SELECT * FROM t3 WHERE x = 'xyz' AND a IS NULL AND b = 2 } {/t3 USING INDEX t3b/} do_eqp_test 10.2.4 { SELECT * FROM t3 WHERE x = 'xyz' AND a IS NOT NULL AND b = 2 } {/t3 USING INDEX t3a/} #------------------------------------------------------------------------- # Check that stat4 data is used correctly with non-default collation # sequences. # foreach {tn schema} { 1 { CREATE TABLE t4(a COLLATE nocase, b); CREATE INDEX t4a ON t4(a); CREATE INDEX t4b ON t4(b); } 2 { CREATE TABLE t4(a, b); CREATE INDEX t4a ON t4(a COLLATE nocase); CREATE INDEX t4b ON t4(b); } } { drop_all_tables do_test 11.$tn.1 { execsql $schema } {} do_test 11.$tn.2 { for {set i 0} {$i < 100} {incr i} { if { ($i % 10)==0 } { set a ABC } else { set a DEF } set b [expr $i % 5] execsql { INSERT INTO t4 VALUES($a, $b) } } execsql ANALYZE } {} do_eqp_test 11.$tn.3 { SELECT * FROM t4 WHERE a = 'def' AND b = 3; } {/t4 USING INDEX t4b/} if {$tn==1} { set sql "SELECT * FROM t4 WHERE a = 'abc' AND b = 3;" do_eqp_test 11.$tn.4 $sql {/t4 USING INDEX t4a/} } else { set sql "SELECT * FROM t4 WHERE a = 'abc' COLLATE nocase AND b = 3;" do_eqp_test 11.$tn.5 $sql {/t4 USING INDEX t4a/} set sql "SELECT * FROM t4 WHERE a COLLATE nocase = 'abc' AND b = 3;" do_eqp_test 11.$tn.6 $sql {/t4 USING INDEX t4a/} } } foreach {tn schema} { 1 { CREATE TABLE t4(x, a COLLATE nocase, b); CREATE INDEX t4a ON t4(x, a); CREATE INDEX t4b ON t4(x, b); } 2 { CREATE TABLE t4(x, a, b); CREATE INDEX t4a ON t4(x, a COLLATE nocase); CREATE INDEX t4b ON t4(x, b); } } { drop_all_tables do_test 12.$tn.1 { execsql $schema } {} do_test 12.$tn.2 { for {set i 0} {$i < 100} {incr i} { if { ($i % 10)==0 } { set a ABC } else { set a DEF } set b [expr $i % 5] execsql { INSERT INTO t4 VALUES(X'abcdef', $a, $b) } } execsql ANALYZE } {} do_eqp_test 12.$tn.3 { SELECT * FROM t4 WHERE x=X'abcdef' AND a = 'def' AND b = 3; } {/t4 USING INDEX t4b/} if {$tn==1} { set sql "SELECT * FROM t4 WHERE x=X'abcdef' AND a = 'abc' AND b = 3;" do_eqp_test 12.$tn.4 $sql {/t4 USING INDEX t4a/} } else { set sql { SELECT * FROM t4 WHERE x=X'abcdef' AND a = 'abc' COLLATE nocase AND b = 3 } do_eqp_test 12.$tn.5 $sql {/t4 USING INDEX t4a/} set sql { SELECT * FROM t4 WHERE x=X'abcdef' AND a COLLATE nocase = 'abc' AND b = 3 } do_eqp_test 12.$tn.6 $sql {/t4 USING INDEX t4a/} } } #------------------------------------------------------------------------- # Check that affinities are taken into account when using stat4 data to # estimate the number of rows scanned by a rowid constraint. # drop_all_tables do_test 13.1 { execsql { CREATE TABLE t1(a, b, c, d); CREATE INDEX i1 ON t1(a); CREATE INDEX i2 ON t1(b, c); } for {set i 0} {$i<100} {incr i} { if {$i %2} {set a abc} else {set a def} execsql { INSERT INTO t1(rowid, a, b, c) VALUES($i, $a, $i, $i) } } execsql ANALYZE } {} do_eqp_test 13.2.1 { SELECT * FROM t1 WHERE a='abc' AND rowid<15 AND b<12 } {/SEARCH t1 USING INDEX i1/} do_eqp_test 13.2.2 { SELECT * FROM t1 WHERE a='abc' AND rowid<'15' AND b<12 } {/SEARCH t1 USING INDEX i1/} do_eqp_test 13.3.1 { SELECT * FROM t1 WHERE a='abc' AND rowid<100 AND b<12 } {/SEARCH t1 USING INDEX i2/} do_eqp_test 13.3.2 { SELECT * FROM t1 WHERE a='abc' AND rowid<'100' AND b<12 } {/SEARCH t1 USING INDEX i2/} #------------------------------------------------------------------------- # Check also that affinities are taken into account when using stat4 data # to estimate the number of rows scanned by any other constraint on a # column other than the leftmost. # drop_all_tables do_test 14.1 { execsql { CREATE TABLE t1(a, b INTEGER, c) } for {set i 0} {$i<100} {incr i} { set c [expr $i % 3] execsql { INSERT INTO t1 VALUES('ott', $i, $c) } } execsql { CREATE INDEX i1 ON t1(a, b); CREATE INDEX i2 ON t1(c); ANALYZE; } } {} do_eqp_test 13.2.1 { SELECT * FROM t1 WHERE a='ott' AND b<10 AND c=1 } {/SEARCH t1 USING INDEX i1/} do_eqp_test 13.2.2 { SELECT * FROM t1 WHERE a='ott' AND b<'10' AND c=1 } {/SEARCH t1 USING INDEX i1/} #------------------------------------------------------------------------- # By default, 16 non-periodic samples are collected for the stat4 table. # The following tests attempt to verify that the most common keys are # being collected. # proc check_stat4 {tn} { db eval ANALYZE db eval {SELECT a, b, c, d FROM t1} { incr k($a) incr k([list $a $b]) incr k([list $a $b $c]) if { [info exists k([list $a $b $c $d])]==0 } { incr nRow } incr k([list $a $b $c $d]) } set L [list] foreach key [array names k] { lappend L [list $k($key) $key] } set nSample $nRow if {$nSample>16} {set nSample 16} set nThreshold [lindex [lsort -decr -integer -index 0 $L] [expr $nSample-1] 0] foreach key [array names k] { if {$k($key)>$nThreshold} { set expect($key) 1 } if {$k($key)==$nThreshold} { set possible($key) 1 } } set nPossible [expr $nSample - [llength [array names expect]]] #puts "EXPECT: [array names expect]" #puts "POSSIBLE($nPossible/[array size possible]): [array names possible]" #puts "HAVE: [db eval {SELECT test_decode(sample) FROM sqlite_stat4 WHERE idx='i1'}]" db eval {SELECT test_decode(sample) AS s FROM sqlite_stat4 WHERE idx='i1'} { set seen 0 for {set i 0} {$i<4} {incr i} { unset -nocomplain expect([lrange $s 0 $i]) if {[info exists possible([lrange $s 0 $i])]} { set seen 1 unset -nocomplain possible([lrange $s 0 $i]) } } if {$seen} {incr nPossible -1} } if {$nPossible<0} {set nPossible 0} set res [list [llength [array names expect]] $nPossible] uplevel [list do_test $tn [list set {} $res] {0 0}] } drop_all_tables do_test 14.1.1 { execsql { CREATE TABLE t1(a,b,c,d); CREATE INDEX i1 ON t1(a,b,c,d); } for {set i 0} {$i < 160} {incr i} { execsql { INSERT INTO t1 VALUES($i,$i,$i,$i) } if {($i % 10)==0} { execsql { INSERT INTO t1 VALUES($i,$i,$i,$i) } } } } {} check_stat4 14.1.2 do_test 14.2.1 { execsql { DELETE FROM t1 } for {set i 0} {$i < 1600} {incr i} { execsql { INSERT INTO t1 VALUES($i/10,$i/17,$i/27,$i/37) } } } {} check_stat4 14.2.2 do_test 14.3.1 { for {set i 0} {$i < 10} {incr i} { execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } execsql { INSERT INTO t1 VALUES($i*50,$i*50,$i*50,$i*50) } } } {} check_stat4 14.3.2 do_test 14.4.1 { execsql {DELETE FROM t1} for {set i 1} {$i < 160} {incr i} { set b [expr $i % 10] if {$b==0 || $b==2} {set b 1} execsql { INSERT INTO t1 VALUES($i/10,$b,$i,$i) } } } {} check_stat4 14.4.2 db func lrange lrange db func lindex lindex do_execsql_test 14.4.3 { SELECT lrange(test_decode(sample), 0, 1) AS s FROM sqlite_stat4 WHERE lindex(s, 1)=='1' ORDER BY rowid } { {0 1} {1 1} {2 1} {3 1} {4 1} {5 1} {6 1} {7 1} {8 1} {9 1} {10 1} {11 1} {12 1} {13 1} {14 1} {15 1} } #------------------------------------------------------------------------- # Test that nothing untoward happens if the stat4 table contains entries # for indexes that do not exist. Or NULL values in the idx column. # Or NULL values in any of the other columns. # drop_all_tables do_execsql_test 15.1 { CREATE TABLE x1(a, b, UNIQUE(a, b)); INSERT INTO x1 VALUES(1, 2); INSERT INTO x1 VALUES(3, 4); INSERT INTO x1 VALUES(5, 6); ANALYZE; INSERT INTO sqlite_stat4 VALUES(NULL, NULL, NULL, NULL, NULL, NULL); } db close sqlite3 db test.db do_execsql_test 15.2 { SELECT * FROM x1 } {1 2 3 4 5 6} do_execsql_test 15.3 { INSERT INTO sqlite_stat4 VALUES(42, 42, 42, 42, 42, 42); } db close sqlite3 db test.db do_execsql_test 15.4 { SELECT * FROM x1 } {1 2 3 4 5 6} do_execsql_test 15.5 { UPDATE sqlite_stat1 SET stat = NULL; } db close sqlite3 db test.db do_execsql_test 15.6 { SELECT * FROM x1 } {1 2 3 4 5 6} do_execsql_test 15.7 { ANALYZE; UPDATE sqlite_stat1 SET tbl = 'no such tbl'; } db close sqlite3 db test.db do_execsql_test 15.8 { SELECT * FROM x1 } {1 2 3 4 5 6} do_execsql_test 15.9 { ANALYZE; UPDATE sqlite_stat4 SET neq = NULL, nlt=NULL, ndlt=NULL; } db close sqlite3 db test.db do_execsql_test 15.10 { SELECT * FROM x1 } {1 2 3 4 5 6} # This is just for coverage.... do_execsql_test 15.11 { ANALYZE; UPDATE sqlite_stat1 SET stat = stat || ' unordered'; } db close sqlite3 db test.db do_execsql_test 15.12 { SELECT * FROM x1 } {1 2 3 4 5 6} #------------------------------------------------------------------------- # Test that allocations used for sqlite_stat4 samples are included in # the quantity returned by SQLITE_DBSTATUS_SCHEMA_USED. # set one [string repeat x 1000] set two [string repeat x 2000] do_test 16.1 { reset_db execsql { CREATE TABLE t1(a, UNIQUE(a)); INSERT INTO t1 VALUES($one); ANALYZE; } set nByte [lindex [sqlite3_db_status db SCHEMA_USED 0] 1] reset_db execsql { CREATE TABLE t1(a, UNIQUE(a)); INSERT INTO t1 VALUES($two); ANALYZE; } set nByte2 [lindex [sqlite3_db_status db SCHEMA_USED 0] 1] puts -nonewline " (nByte=$nByte nByte2=$nByte2)" expr {$nByte2 > $nByte+900 && $nByte2 < $nByte+1100} } {1} #------------------------------------------------------------------------- # Test that stat4 data may be used with partial indexes. # do_test 17.1 { reset_db execsql { CREATE TABLE t1(a, b, c, d); CREATE INDEX i1 ON t1(a, b) WHERE d IS NOT NULL; INSERT INTO t1 VALUES(-1, -1, -1, NULL); INSERT INTO t1 SELECT 2*a,2*b,2*c,d FROM t1; INSERT INTO t1 SELECT 2*a,2*b,2*c,d FROM t1; INSERT INTO t1 SELECT 2*a,2*b,2*c,d FROM t1; INSERT INTO t1 SELECT 2*a,2*b,2*c,d FROM t1; INSERT INTO t1 SELECT 2*a,2*b,2*c,d FROM t1; INSERT INTO t1 SELECT 2*a,2*b,2*c,d FROM t1; } for {set i 0} {$i < 32} {incr i} { if {$i<8} {set b 0} else { set b $i } execsql { INSERT INTO t1 VALUES($i%2, $b, $i/2, 'abc') } } execsql {ANALYZE main.t1} } {} do_catchsql_test 17.1.2 { ANALYZE temp.t1; } {1 {no such table: temp.t1}} do_eqp_test 17.2 { SELECT * FROM t1 WHERE d IS NOT NULL AND a=0 AND b=10 AND c=10; } {/USING INDEX i1/} do_eqp_test 17.3 { SELECT * FROM t1 WHERE d IS NOT NULL AND a=0 AND b=0 AND c=10; } {/USING INDEX i1/} do_execsql_test 17.4 { CREATE INDEX i2 ON t1(c, d); ANALYZE main.i2; } do_eqp_test 17.5 { SELECT * FROM t1 WHERE d IS NOT NULL AND a=0 AND b=10 AND c=10; } {/USING INDEX i1/} do_eqp_test 17.6 { SELECT * FROM t1 WHERE d IS NOT NULL AND a=0 AND b=0 AND c=10; } {/USING INDEX i2/} #------------------------------------------------------------------------- # do_test 18.1 { reset_db execsql { CREATE TABLE t1(a, b); CREATE INDEX i1 ON t1(a, b); } for {set i 0} {$i < 9} {incr i} { execsql { INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); INSERT INTO t1 VALUES($i, 0); } } execsql ANALYZE execsql { SELECT count(*) FROM sqlite_stat4 } } {9} #------------------------------------------------------------------------- # For coverage. # ifcapable view { do_test 19.1 { reset_db execsql { CREATE TABLE t1(x, y); CREATE INDEX i1 ON t1(x, y); CREATE VIEW v1 AS SELECT * FROM t1; ANALYZE; } } {} } ifcapable auth { proc authproc {op args} { if {$op == "SQLITE_ANALYZE"} { return "SQLITE_DENY" } return "SQLITE_OK" } do_test 19.2 { reset_db db auth authproc execsql { CREATE TABLE t1(x, y); CREATE VIEW v1 AS SELECT * FROM t1; } catchsql ANALYZE } {1 {not authorized}} } #------------------------------------------------------------------------- # reset_db proc r {args} { expr rand() } db func r r db func lrange lrange do_test 20.1 { execsql { CREATE TABLE t1(a,b,c,d); CREATE INDEX i1 ON t1(a,b,c,d); } for {set i 0} {$i < 16} {incr i} { execsql { INSERT INTO t1 VALUES($i, r(), r(), r()); INSERT INTO t1 VALUES($i, $i, r(), r()); INSERT INTO t1 VALUES($i, $i, $i, r()); INSERT INTO t1 VALUES($i, $i, $i, $i); INSERT INTO t1 VALUES($i, $i, $i, $i); INSERT INTO t1 VALUES($i, $i, $i, r()); INSERT INTO t1 VALUES($i, $i, r(), r()); INSERT INTO t1 VALUES($i, r(), r(), r()); } } } {} do_execsql_test 20.2 { ANALYZE } for {set i 0} {$i<16} {incr i} { set val "$i $i $i $i" do_execsql_test 20.3.$i { SELECT count(*) FROM sqlite_stat4 WHERE lrange(test_decode(sample), 0, 3)=$val } {1} } #------------------------------------------------------------------------- # reset_db do_execsql_test 21.0 { CREATE TABLE t2(a, b); CREATE INDEX i2 ON t2(a); } do_test 21.1 { for {set i 1} {$i < 100} {incr i} { execsql { INSERT INTO t2 VALUES(CASE WHEN $i < 80 THEN 'one' ELSE 'two' END, $i) } } execsql ANALYZE } {} # Condition (a='one') matches 80% of the table. (rowid<10) reduces this to # 10%, but (rowid<50) only reduces it to 50%. So in the first case below # the index is used. In the second, it is not. # do_eqp_test 21.2 { SELECT * FROM t2 WHERE a='one' AND rowid < 10 } {/*USING INDEX i2 (a=? AND rowid<?)*/} do_eqp_test 21.3 { SELECT * FROM t2 WHERE a='one' AND rowid < 50 } {/*USING INTEGER PRIMARY KEY*/} #------------------------------------------------------------------------- # reset_db do_execsql_test 22.0 { CREATE TABLE t3(a, b, c, d, PRIMARY KEY(a, b)) WITHOUT ROWID; SELECT * FROM t3; } {} do_execsql_test 22.1 { WITH r(x) AS ( SELECT 1 UNION ALL SELECT x+1 FROM r WHERE x<=100 ) INSERT INTO t3 SELECT CASE WHEN (x>45 AND x<96) THEN 'B' ELSE 'A' END, /* Column "a" */ x, /* Column "b" */ CASE WHEN (x<51) THEN 'one' ELSE 'two' END, /* Column "c" */ x /* Column "d" */ FROM r; CREATE INDEX i3 ON t3(c); CREATE INDEX i4 ON t3(d); ANALYZE; } # Expression (c='one' AND a='B') matches 5 table rows. But (c='one' AND a=A') # matches 45. Expression (d<?) matches 20. Neither index is a covering index. # # Therefore, with stat4 data, SQLite prefers (c='one' AND a='B') over (d<20), # and (d<20) over (c='one' AND a='A'). foreach {tn where res} { 1 "c='one' AND a='B' AND d < 20" {/*INDEX i3 (c=? AND a=?)*/} 2 "c='one' AND a='A' AND d < 20" {/*INDEX i4 (d<?)*/} } { do_eqp_test 22.2.$tn "SELECT * FROM t3 WHERE $where" $res } proc int_to_char {i} { set ret "" set char [list a b c d e f g h i j] foreach {div} {1000 100 10 1} { append ret [lindex $char [expr ($i / $div) % 10]] } set ret } db func int_to_char int_to_char do_execsql_test 23.0 { CREATE TABLE t4( a COLLATE nocase, b, c, d, e, f, PRIMARY KEY(c, b, a) ) WITHOUT ROWID; CREATE INDEX i41 ON t4(e); CREATE INDEX i42 ON t4(f); WITH data(a, b, c, d, e, f) AS ( SELECT int_to_char(0), 'xyz', 'zyx', '*', 0, 0 UNION ALL SELECT int_to_char(f+1), b, c, d, (e+1) % 2, f+1 FROM data WHERE f<1024 ) INSERT INTO t4 SELECT a, b, c, d, e, f FROM data; ANALYZE; } {} do_eqp_test 23.1 { SELECT * FROM t4 WHERE (e=1 AND b='xyz' AND c='zyx' AND a<'AEA') AND f<300 -- Formerly used index i41. But i41 is not a covering index whereas -- the PRIMARY KEY is a covering index, and so as of 2017-10-15, the -- PRIMARY KEY is preferred. } {SEARCH t4 USING PRIMARY KEY (c=? AND b=? AND a<?)} do_eqp_test 23.2 { SELECT * FROM t4 WHERE (e=1 AND b='xyz' AND c='zyx' AND a<'JJJ') AND f<300 } {SEARCH t4 USING INDEX i42 (f<?)} do_execsql_test 24.0 { CREATE TABLE t5(c, d, b, e, a, PRIMARY KEY(a, b, c)) WITHOUT ROWID; WITH data(a, b, c, d, e) AS ( SELECT 'z', 'y', 0, 0, 0 UNION ALL SELECT a, CASE WHEN b='y' THEN 'n' ELSE 'y' END, c+1, e/250, e+1 FROM data WHERE e<1000 ) INSERT INTO t5(a, b, c, d, e) SELECT * FROM data; CREATE INDEX t5d ON t5(d); CREATE INDEX t5e ON t5(e); ANALYZE; } foreach {tn where eqp} { 1 "d=0 AND a='z' AND b='n' AND e<200" {/*t5d (d=? AND a=? AND b=?)*/} 2 "d=0 AND a='z' AND b='n' AND e<100" {/*t5e (e<?)*/} 3 "d=0 AND e<300" {/*t5d (d=?)*/} 4 "d=0 AND e<200" {/*t5e (e<?)*/} } { do_eqp_test 24.$tn "SeLeCt * FROM t5 WHERE $where" $eqp } #------------------------------------------------------------------------- # Test that if stat4 data is available but cannot be used because the # rhs of a range constraint is a complex expression, the default estimates # are used instead. ifcapable stat4&&cte { do_execsql_test 25.1 { CREATE TABLE t6(a, b); WITH ints(i,j) AS ( SELECT 1,1 UNION ALL SELECT i+1,j+1 FROM ints WHERE i<100 ) INSERT INTO t6 SELECT * FROM ints; CREATE INDEX aa ON t6(a); CREATE INDEX bb ON t6(b); ANALYZE; } # Term (b<?) is estimated at 25%. Better than (a<30) but not as # good as (a<20). do_eqp_test 25.2.1 { SELECT * FROM t6 WHERE a<30 AND b<? } \ {SEARCH t6 USING INDEX bb (b<?)} do_eqp_test 25.2.2 { SELECT * FROM t6 WHERE a<20 AND b<? } \ {SEARCH t6 USING INDEX aa (a<?)} # Term (b BETWEEN ? AND ?) is estimated at 1/64. do_eqp_test 25.3.1 { SELECT * FROM t6 WHERE a BETWEEN 5 AND 10 AND b BETWEEN ? AND ? } {SEARCH t6 USING INDEX bb (b>? AND b<?)} # Term (b BETWEEN ? AND 60) is estimated to return roughly 15 rows - # 60 from (b<=60) multiplied by 0.25 for the b>=? term. Better than # (a<20) but not as good as (a<10). do_eqp_test 25.4.1 { SELECT * FROM t6 WHERE a < 10 AND (b BETWEEN ? AND 60) } {SEARCH t6 USING INDEX aa (a<?)} do_eqp_test 25.4.2 { SELECT * FROM t6 WHERE a < 20 AND (b BETWEEN ? AND 60) } {SEARCH t6 USING INDEX bb (b>? AND b<?)} } #------------------------------------------------------------------------- # Check that a problem in they way stat4 data is used has been # resolved (see below). # reset_db do_test 26.1.1 { db transaction { execsql { CREATE TABLE t1(x, y, z); CREATE INDEX t1xy ON t1(x, y); CREATE INDEX t1z ON t1(z); } for {set i 0} {$i < 10000} {incr i} { execsql { INSERT INTO t1(x, y) VALUES($i, $i) } } for {set i 0} {$i < 10} {incr i} { execsql { WITH cnt(x) AS (SELECT 1 UNION ALL SELECT x+1 FROM cnt WHERE x<100) INSERT INTO t1(x, y) SELECT 10000+$i, x FROM cnt; INSERT INTO t1(x, y) SELECT 10000+$i, 100; } } execsql { UPDATE t1 SET z = rowid / 20; ANALYZE; } } } {} do_execsql_test 26.1.2 { SELECT count(*) FROM t1 WHERE x = 10000 AND y < 50; } {49} do_execsql_test 26.1.3 { SELECT count(*) FROM t1 WHERE z = 444; } {20} # The analyzer knows that any (z=?) expression matches 20 rows. So it # will use index "t1z" if the estimate of hits for (x=10000 AND y<50) # is greater than 20 rows. # # And it should be. The analyzer has a stat4 sample as follows: # # sample=(x=10000, y=100) nLt=(10000 10099) # # There should be no other samples that start with (x=10000). So it knows # that (x=10000 AND y<50) must match somewhere between 0 and 99 rows, but # no more than that. Guessing less than 20 is therefore unreasonable. # # At one point though, due to a problem in whereKeyStats(), the planner was # estimating that (x=10000 AND y<50) would match only 2 rows. # do_eqp_test 26.1.4 { SELECT * FROM t1 WHERE x = 10000 AND y < 50 AND z = 444; } {SEARCH t1 USING INDEX t1z (z=?)} # This test - 26.2.* - tests that another manifestation of the same problem # is no longer present in the library. Assuming: # # CREATE INDEX t1xy ON t1(x, y) # # and that have samples for index t1xy as follows: # # # sample=('A', 70) nEq=(100, 2) nLt=(900, 970) # sample=('B', 70) nEq=(100, 2) nLt=(1000, 1070) # # the planner should estimate that (x = 'B' AND y > 25) matches 76 rows # (70 * 2/3 + 30). Before, due to the problem, the planner was estimating # that this matched 100 rows. # reset_db do_execsql_test 26.2.1 { BEGIN; CREATE TABLE t1(x, y, z); CREATE INDEX i1 ON t1(x, y); CREATE INDEX i2 ON t1(z); WITH cnt(y) AS (SELECT 0 UNION ALL SELECT y+1 FROM cnt WHERE y<99), letters(x) AS ( SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D' ) INSERT INTO t1(x, y) SELECT x, y FROM letters, cnt; WITH letters(x) AS ( SELECT 'A' UNION SELECT 'B' UNION SELECT 'C' UNION SELECT 'D' ) INSERT INTO t1(x, y) SELECT x, 70 FROM letters; WITH cnt(i) AS (SELECT 0 UNION ALL SELECT i+1 FROM cnt WHERE i<9999) INSERT INTO t1(x, y) SELECT i, i FROM cnt; UPDATE t1 SET z = (rowid / 95); ANALYZE; COMMIT; } do_eqp_test 26.2.2 { SELECT * FROM t1 WHERE x='B' AND y>25 AND z=?; } {SEARCH t1 USING INDEX i1 (x=? AND y>?)} finish_test