# 2007 August 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The focus of this file is testing some specific characteristics of the 
# IO traffic generated by SQLite (making sure SQLite is not writing out
# more database pages than it has to, stuff like that).
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set ::testprefix io

db close
sqlite3_simulate_device
sqlite3 db test.db -vfs devsym

# Test summary:
#
# io-1.* -  Test that quick-balance does not journal pages unnecessarily.
#
# io-2.* -  Test the "atomic-write optimization".
#
# io-3.* -  Test the IO traffic enhancements triggered when the 
#           IOCAP_SEQUENTIAL device capability flag is set (no 
#           fsync() calls on the journal file).
#
# io-4.* -  Test the IO traffic enhancements triggered when the 
#           IOCAP_SAFE_APPEND device capability flag is set (fewer 
#           fsync() calls on the journal file, no need to set nRec
#           field in the single journal header).
#
# io-5.* -  Test that the default page size is selected and used 
#           correctly.
#
# io-6.* -  Test that the pager-cache is not being flushed unnecessarily 
#           after a transaction that uses the special atomic-write path
#           is committed.
#           

set ::nWrite 0
proc nWrite {db} {
  set bt [btree_from_db $db]
  db_enter $db
  array set stats [btree_pager_stats $bt]
  db_leave $db
  set res [expr $stats(write) - $::nWrite]
  set ::nWrite $stats(write)
  set res
}

set ::nSync 0
proc nSync {} {
  set res [expr {$::sqlite_sync_count - $::nSync}]
  set ::nSync $::sqlite_sync_count
  set res
}

do_test io-1.1 {
  execsql {
    PRAGMA auto_vacuum = OFF;
    PRAGMA page_size = 1024;
    CREATE TABLE abc(a,b);
  }
  nWrite db
} {2}

# Insert into the table 4 records of aproximately 240 bytes each.
# This should completely fill the root-page of the table. Each
# INSERT causes 2 db pages to be written - the root-page of "abc"
# and page 1 (db change-counter page).
do_test io-1.2 {
  set ret [list]
  execsql { INSERT INTO abc VALUES(1,randstr(230,230)); }
  lappend ret [nWrite db]
  execsql { INSERT INTO abc VALUES(2,randstr(230,230)); }
  lappend ret [nWrite db]
  execsql { INSERT INTO abc VALUES(3,randstr(230,230)); }
  lappend ret [nWrite db]
  execsql { INSERT INTO abc VALUES(4,randstr(230,230)); }
  lappend ret [nWrite db]
} {2 2 2 2}

# Insert another 240 byte record. This causes two leaf pages
# to be added to the root page of abc. 4 pages in total
# are written to the db file - the two leaf pages, the root
# of abc and the change-counter page.
do_test io-1.3 {
  execsql { INSERT INTO abc VALUES(5,randstr(230,230)); }
  nWrite db
} {4}

# Insert another 3 240 byte records. After this, the tree consists of 
# the root-node, which is close to empty, and two leaf pages, both of 
# which are full. 
do_test io-1.4 {
  set ret [list]
  execsql { INSERT INTO abc VALUES(6,randstr(230,230)); }
  lappend ret [nWrite db]
  execsql { INSERT INTO abc VALUES(7,randstr(230,230)); }
  lappend ret [nWrite db]
  execsql { INSERT INTO abc VALUES(8,randstr(230,230)); }
  lappend ret [nWrite db]
} {2 2 2}

# This insert should use the quick-balance trick to add a third leaf
# to the b-tree used to store table abc. It should only be necessary to
# write to 3 pages to do this: the change-counter, the root-page and
# the new leaf page.
do_test io-1.5 {
  execsql { INSERT INTO abc VALUES(9,randstr(230,230)); }
  nWrite db
} {3}

ifcapable atomicwrite {

#----------------------------------------------------------------------
# Test cases io-2.* test the atomic-write optimization.
#
do_test io-2.1 {
  execsql { DELETE FROM abc; VACUUM; }
} {}

# Clear the write and sync counts.
nWrite db ; nSync

# The following INSERT updates 2 pages and requires 4 calls to fsync():
#
#   1) The directory in which the journal file is created,
#   2) The journal file (to sync the page data),
#   3) The journal file (to sync the journal file header),
#   4) The database file.
#
do_test io-2.2 {
  execsql { INSERT INTO abc VALUES(1, 2) }
  list [nWrite db] [nSync]
} {2 4}

# Set the device-characteristic mask to include the SQLITE_IOCAP_ATOMIC,
# then do another INSERT similar to the one in io-2.2. This should
# only write 1 page and require a single fsync().
# 
# The single fsync() is the database file. Only one page is reported as
# written because page 1 - the change-counter page - is written using
# an out-of-band method that bypasses the write counter.
#
# UPDATE: As of [05f98d4eec] (adding SQLITE_DBSTATUS_CACHE_WRITE), the
# second write is also counted. So this now reports two writes and a
# single fsync.
#
sqlite3_simulate_device -char atomic
do_test io-2.3 {
  execsql { INSERT INTO abc VALUES(3, 4) }
  list [nWrite db] [nSync]
} {2 1}

# Test that the journal file is not created and the change-counter is
# updated when the atomic-write optimization is used.
#
do_test io-2.4.1 {
  execsql {
    BEGIN;
    INSERT INTO abc VALUES(5, 6);
  }
  sqlite3 db2 test.db -vfs devsym
  execsql { SELECT * FROM abc } db2
} {1 2 3 4}
do_test io-2.4.2 {
  file exists test.db-journal
} {0}
do_test io-2.4.3 {
  execsql { COMMIT }
  execsql { SELECT * FROM abc } db2
} {1 2 3 4 5 6}
db2 close

# Test that the journal file is created and sync()d if the transaction
# modifies more than one database page, even if the IOCAP_ATOMIC flag
# is set.
#
do_test io-2.5.1 {
  execsql { CREATE TABLE def(d, e) }
  nWrite db ; nSync
  execsql {
    BEGIN;
    INSERT INTO abc VALUES(7, 8);
  }
  file exists test.db-journal
} {0}
do_test io-2.5.2 {
  execsql { INSERT INTO def VALUES('a', 'b'); }
  file exists test.db-journal
} {1}
do_test io-2.5.3 {
  execsql { COMMIT }
  list [nWrite db] [nSync]
} {3 4}

# Test that the journal file is created and sync()d if the transaction
# modifies a single database page and also appends a page to the file.
# Internally, this case is handled differently to the one above. The
# journal file is not actually created until the 'COMMIT' statement
# is executed.
#
# Changed 2010-03-27:  The size of the database is now stored in 
# bytes 28..31 and so when a page is added to the database, page 1
# is immediately modified and the journal file immediately comes into
# existence.  To fix this test, the BEGIN is changed into a a
# BEGIN IMMEDIATE and the INSERT is omitted.
#
do_test io-2.6.1 {
  execsql {
    BEGIN IMMEDIATE;
    -- INSERT INTO abc VALUES(9, randstr(1000,1000));
  }
  file exists test.db-journal
} {0}
do_test io-2.6.2 {
  # Create a file at "test.db-journal". This will prevent SQLite from
  # opening the journal for exclusive access. As a result, the COMMIT
  # should fail with SQLITE_CANTOPEN and the transaction rolled back.
  #
  file mkdir test.db-journal
  catchsql {
    INSERT INTO abc VALUES(9, randstr(1000,1000));
    COMMIT
  }
} {1 {unable to open database file}}
do_test io-2.6.3 {
  forcedelete test.db-journal
  catchsql { COMMIT }
} {0 {}}
do_test io-2.6.4 {
  execsql { SELECT * FROM abc }
} {1 2 3 4 5 6 7 8}

# Test that if the database modification is part of multi-file commit,
# the journal file is always created. In this case, the journal file
# is created during execution of the COMMIT statement, so we have to
# use the same technique to check that it is created as in the above 
# block.
forcedelete test2.db test2.db-journal
ifcapable attach {
  do_test io-2.7.1 {
    execsql {
      ATTACH 'test2.db' AS aux;
      PRAGMA aux.page_size = 1024;
      CREATE TABLE aux.abc2(a, b);
      BEGIN;
      INSERT INTO abc VALUES(9, 10);
    }
    file exists test.db-journal
  } {0}
  do_test io-2.7.2 {
    execsql { INSERT INTO abc2 SELECT * FROM abc }
    file exists test2.db-journal
  } {0}
  do_test io-2.7.3 {
    execsql { SELECT * FROM abc UNION ALL SELECT * FROM abc2 }
  } {1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10}
  do_test io-2.7.4 {
    file mkdir test2.db-journal
    catchsql { COMMIT }
  } {1 {unable to open database file}}
  do_test io-2.7.5 {
    forcedelete test2.db-journal
    catchsql { COMMIT }
  } {1 {cannot commit - no transaction is active}}
  do_test io-2.7.6 {
    execsql { SELECT * FROM abc UNION ALL SELECT * FROM abc2 }
  } {1 2 3 4 5 6 7 8}
}

# Try an explicit ROLLBACK before the journal file is created.
#
do_test io-2.8.1 {
  execsql {
    BEGIN;
    DELETE FROM abc;
  }
  file exists test.db-journal
} {0}
do_test io-2.8.2 {
  execsql { SELECT * FROM abc }
} {}
do_test io-2.8.3 {
  execsql {
    ROLLBACK;
    SELECT * FROM abc;
  }
} {1 2 3 4 5 6 7 8}

# Test that the atomic write optimisation is not enabled if the sector
# size is larger than the page-size.
#
do_test io-2.9.1 {
  db close
  sqlite3 db test.db
  sqlite3_simulate_device -char atomic -sectorsize 2048
  execsql {
    BEGIN;
    INSERT INTO abc VALUES(9, 10);
  }
  file exists test.db-journal
} {1}
do_test io-2.9.2 {
  execsql { ROLLBACK; }
  db close
  forcedelete test.db test.db-journal
  sqlite3 db test.db -vfs devsym
  execsql {
    PRAGMA auto_vacuum = OFF;
    PRAGMA page_size = 2048;
    CREATE TABLE abc(a, b);
  }
  execsql {
    BEGIN;
    INSERT INTO abc VALUES(9, 10);
  }
  file exists test.db-journal
} {0}
do_test io-2.9.3 {
  execsql { COMMIT }
} {}

# Test a couple of the more specific IOCAP_ATOMIC flags 
# (i.e IOCAP_ATOMIC2K etc.).
#
do_test io-2.10.1 {
  sqlite3_simulate_device -char atomic1k
  execsql {
    BEGIN;
    INSERT INTO abc VALUES(11, 12);
  }
  file exists test.db-journal
} {1}
do_test io-2.10.2 {
  execsql { ROLLBACK }
  sqlite3_simulate_device -char atomic2k
  execsql {
    BEGIN;
    INSERT INTO abc VALUES(11, 12);
  }
  file exists test.db-journal
} {0}
do_test io-2.10.3 {
  execsql { ROLLBACK }
} {}

do_test io-2.11.0 {
  execsql { 
    PRAGMA locking_mode = exclusive;
    PRAGMA locking_mode;
  }
} {exclusive exclusive}
do_test io-2.11.1 {
  execsql { 
    INSERT INTO abc VALUES(11, 12);
  }
  file exists test.db-journal
} {0}

do_test io-2.11.2 {
  execsql { 
    PRAGMA locking_mode = normal;
    INSERT INTO abc VALUES(13, 14);
  }
  file exists test.db-journal
} {0}

} ;# /* ifcapable atomicwrite */

#----------------------------------------------------------------------
# Test cases io-3.* test the IOCAP_SEQUENTIAL optimization.
#
sqlite3_simulate_device -char sequential -sectorsize 0
ifcapable pager_pragmas {
  do_test io-3.1 {
    db close
    forcedelete test.db test.db-journal
    sqlite3 db test.db -vfs devsym
    db eval {
      PRAGMA auto_vacuum=OFF;
    }
    # File size might be 1 due to the hack to work around ticket #3260.
    # Search for #3260 in os_unix.c for additional information.
    expr {[file size test.db]>1}
  } {0}
  do_test io-3.2 {
    execsql { CREATE TABLE abc(a, b) }
    nSync
    execsql {
      PRAGMA temp_store = memory;
      PRAGMA cache_size = 10;
      BEGIN;
      INSERT INTO abc VALUES('hello', 'world');
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
      INSERT INTO abc SELECT * FROM abc;
    }
    # File has grown - showing there was a cache-spill - but there 
    # have been no calls to fsync(). The file is probably about 30KB.
    # But some VFS implementations (symbian) buffer writes so the actual
    # size may be a little less than that. So this test case just tests
    # that the file is now greater than 20000 bytes in size.
    list [expr [file size test.db]>20000] [nSync]
  } {1 0}
  do_test io-3.3 {
    # The COMMIT requires a single fsync() - to the database file.
    execsql { COMMIT }
    list [file size test.db] [nSync]
  } "[expr {[nonzero_reserved_bytes]?40960:39936}] 1"
}

#----------------------------------------------------------------------
# Test cases io-4.* test the IOCAP_SAFE_APPEND optimization.
#
sqlite3_simulate_device -char safe_append

# With the SAFE_APPEND flag set, simple transactions require 3, rather
# than 4, calls to fsync(). The fsync() calls are on:
#
#   1) The directory in which the journal file is created, (unix only)
#   2) The journal file (to sync the page data),
#   3) The database file.
#
# Normally, when the SAFE_APPEND flag is not set, there is another fsync()
# on the journal file between steps (2) and (3) above.
#
set expected_sync_count 2
if {$::tcl_platform(platform)=="unix"} {
  ifcapable dirsync {
    incr expected_sync_count
  }
}

do_test io-4.1 {
  execsql { DELETE FROM abc }
  nSync
  execsql { INSERT INTO abc VALUES('a', 'b') }
  nSync
} $expected_sync_count

# With SAFE_APPEND set, the nRec field of the journal file header should
# be set to 0xFFFFFFFF before the first journal sync. The nRec field
# occupies bytes 8-11 of the journal file.
#
do_test io-4.2.1 {
  execsql { BEGIN }
  execsql { INSERT INTO abc VALUES('c', 'd') }
  file exists test.db-journal
} {1}
if {$::tcl_platform(platform)=="unix"} {
  do_test io-4.2.2 {
    hexio_read test.db-journal 8 4
  } {FFFFFFFF}
}
do_test io-4.2.3 {
  execsql { COMMIT }
  nSync
} $expected_sync_count
sqlite3_simulate_device -char safe_append

# With SAFE_APPEND set, there should only ever be one journal-header
# written to the database, even though the sync-mode is "full".
#
do_test io-4.3.1 {
  execsql {
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
    INSERT INTO abc SELECT * FROM abc;
  }
  expr {[file size test.db]/1024}
} {43}
ifcapable pager_pragmas {
  do_test io-4.3.2 {
    execsql {
      PRAGMA synchronous = full;
      PRAGMA cache_size = 10;
      PRAGMA synchronous;
    }
  } {2}
}
do_test io-4.3.3 {
  execsql {
    BEGIN;
    UPDATE abc SET a = 'x';
  }
  file exists test.db-journal
} {1}
if {$tcl_platform(platform) != "symbian"} {
  # This test is not run on symbian because the file-buffer makes it
  # difficult to predict the exact size of the file as reported by 
  # [file size].
  do_test io-4.3.4 {
    # The UPDATE statement in the statement above modifies 41 pages 
    # (all pages in the database except page 1 and the root page of 
    # abc). Because the cache_size is set to 10, this must have required
    # at least 4 cache-spills. If there were no journal headers written
    # to the journal file after the cache-spill, then the size of the
    # journal file is give by:
    #
    #    <jrnl file size> = <jrnl header size> + nPage * (<page-size> + 8)
    #
    # If the journal file contains additional headers, this formula
    # will not predict the size of the journal file.
    #
    file size test.db-journal
  } [expr 512 + (1024+8)*41]
}

#----------------------------------------------------------------------
# Test cases io-5.* test that the default page size is selected and
# used correctly.
#
set tn 0
foreach {char                 sectorsize pgsize} {
         {}                     512      1024
         {}                    1024      1024
         {}                    2048      2048
         {}                    8192      8192
         {}                   16384      8192
         {atomic}               512      8192
         {atomic512}            512      1024
         {atomic2K}             512      2048
         {atomic2K}            4096      4096
         {atomic2K atomic}      512      8192
         {atomic64K}            512      1024
} {
  incr tn
  if {$pgsize>$::SQLITE_MAX_PAGE_SIZE} continue
  db close
  forcedelete test.db test.db-journal
  sqlite3_simulate_device -char $char -sectorsize $sectorsize
  sqlite3 db test.db -vfs devsym
  db eval {
    PRAGMA auto_vacuum=OFF;
  }
  ifcapable !atomicwrite {
    if {[regexp {^atomic} $char]} continue
  }
  do_test io-5.$tn {
    execsql {
      CREATE TABLE abc(a, b, c);
    }
    expr {[file size test.db]/2}
  } $pgsize
}

#----------------------------------------------------------------------
#
do_test io-6.1 {
  db close
  sqlite3_simulate_device -char atomic
  forcedelete test.db
  sqlite3 db test.db -vfs devsym
  execsql {
    PRAGMA mmap_size = 0;
    PRAGMA page_size = 1024;
    PRAGMA cache_size = 2000;
    CREATE TABLE t1(x);
    CREATE TABLE t2(x);
    CREATE TABLE t3(x);
    CREATE INDEX i3 ON t3(x);
    INSERT INTO t3 VALUES(randomblob(100));
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
    INSERT INTO t3 SELECT randomblob(100) FROM t3;
  }

  db_save_and_close
} {}

foreach {tn sql} {
  1 { BEGIN;
        INSERT INTO t1 VALUES('123');
        INSERT INTO t2 VALUES('456');
      COMMIT;
  }
  2 { BEGIN;
        INSERT INTO t1 VALUES('123');
      COMMIT;
  }
} {

  # These tests don't work with memsubsys1, as it causes the effective page
  # cache size to become too small to hold the entire db in memory.
  if {[permutation] == "memsubsys1"} continue

  db_restore
  sqlite3 db test.db -vfs devsym
  execsql {
    PRAGMA cache_size = 2000;
    PRAGMA mmap_size = 0;
    SELECT x FROM t3 ORDER BY rowid;
    SELECT x FROM t3 ORDER BY x;
  }
  do_execsql_test 6.2.$tn.1 { PRAGMA integrity_check } {ok}
  do_execsql_test 6.2.$tn.2 $sql

  # Corrupt the database file on disk. This should not matter for the
  # purposes of the following "PRAGMA integrity_check", as the entire
  # database should be cached in the pager-cache. If corruption is
  # reported, it indicates that executing $sql caused the pager cache
  # to be flushed. Which is a bug.
  hexio_write test.db [expr 1024 * 5] [string repeat 00 2048]
  do_execsql_test 6.2.$tn.3 { PRAGMA integrity_check } {ok}
  db close
}

sqlite3_simulate_device -char {} -sectorsize 0
unregister_devsim

finish_test