#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <ctype.h>
#include <stdio.h>
#ifndef SQLITE_OMIT_VIRTUALTABLE
/*
** CSV_NOINLINE is a function attribute asking the compiler not to inline
** the function it decorates.  It expands to the GCC or MSVC spelling when
** one of those compilers is detected and to nothing otherwise.
*/
#if defined(__GNUC__)
# define CSV_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
# define CSV_NOINLINE __declspec(noinline)
#else
# define CSV_NOINLINE
#endif
/* Size in bytes of the CsvReader.zErr[] error message buffer */
#define CSV_MXERR 200
/* Size in bytes of the buffer used when reading CSV text from a file */
#define CSV_INBUFSZ 1024
/*
** A CsvReader holds the state needed to pull fields, one at a time, out
** of CSV text.  The text comes either from a file (in!=0, buffered through
** zIn[]) or from an in-memory string (in==0, zIn points at the string).
*/
typedef struct CsvReader CsvReader;
struct CsvReader {
FILE *in;              /* Input file, or 0 when reading an in-memory string */
char *z;               /* Accumulated text of the field being read */
int n;                 /* Number of bytes currently used in z[] */
int nAlloc;            /* Number of bytes allocated for z[] */
int nLine;             /* Count of newlines seen so far */
int bNotFirst;         /* Set once any field has been read (disables BOM skip) */
int cTerm;             /* Character that ended the most recent field */
size_t iIn;            /* Index of the next unread byte in zIn[] */
size_t nIn;            /* Number of valid bytes in zIn[] */
char *zIn;             /* The input buffer (or the in-memory string) */
char zErr[CSV_MXERR];  /* Most recent error message; zErr[0]==0 means none */
};
/*
** Put a CsvReader into its pristine, closed state.
**
** Every field is given a defined value, including the read offset iIn and
** the field terminator cTerm, so that a reader that has been initialized
** (or reset) and then reopened never starts from a stale offset.
** This routine does not release anything; use csv_reader_reset() on a
** reader that may own resources.
*/
static void csv_reader_init(CsvReader *p){
  p->in = 0;
  p->z = 0;
  p->n = 0;
  p->nAlloc = 0;
  p->nLine = 0;
  p->bNotFirst = 0;
  p->cTerm = 0;
  p->iIn = 0;
  p->nIn = 0;
  p->zIn = 0;
  p->zErr[0] = 0;
}
/*
** Release everything held by a CsvReader and return it to the initial
** state.  The input buffer zIn is freed only when a file is open; when
** no file is open zIn is left untouched because it is then not owned by
** the reader.
*/
static void csv_reader_reset(CsvReader *p){
  FILE *pFile = p->in;
  if( pFile!=0 ){
    sqlite3_free(p->zIn);
    fclose(pFile);
  }
  sqlite3_free(p->z);
  csv_reader_init(p);
}
/*
** Format an error message into p->zErr[] using sqlite3_vsnprintf().
** The result is limited to the size of the zErr[] buffer.
*/
static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
  va_list args;
  va_start(args, zFormat);
  sqlite3_vsnprintf((int)sizeof(p->zErr), p->zErr, zFormat, args);
  va_end(args);
}
/*
** Open the reader on one of two sources.
**
**   zFilename!=0   Read from the named file through a CSV_INBUFSZ-byte
**                  heap buffer.
**   zFilename==0   Read directly from the NUL-terminated string zData,
**                  which is borrowed, not copied.
**
** Returns 0 on success.  On failure returns 1 and leaves a message in
** p->zErr.
*/
static int csv_reader_open(
  CsvReader *p,
  const char *zFilename,
  const char *zData
){
  if( zFilename==0 ){
    /* In-memory input: the whole string is the "buffer". */
    assert( p->in==0 );
    p->zIn = (char*)zData;
    p->nIn = strlen(zData);
    return 0;
  }

  p->zIn = sqlite3_malloc( CSV_INBUFSZ );
  if( p->zIn==0 ){
    csv_errmsg(p, "out of memory");
    return 1;
  }
  p->in = fopen(zFilename, "rb");
  if( p->in!=0 ) return 0;

  /* fopen() failed.  p->in is 0 so csv_reader_reset() will not free zIn;
  ** release it here before resetting. */
  sqlite3_free(p->zIn);
  csv_reader_reset(p);
  csv_errmsg(p, "cannot open '%s' for reading", zFilename);
  return 1;
}
/*
** The input buffer has been exhausted.  Read up to CSV_INBUFSZ more bytes
** from the file and return the first of them, or EOF if nothing more can
** be read.
**
** The byte is returned as an unsigned char value (0..255), exactly as
** csv_getc() does.  Returning it through a plain (possibly signed) char
** would make a 0xFF byte indistinguishable from EOF and would yield
** negative values for every byte >= 0x80.
*/
static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
  size_t got;

  assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
  assert( p->in!=0 );        /* Only called when reading from a file */

  got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
  if( got==0 ) return EOF;
  p->nIn = got;
  p->iIn = 1;
  return ((unsigned char*)p->zIn)[0];
}
/*
** Return the next input byte as a value in 0..255, or EOF when the input
** is exhausted.  When the buffer is empty and a file is open, the buffer
** is refilled via csv_getc_refill().
*/
static int csv_getc(CsvReader *p){
  if( p->iIn < p->nIn ){
    const unsigned char *aBuf = (const unsigned char*)p->zIn;
    return aBuf[p->iIn++];
  }
  return p->in!=0 ? csv_getc_refill(p) : EOF;
}
/*
** Grow the field buffer p->z[] to twice its current size plus 100 bytes
** and then append character c.  Returns 0 on success.  If the allocation
** fails, the old buffer is left intact, "out of memory" is recorded in
** p->zErr and 1 is returned.
*/
static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
  int nBigger = p->nAlloc*2 + 100;
  char *zBigger = sqlite3_realloc64(p->z, nBigger);
  if( zBigger==0 ){
    csv_errmsg(p, "out of memory");
    return 1;
  }
  p->z = zBigger;
  p->nAlloc = nBigger;
  p->z[p->n] = c;
  p->n++;
  return 0;
}
/*
** Append character c to the field buffer.  One byte of the allocation is
** always kept in reserve (for a terminator), so the buffer is enlarged
** whenever fewer than two free bytes remain.  Returns 0 on success and
** non-zero if the buffer could not be enlarged.
*/
static int csv_append(CsvReader *p, char c){
  if( p->n < p->nAlloc-1 ){
    p->z[p->n] = c;
    p->n++;
    return 0;
  }
  return csv_resize_and_append(p, c);
}
/*
** Read one field of CSV text into p->z[] and return a pointer to it.
**
** On a normal return the field is NUL-terminated, p->n is its length in
** bytes, and p->cTerm holds the character that ended the field: ',',
** '\n' or EOF.  The buffer belongs to the reader and is overwritten by
** the next call.
**
** Return value:
**   0      if the input is already at EOF on entry (p->cTerm is set to
**          EOF), or if the field buffer could not be enlarged.
**   p->z   otherwise.  Note that p->z is itself 0 if no buffer has been
**          allocated yet and the field is empty.
**
** Malformed quoted fields store a message in p->zErr but still return
** whatever text was accumulated.  On the "unescaped quote" path p->cTerm
** is left unchanged.
*/
static char *csv_read_one_field(CsvReader *p){
int c;
p->n = 0;
c = csv_getc(p);
if( c==EOF ){
p->cTerm = EOF;
return 0;
}
if( c=='"' ){
/* Quoted field.  pc and ppc are the previous and previous-previous
** characters appended to the field. */
int pc, ppc;
int startLine = p->nLine;
pc = ppc = 0;
while( 1 ){
c = csv_getc(p);
/* Fast filter: only characters whose value is <= '"' (this includes
** '\n', '\r', EOF and '"' itself) or characters that follow a '"' can
** need special treatment.  Everything else is appended directly. */
if( c<='"' || pc=='"' ){
if( c=='\n' ) p->nLine++;
if( c=='"' ){
if( pc=='"' ){
/* Second quote of a doubled "" pair.  The first was already
** appended as a literal; drop this one and forget it. */
pc = 0;
continue;
}
}
/* A quote followed by ',', '\n', "\r\n" or EOF closes the field. */
if( (c==',' && pc=='"')
|| (c=='\n' && pc=='"')
|| (c=='\n' && pc=='\r' && ppc=='"')
|| (c==EOF && pc=='"')
){
/* Back up over the closing quote (and a '\r' after it, if any). */
do{ p->n--; }while( p->z[p->n]!='"' );
p->cTerm = (char)c;
break;
}
/* A quote followed by anything else except '\r' is malformed. */
if( pc=='"' && c!='\r' ){
csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
break;
}
if( c==EOF ){
csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
startLine, '"');
p->cTerm = (char)c;
break;
}
}
if( csv_append(p, (char)c) ) return 0;
ppc = pc;
pc = c;
}
}else{
/* Unquoted field.  If this is the very first field read and it begins
** with the UTF-8 byte-order mark (0xEF 0xBB 0xBF), discard the mark and
** start over.  If only a prefix of the mark matches, the bytes already
** appended are kept as field content. */
if( (c&0xff)==0xef && p->bNotFirst==0 ){
csv_append(p, (char)c);
c = csv_getc(p);
if( (c&0xff)==0xbb ){
csv_append(p, (char)c);
c = csv_getc(p);
if( (c&0xff)==0xbf ){
p->bNotFirst = 1;
p->n = 0;
return csv_read_one_field(p);
}
}
}
/* Accumulate until ',', '\n' or EOF.  The c>',' test is a shortcut:
** none of the three terminators has a value greater than ','. */
while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
if( csv_append(p, (char)c) ) return 0;
c = csv_getc(p);
}
if( c=='\n' ){
p->nLine++;
/* Treat "\r\n" as a line ending: drop the trailing '\r'. */
if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
}
p->cTerm = (char)c;
}
/* csv_append() always keeps one spare byte, so the terminator fits. */
assert( p->z==0 || p->n<p->nAlloc );
if( p->z ) p->z[p->n] = 0;
p->bNotFirst = 1;
return p->z;
}
/*
** Forward declarations of the virtual-table methods implemented later in
** this file.
*/
static int csvtabCreate(sqlite3*, void*, int, const char*const*,
sqlite3_vtab**,char**);
static int csvtabConnect(sqlite3*, void*, int, const char*const*,
sqlite3_vtab**,char**);
static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
static int csvtabDisconnect(sqlite3_vtab*);
static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
static int csvtabClose(sqlite3_vtab_cursor*);
static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
int argc, sqlite3_value **argv);
static int csvtabNext(sqlite3_vtab_cursor*);
static int csvtabEof(sqlite3_vtab_cursor*);
static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
/*
** An instance of the CSV virtual table.  The sqlite3_vtab base must be
** the first member so that a CsvTable* can be cast to and from a
** sqlite3_vtab*.
*/
typedef struct CsvTable {
sqlite3_vtab base;      /* Base class.  Must be first */
char *zFilename;        /* Name of the CSV file, or 0 when zData is used */
char *zData;            /* In-memory CSV text, or 0 when zFilename is used */
long iStart;            /* Offset in the file or in zData of the first data row */
int nCol;               /* Number of columns in the table */
unsigned int tstFlags;  /* Bit mask of test flags; set only under SQLITE_TEST */
} CsvTable;
/* tstFlags bit checked by csvtabBestIndex() in SQLITE_TEST builds */
#define CSVTEST_FIDX 0x0001
/*
** A cursor over a CSV virtual table.  csvtabOpen() allocates the azVal[]
** and aLen[] arrays in the same memory block, immediately after this
** structure, with one entry per table column.
*/
typedef struct CsvCursor {
sqlite3_vtab_cursor base;  /* Base class.  Must be first */
CsvReader rdr;             /* Reader positioned within the CSV input */
char **azVal;              /* Text of each column of the current row (0 if absent) */
int *aLen;                 /* Bytes allocated for each azVal[] entry */
sqlite3_int64 iRowid;      /* Current rowid; negative means end of input */
} CsvCursor;
/*
** Copy the reader's current error message into the virtual table's
** zErrMsg slot, releasing any message previously stored there.
*/
static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
  char *zCopy;
  sqlite3_free(pTab->base.zErrMsg);
  zCopy = sqlite3_mprintf("%s", pRdr->zErr);
  pTab->base.zErrMsg = zCopy;
}
/*
** Destructor for a CsvTable: free the filename, the in-memory data and
** the table object itself.  Always returns SQLITE_OK.
*/
static int csvtabDisconnect(sqlite3_vtab *pVtab){
  CsvTable *pTab = (CsvTable*)pVtab;
  sqlite3_free(pTab->zData);
  sqlite3_free(pTab->zFilename);
  sqlite3_free(pTab);
  return SQLITE_OK;
}
/*
** Return a pointer to the first character of z[] that is not whitespace
** according to isspace().  If z[] is all whitespace the result points at
** the NUL terminator.
*/
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zScan = (const unsigned char*)z;
  for(; isspace(*zScan); zScan++){}
  return (const char*)zScan;
}
/*
** Remove trailing whitespace (as defined by isspace()) from the string
** z[] by overwriting the first trailing whitespace character with NUL.
**
** The character examined is z[n-1], the last character of the string.
** Testing z[n] would look at the NUL terminator, which is never
** whitespace, and so nothing would ever be trimmed.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
/*
** Strip quoting from z[] in place.
**
** Nothing happens unless z[] begins with a single- or double-quote
** character, is at least two bytes long, and ends with that same quote
** character.  Otherwise the outer quotes are removed and each doubled
** quote character inside is collapsed to a single one.
*/
static void csv_dequote(char *z){
  char cQuote = z[0];
  size_t nLen, iSrc, iDst;

  if( cQuote!='\'' && cQuote!='"' ) return;
  nLen = strlen(z);
  if( nLen<2 || z[nLen-1]!=cQuote ) return;

  iDst = 0;
  iSrc = 1;
  while( iSrc<nLen-1 ){
    /* On a doubled quote, step over the first and copy the second. */
    if( z[iSrc]==cQuote && z[iSrc+1]==cQuote ){
      iSrc++;
    }
    z[iDst++] = z[iSrc++];
  }
  z[iDst] = 0;
}
/*
** Check whether z[] has the form "TAG = VALUE", where TAG is the first
** nTag bytes of zTag and whitespace is allowed before the tag and around
** the '='.  If so, return a pointer to the first non-whitespace character
** of VALUE.  Otherwise return 0.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
/*
** Try to interpret zArg as "zParam = VALUE".
**
** Returns 0 if zArg is not of that form.  Otherwise returns 1 and:
**   - clears p->zErr, then
**   - if *pzVal is already set, records a "more than one" error;
**   - else stores in *pzVal a freshly allocated copy of VALUE, passed
**     through csv_trim_whitespace() and csv_dequote(), or records
**     "out of memory" if the copy cannot be made.
** The caller distinguishes success from failure by checking p->zErr[0].
*/
static int csv_string_parameter(
  CsvReader *p,
  const char *zParam,
  const char *zArg,
  char **pzVal
){
  char *zCopy;
  const char *zRhs = csv_parameter(zParam, (int)strlen(zParam), zArg);
  if( zRhs==0 ) return 0;

  p->zErr[0] = 0;
  if( *pzVal!=0 ){
    csv_errmsg(p, "more than one '%s' parameter", zParam);
  }else if( (zCopy = sqlite3_mprintf("%s", zRhs))==0 ){
    csv_errmsg(p, "out of memory");
  }else{
    csv_trim_whitespace(zCopy);
    csv_dequote(zCopy);
    *pzVal = zCopy;
  }
  return 1;
}
/*
** Interpret z[] as a boolean.  Returns 1 for "yes", "on", "true"
** (compared with sqlite3_stricmp) or exactly "1"; returns 0 for "no",
** "off", "false" or exactly "0"; returns -1 for anything else.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  unsigned int k;

  for(k=0; k<sizeof(azTrue)/sizeof(azTrue[0]); k++){
    if( sqlite3_stricmp(azTrue[k], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;

  for(k=0; k<sizeof(azFalse)/sizeof(azFalse[0]); k++){
    if( sqlite3_stricmp(azFalse[k], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;

  return -1;
}
/*
** Check whether z[] is a boolean parameter named by the first nTag bytes
** of zTag.  Accepted forms are "TAG" alone (meaning true) and
** "TAG = BOOLEAN", where BOOLEAN is anything csv_boolean() understands.
**
** On a match, write the value to *pValue and return 1.  Return 0 if z[]
** is some other parameter or its value is not a recognized boolean.
*/
static int csv_boolean_parameter(
  const char *zTag,
  int nTag,
  const char *z,
  int *pValue
){
  int bVal;
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)!=0 ) return 0;

  zCur = csv_skip_whitespace(zCur + nTag);
  switch( zCur[0] ){
    case 0:
      /* Bare tag with no "=VALUE" means true. */
      *pValue = 1;
      return 1;
    case '=':
      break;
    default:
      return 0;
  }

  bVal = csv_boolean(csv_skip_whitespace(zCur+1));
  if( bVal<0 ) return 0;
  *pValue = bVal;
  return 1;
}
/*
** xConnect method: construct a new CsvTable from the module arguments.
**
** Arguments argv[3] and later are parsed as parameters:
**
**    filename=FILENAME     CSV file to read
**    data=TEXT             CSV text supplied inline
**    schema=SCHEMA         CREATE TABLE statement to declare
**    header[=BOOLEAN]      First row holds column names
**    columns=N             Number of columns (must be positive)
**    testflags=N           Only recognized when SQLITE_TEST is defined
**
** Exactly one of filename= and data= must be given.  If no schema= is
** given, a "CREATE TABLE x(...)" statement is synthesized, naming columns
** from the header row when header is on and "cN" otherwise.
**
** Returns SQLITE_OK on success.  On failure returns SQLITE_NOMEM or
** SQLITE_ERROR (or the code from sqlite3_declare_vtab()), frees the
** partially built table, and stores a message in *pzErr when one is
** available.
**
** NOTE(review): on the error paths *ppVtab is left holding the pointer
** that has just been freed - confirm no caller reads it after a failure.
*/
static int csvtabConnect(
sqlite3 *db,
void *pAux,
int argc, const char *const*argv,
sqlite3_vtab **ppVtab,
char **pzErr
){
CsvTable *pNew = 0;
/* header= setting.  -1 means the parameter has not been seen. */
int bHeader = -1;
int rc = SQLITE_OK;
int i, j;
#ifdef SQLITE_TEST
int tstFlags = 0;
#endif
int b;
/* columns= setting.  Negative means the parameter has not been seen. */
int nCol = -99;
/* Reader used both to hold error messages and to scan the first row. */
CsvReader sRdr;
static const char *azParam[] = {
"filename", "data", "schema",
};
/* Values of the string parameters, parallel to azParam[]. */
char *azPValue[3];
# define CSV_FILENAME (azPValue[0])
# define CSV_DATA (azPValue[1])
# define CSV_SCHEMA (azPValue[2])
assert( sizeof(azPValue)==sizeof(azParam) );
memset(&sRdr, 0, sizeof(sRdr));
memset(azPValue, 0, sizeof(azPValue));
/* Parse each module argument.  The chain below tries, in order: the
** string parameters, header, testflags (test builds only), columns. */
for(i=3; i<argc; i++){
const char *z = argv[i];
const char *zValue;
for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
}
if( j<sizeof(azParam)/sizeof(azParam[0]) ){
/* A string parameter matched; zErr is non-empty if it was invalid. */
if( sRdr.zErr[0] ) goto csvtab_connect_error;
}else
if( csv_boolean_parameter("header",6,z,&b) ){
if( bHeader>=0 ){
csv_errmsg(&sRdr, "more than one 'header' parameter");
goto csvtab_connect_error;
}
bHeader = b;
}else
#ifdef SQLITE_TEST
if( (zValue = csv_parameter("testflags",9,z))!=0 ){
tstFlags = (unsigned int)atoi(zValue);
}else
#endif
if( (zValue = csv_parameter("columns",7,z))!=0 ){
if( nCol>0 ){
csv_errmsg(&sRdr, "more than one 'columns' parameter");
goto csvtab_connect_error;
}
nCol = atoi(zValue);
if( nCol<=0 ){
csv_errmsg(&sRdr, "column= value must be positive");
goto csvtab_connect_error;
}
}else
{
csv_errmsg(&sRdr, "bad parameter: '%s'", z);
goto csvtab_connect_error;
}
}
/* Exactly one of filename= and data= is required. */
if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
goto csvtab_connect_error;
}
/* The input only needs to be opened here if the first row must be read,
** either to count columns or to consume the header. */
if( (nCol<=0 || bHeader==1)
&& csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
){
goto csvtab_connect_error;
}
pNew = sqlite3_malloc( sizeof(*pNew) );
*ppVtab = (sqlite3_vtab*)pNew;
if( pNew==0 ) goto csvtab_connect_oom;
memset(pNew, 0, sizeof(*pNew));
if( CSV_SCHEMA==0 ){
/* No schema= parameter: build a CREATE TABLE statement. */
sqlite3_str *pStr = sqlite3_str_new(0);
char *zSep = "";
int iCol = 0;
sqlite3_str_appendf(pStr, "CREATE TABLE x(");
if( nCol<0 && bHeader<1 ){
/* No columns= and no header: count the fields of the first row. */
nCol = 0;
do{
csv_read_one_field(&sRdr);
nCol++;
}while( sRdr.cTerm==',' );
}
if( nCol>0 && bHeader<1 ){
/* No header: columns are named c0, c1, ... */
for(iCol=0; iCol<nCol; iCol++){
sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
zSep = ",";
}
}else{
/* Header row: take column names from its fields, limited to nCol
** names when columns= was given. */
do{
char *z = csv_read_one_field(&sRdr);
if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
zSep = ",";
iCol++;
}
}while( sRdr.cTerm==',' );
if( nCol<0 ){
nCol = iCol;
}else{
/* Header had fewer fields than columns=: pad with generated names. */
while( iCol<nCol ){
sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
zSep = ",";
}
}
}
pNew->nCol = nCol;
sqlite3_str_appendf(pStr, ")");
CSV_SCHEMA = sqlite3_str_finish(pStr);
if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
}else if( nCol<0 ){
/* schema= given without columns=: count the fields of the first row. */
do{
csv_read_one_field(&sRdr);
pNew->nCol++;
}while( sRdr.cTerm==',' );
}else{
pNew->nCol = nCol;
}
/* Ownership of the filename and data strings moves to the table; clear
** the local slots so the cleanup loops below do not free them again. */
pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
pNew->zData = CSV_DATA; CSV_DATA = 0;
#ifdef SQLITE_TEST
pNew->tstFlags = tstFlags;
#endif
/* Record where the data rows begin.  With a header, that is the reader's
** current position just past the first row.
** NOTE(review): the (int) casts narrow the offset before it is stored in
** the long iStart field - confirm this is acceptable for large inputs. */
if( bHeader!=1 ){
pNew->iStart = 0;
}else if( pNew->zData ){
pNew->iStart = (int)sRdr.iIn;
}else{
pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
}
csv_reader_reset(&sRdr);
rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
if( rc ){
csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
goto csvtab_connect_error;
}
for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
sqlite3_free(azPValue[i]);
}
sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
return SQLITE_OK;
csvtab_connect_oom:
rc = SQLITE_NOMEM;
csv_errmsg(&sRdr, "out of memory");
csvtab_connect_error:
/* Common failure exit: release the table, the parameter strings and the
** reader, and hand any error message to the caller. */
if( pNew ) csvtabDisconnect(&pNew->base);
for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
sqlite3_free(azPValue[i]);
}
if( sRdr.zErr[0] ){
sqlite3_free(*pzErr);
*pzErr = sqlite3_mprintf("%s", sRdr.zErr);
}
csv_reader_reset(&sRdr);
if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
return rc;
}
/*
** Free the stored text of every column of the cursor's current row and
** mark each column as empty.
*/
static void csvtabCursorRowReset(CsvCursor *pCur){
  CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
  int iCol = 0;
  while( iCol<pTab->nCol ){
    sqlite3_free(pCur->azVal[iCol]);
    pCur->azVal[iCol] = 0;
    pCur->aLen[iCol] = 0;
    iCol++;
  }
}
/*
** xCreate method.  Creating a CSV table needs no work beyond connecting
** to it, so this simply forwards to csvtabConnect().
*/
static int csvtabCreate(
  sqlite3 *db,
  void *pAux,
  int argc, const char *const*argv,
  sqlite3_vtab **ppVtab,
  char **pzErr
){
  int rc = csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
  return rc;
}
/*
** xClose method: release the current row, close the reader and free the
** cursor.  Always returns SQLITE_OK.
*/
static int csvtabClose(sqlite3_vtab_cursor *cur){
  CsvCursor *pCsr = (CsvCursor*)cur;
  csvtabCursorRowReset(pCsr);
  csv_reader_reset(&pCsr->rdr);
  sqlite3_free(cur);
  return SQLITE_OK;
}
/*
** xOpen method: allocate a new cursor on table p and open its reader.
**
** The cursor and its per-column azVal[] and aLen[] arrays are carved out
** of a single allocation.
**
** Returns SQLITE_OK and stores the cursor in *ppCursor on success.  On
** failure returns SQLITE_NOMEM or SQLITE_ERROR and leaves *ppCursor
** untouched.  If the reader cannot be opened, its error message is copied
** to the table and the cursor is freed here: a cursor from a failed xOpen
** is never passed to xClose, so it would otherwise leak.
*/
static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
  CsvTable *pTab = (CsvTable*)p;
  CsvCursor *pCur;
  size_t nByte;
  nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
  pCur = sqlite3_malloc64( nByte );
  if( pCur==0 ) return SQLITE_NOMEM;
  memset(pCur, 0, nByte);
  pCur->azVal = (char**)&pCur[1];
  pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
  if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
    csv_xfer_error(pTab, &pCur->rdr);
    csv_reader_reset(&pCur->rdr);
    sqlite3_free(pCur);
    return SQLITE_ERROR;
  }
  *ppCursor = &pCur->base;
  return SQLITE_OK;
}
/*
** xNext method: advance the cursor to the next row of CSV input.
**
** Fields are read until one ends with something other than ','.  The
** first nCol fields are copied into azVal[]; extra fields are discarded
** and missing trailing columns are set to NULL.  When no field at all can
** be read, iRowid is set to -1 so that csvtabEof() reports end of input.
**
** Returns SQLITE_OK, or SQLITE_NOMEM if a column buffer cannot be
** enlarged.  In the latter case the message is also copied to the table.
** Previously this failure was reported as SQLITE_OK, which let the scan
** continue with a partially filled row.
*/
static int csvtabNext(sqlite3_vtab_cursor *cur){
  CsvCursor *pCur = (CsvCursor*)cur;
  CsvTable *pTab = (CsvTable*)cur->pVtab;
  int i = 0;
  char *z;
  do{
    z = csv_read_one_field(&pCur->rdr);
    if( z==0 ){
      break;
    }
    if( i<pTab->nCol ){
      int nByte = pCur->rdr.n+1;   /* Field text plus NUL terminator */
      if( pCur->aLen[i] < nByte ){
        char *zNew = sqlite3_realloc64(pCur->azVal[i], nByte);
        if( zNew==0 ){
          csv_errmsg(&pCur->rdr, "out of memory");
          csv_xfer_error(pTab, &pCur->rdr);
          return SQLITE_NOMEM;
        }
        pCur->azVal[i] = zNew;
        pCur->aLen[i] = nByte;
      }
      memcpy(pCur->azVal[i], z, nByte);
      i++;
    }
  }while( pCur->rdr.cTerm==',' );
  if( z==0 && i==0 ){
    pCur->iRowid = -1;
  }else{
    pCur->iRowid++;
    /* Short row: clear the columns that received no value. */
    while( i<pTab->nCol ){
      sqlite3_free(pCur->azVal[i]);
      pCur->azVal[i] = 0;
      pCur->aLen[i] = 0;
      i++;
    }
  }
  return SQLITE_OK;
}
/*
** xColumn method: return the text of column i of the current row.  If i
** is out of range or the column has no value, no result is set.
*/
static int csvtabColumn(
  sqlite3_vtab_cursor *cur,
  sqlite3_context *ctx,
  int i
){
  CsvCursor *pCsr = (CsvCursor*)cur;
  CsvTable *pTab = (CsvTable*)cur->pVtab;
  const char *zVal;

  if( i<0 || i>=pTab->nCol ) return SQLITE_OK;
  zVal = pCsr->azVal[i];
  if( zVal!=0 ){
    sqlite3_result_text(ctx, zVal, -1, SQLITE_TRANSIENT);
  }
  return SQLITE_OK;
}
/*
** xRowid method: write the rowid of the current row to *pRowid.
*/
static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
  const CsvCursor *pCsr = (const CsvCursor*)cur;
  pRowid[0] = pCsr->iRowid;
  return SQLITE_OK;
}
/*
** xEof method: return non-zero once the cursor has moved past the last
** row, which is signalled by a negative iRowid.
*/
static int csvtabEof(sqlite3_vtab_cursor *cur){
  const CsvCursor *pCsr = (const CsvCursor*)cur;
  return pCsr->iRowid<0 ? 1 : 0;
}
/*
** xFilter method: rewind the cursor to the first data row and load it.
** The idxNum, idxStr, argc and argv arguments are not used.
*/
static int csvtabFilter(
  sqlite3_vtab_cursor *pVtabCursor,
  int idxNum, const char *idxStr,
  int argc, sqlite3_value **argv
){
  CsvCursor *pCsr = (CsvCursor*)pVtabCursor;
  CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
  CsvReader *pRdr = &pCsr->rdr;

  pCsr->iRowid = 0;

  /* Make sure the field buffer exists.  csv_read_one_field() returns the
  ** buffer pointer, so an empty first field read into an unallocated
  ** buffer would come back as 0 and be taken for end of input. */
  if( csv_append(pRdr, 0) ) return SQLITE_NOMEM;

  if( pRdr->in!=0 ){
    /* File input: seek to the first data row and discard buffered bytes. */
    fseek(pRdr->in, pTab->iStart, SEEK_SET);
    pRdr->iIn = 0;
    pRdr->nIn = 0;
  }else{
    /* In-memory input: just move the read offset. */
    assert( pRdr->zIn==pTab->zData );
    assert( pTab->iStart>=0 );
    assert( (size_t)pTab->iStart<=pRdr->nIn );
    pRdr->iIn = pTab->iStart;
  }
  return csvtabNext(pVtabCursor);
}
/*
** xBestIndex method.  The CSV table can only be scanned from start to
** finish, so the plan is always a full scan with a large estimated cost.
**
** In SQLITE_TEST builds, when the CSVTEST_FIDX test flag is set on the
** table, every usable ==, LIKE or GLOB constraint is instead assigned an
** argvIndex and the cost is dropped to 10.
**
** aConstraintUsage[] runs parallel to aConstraint[], so the usage entry
** is indexed by the constraint's own position i.  nConst only supplies
** the consecutive argvIndex values.  Indexing the usage array by nConst
** would attach argvIndex to the wrong constraint whenever an earlier
** constraint had been skipped.
*/
static int csvtabBestIndex(
  sqlite3_vtab *tab,
  sqlite3_index_info *pIdxInfo
){
  pIdxInfo->estimatedCost = 1000000;
#ifdef SQLITE_TEST
  if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
    int i;
    int nConst = 0;
    for(i=0; i<pIdxInfo->nConstraint; i++){
      unsigned char op;
      if( pIdxInfo->aConstraint[i].usable==0 ) continue;
      op = pIdxInfo->aConstraint[i].op;
      if( op==SQLITE_INDEX_CONSTRAINT_EQ
       || op==SQLITE_INDEX_CONSTRAINT_LIKE
       || op==SQLITE_INDEX_CONSTRAINT_GLOB
      ){
        pIdxInfo->estimatedCost = 10;
        pIdxInfo->aConstraintUsage[i].argvIndex = nConst+1;
        nConst++;
      }
    }
  }
#endif
  return SQLITE_OK;
}
/*
** Method table for the read-only "csv" virtual table module.  The slot
** names in the comments follow the field order of sqlite3_module.
*/
static sqlite3_module CsvModule = {
0,                  /* iVersion */
csvtabCreate,       /* xCreate */
csvtabConnect,      /* xConnect */
csvtabBestIndex,    /* xBestIndex */
csvtabDisconnect,   /* xDisconnect */
csvtabDisconnect,   /* xDestroy */
csvtabOpen,         /* xOpen */
csvtabClose,        /* xClose */
csvtabFilter,       /* xFilter */
csvtabNext,         /* xNext */
csvtabEof,          /* xEof */
csvtabColumn,       /* xColumn */
csvtabRowid,        /* xRowid */
0,                  /* xUpdate */
0,                  /* xBegin */
0,                  /* xSync */
0,                  /* xCommit */
0,                  /* xRollback */
0,                  /* xFindFunction */
0,                  /* xRename */
};
#ifdef SQLITE_TEST
/*
** xUpdate method used only by the test module below.  It ignores all of
** its arguments and refuses every write with SQLITE_READONLY.
*/
static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
  (void)p;
  (void)n;
  (void)v;
  (void)x;
  return SQLITE_READONLY;
}
/*
** Method table for the test-only "csv_wr" module.  It is identical to
** CsvModule except that it supplies an xUpdate method, which always fails
** with SQLITE_READONLY.
*/
static sqlite3_module CsvModuleFauxWrite = {
0,                  /* iVersion */
csvtabCreate,       /* xCreate */
csvtabConnect,      /* xConnect */
csvtabBestIndex,    /* xBestIndex */
csvtabDisconnect,   /* xDisconnect */
csvtabDisconnect,   /* xDestroy */
csvtabOpen,         /* xOpen */
csvtabClose,        /* xClose */
csvtabFilter,       /* xFilter */
csvtabNext,         /* xNext */
csvtabEof,          /* xEof */
csvtabColumn,       /* xColumn */
csvtabRowid,        /* xRowid */
csvtabUpdate,       /* xUpdate */
0,                  /* xBegin */
0,                  /* xSync */
0,                  /* xCommit */
0,                  /* xRollback */
0,                  /* xFindFunction */
0,                  /* xRename */
};
#endif
#endif
/*
** Extension entry point.  Registers the "csv" virtual table module on
** database connection db and, in SQLITE_TEST builds, the "csv_wr" module
** as well.  Returns the result of sqlite3_create_module().
**
** When SQLITE_OMIT_VIRTUALTABLE is defined nothing is registered and
** SQLITE_OK is returned.  The pzErrMsg argument is not used.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
int sqlite3_csv_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
#ifndef SQLITE_OMIT_VIRTUALTABLE
int rc;
SQLITE_EXTENSION_INIT2(pApi);
rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
#ifdef SQLITE_TEST
if( rc==SQLITE_OK ){
/* Only attempt the second registration if the first succeeded. */
rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
}
#endif
return rc;
#else
return SQLITE_OK;
#endif
}