Class: DWH::Adapters::Sqlite

Inherits:
Adapter
  • Object
show all
Defined in:
lib/dwh/adapters/sqlite.rb

Overview

Note:

This adapter enables WAL mode by default for better concurrent read performance. Set enable_wal: false to disable this behavior.

SQLite adapter optimized for analytical workloads.

This requires the ruby sqlite3 gem.

Generally, adapters should be created using DWH.create. Where a configuration is passed in as options hash or argument list.

Examples:

Basic connection with required only options

DWH.create(:sqlite, {file: 'path/to/my/database.db' })

Open in read only mode

DWH.create(:sqlite, {file: 'path/to/my/database.db', readonly: true})

Configure with custom performance pragmas

DWH.create(:sqlite, {file: 'path/to/my/database.db',
  pragmas: { cache_size: -128000, mmap_size: 268435456 }})

Constant Summary collapse

DEFAULT_PRAGMAS =

Default pragmas optimized for analytical workloads

{
  cache_size: -64_000, # 64MB cache (negative means KB)
  temp_store: 'MEMORY', # Store temp tables in memory
  mmap_size: 134_217_728, # 128MB memory-mapped I/O
  page_size: 4096,         # Standard page size
  synchronous: 'NORMAL'    # Faster than FULL, safe with WAL
}.freeze

Constants included from Settings

Settings::BASE_SETTINGS_FILE

Constants included from Functions::Dates

Functions::Dates::DATE_CLASSES, Functions::Dates::TIMESTAMPABLE_UNITS

Instance Attribute Summary

Attributes inherited from Adapter

#config, #settings

Attributes included from Settings

#adapter_settings

Instance Method Summary collapse

Methods inherited from Adapter

#adapter_name, #alter_settings, config, configuration, #connect!, #connect?, #extra_connection_params, #extra_query_params, #initialize, #reset_settings, #table?, #token_expired?, #with_debug, #with_retry

Methods included from Settings

#adapter_name, #load_settings, #settings_file, #settings_file_path, #using_base_settings?

Methods included from Logger

#logger, logger

Methods included from Behaviors

#apply_advanced_filtering_on_array_projections?, #cross_universe_measure_filtering_strategy, #extend_ending_date_to_last_hour_of_day?, #final_measure_filter?, #final_pass_measure_join_type, #greedy_apply_date_filters, #intermediate_measure_filter?, #temp_table_prefix, #temp_table_type

Methods included from Functions

#cross_join, #gsk, #lower_case, #quote, #string_lit, #trim, #upper_case

Methods included from Functions::Arrays

#array_exclude_list, #array_in_list, #array_unnest_join

Methods included from Functions::Nulls

#if_null, #null_if, #null_if_zero

Methods included from Functions::ExtractDatePart

#extract_day_of_month, #extract_day_of_week, #extract_day_of_year, #extract_hour, #extract_minute, #extract_month, #extract_quarter, #extract_week_of_year, #extract_year, #extract_year_month

Methods included from Functions::Dates

#adjust_week_start_day, #adjust_week_start_day?, #current_date, #current_time, #current_timestamp, #date_add, #date_data_type, #date_diff, #date_format, #date_format_sql, #date_int?, #date_lit, #date_literal, #date_time_format, #date_time_literal, #date_time_tz_format, #default_week_start_day, #timestamp_lit, #timestamp_literal, #week_start_day, #week_starts_on_sunday?

Methods included from Capabilities

#supports_array_functions?, #supports_common_table_expressions?, #supports_cross_join?, #supports_full_join?, #supports_sub_queries?, #supports_table_join?, #supports_temp_tables?, #supports_window_functions?

Constructor Details

This class inherits a constructor from DWH::Adapters::Adapter

Instance Method Details

#cast(exp, type) ⇒ Object

SQLite's CAST(... AS DATE) doesn't work properly - it just extracts the year We need to override cast to use the date() function for DATE types



303
304
305
306
307
308
309
# File 'lib/dwh/adapters/sqlite.rb', line 303

def cast(exp, type)
  if type.to_s.downcase == 'date'
    "date(#{exp})"
  else
    super
  end
end

#closeObject

Close the connection if it was created.



66
67
68
69
70
71
# File 'lib/dwh/adapters/sqlite.rb', line 66

def close
  return if @connection.nil?

  @connection.close unless @connection.closed?
  @connection = nil
end

#connectionObject

Creates a connection to the target database and returns the connection object or self



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/dwh/adapters/sqlite.rb', line 39

def connection
  return @connection if @connection

  options = build_open_options
  @connection = SQLite3::Database.new(config[:file], options)

  # Set busy timeout to handle concurrent access
  @connection.busy_timeout(config[:timeout])

  # Don't return results as hash by default for performance
  @connection.results_as_hash = false

  # Enable WAL mode for concurrent reads (unless disabled or readonly)
  @connection.execute('PRAGMA journal_mode = WAL') if config[:enable_wal] && !config[:readonly]

  # Apply default pragmas
  apply_pragmas(DEFAULT_PRAGMAS)

  # Apply user-specified pragmas (will override defaults)
  apply_pragmas(config[:pragmas]) if config.key?(:pragmas)

  @connection
rescue StandardError => e
  raise ConfigError, e.message
end

#execute(sql, format: :array, retries: 0) ⇒ Array<Array>, ...

Execute sql on the target database.

Parameters:

  • sql (String)

    actual sql

  • format (Symbol, String) (defaults to: :array)

    return format type

    • array returns array of array
    • object returns array of Hashes
    • csv returns as csv
    • native returns the native result from any clients used
      • For example: Postgres using pg client will return PG::Result
      • Http clients will returns the HTTP response object
  • retries (Integer) (defaults to: 0)

    number of retries in case of failure. Default is 0

Returns:

  • (Array<Array>, Hash, CSV, Native)

Raises:



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/dwh/adapters/sqlite.rb', line 134

def execute(sql, format: :array, retries: 0)
  begin
    result = with_debug(sql) { with_retry(retries) { connection.execute(sql) } }
  rescue StandardError => e
    raise ExecutionError, e.message
  end

  format = format.downcase if format.is_a?(String)
  case format.to_sym
  when :array
    result
  when :object
    result_to_hash(sql, result)
  when :csv
    result_to_csv(sql, result)
  when :native
    result
  else
    raise UnsupportedCapability, "Unsupported format: #{format} for this #{name}"
  end
end

#execute_stream(sql, io, stats: nil, retries: 0) ⇒ IO

Execute sql and stream responses back. Data is writtent out in CSV format to the provided IO object.

Parameters:

  • sql (String)

    actual sql

  • io (IO)

    IO object to write records to

  • stats (StreamingStats) (defaults to: nil)

    collect stats and preview data this is optional

  • retries (Integer) (defaults to: 0)

    number of retries in case of failure

Returns:

  • (IO)

Raises:



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/dwh/adapters/sqlite.rb', line 157

def execute_stream(sql, io, stats: nil, retries: 0)
  with_debug(sql) do
    with_retry(retries) do
      stmt = connection.prepare(sql)
      columns = stmt.columns

      io.write(CSV.generate_line(columns))

      stmt.execute.each do |row|
        stats << row unless stats.nil?
        io.write(CSV.generate_line(row))
      end

      stmt.close
    end
  end

  io.rewind
  io
rescue StandardError => e
  raise ExecutionError, e.message
end

#extract_day_name(exp, abbreviate: false) ⇒ Object

SQLite's strftime doesn't support %A (day name) or %B (month name) We need to implement these using CASE statements based on day/month numbers



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/dwh/adapters/sqlite.rb', line 235

def extract_day_name(exp, abbreviate: false)
  day_num = "CAST(strftime('%w', #{exp}) AS INTEGER)"

  if abbreviate
    # Abbreviated day names: SUN, MON, TUE, etc.
    "(CASE #{day_num} " \
    "WHEN 0 THEN 'SUN' " \
    "WHEN 1 THEN 'MON' " \
    "WHEN 2 THEN 'TUE' " \
    "WHEN 3 THEN 'WED' " \
    "WHEN 4 THEN 'THU' " \
    "WHEN 5 THEN 'FRI' " \
    "WHEN 6 THEN 'SAT' " \
    'END)'
  else
    # Full day names: SUNDAY, MONDAY, TUESDAY, etc.
    "(CASE #{day_num} " \
    "WHEN 0 THEN 'SUNDAY' " \
    "WHEN 1 THEN 'MONDAY' " \
    "WHEN 2 THEN 'TUESDAY' " \
    "WHEN 3 THEN 'WEDNESDAY' " \
    "WHEN 4 THEN 'THURSDAY' " \
    "WHEN 5 THEN 'FRIDAY' " \
    "WHEN 6 THEN 'SATURDAY' " \
    'END)'
  end
end

#extract_month_name(exp, abbreviate: false) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/dwh/adapters/sqlite.rb', line 263

def extract_month_name(exp, abbreviate: false)
  month_num = "CAST(strftime('%m', #{exp}) AS INTEGER)"

  if abbreviate
    # Abbreviated month names: JAN, FEB, MAR, etc.
    "(CASE #{month_num} " \
    "WHEN 1 THEN 'JAN' " \
    "WHEN 2 THEN 'FEB' " \
    "WHEN 3 THEN 'MAR' " \
    "WHEN 4 THEN 'APR' " \
    "WHEN 5 THEN 'MAY' " \
    "WHEN 6 THEN 'JUN' " \
    "WHEN 7 THEN 'JUL' " \
    "WHEN 8 THEN 'AUG' " \
    "WHEN 9 THEN 'SEP' " \
    "WHEN 10 THEN 'OCT' " \
    "WHEN 11 THEN 'NOV' " \
    "WHEN 12 THEN 'DEC' " \
    'END)'
  else
    # Full month names: JANUARY, FEBRUARY, MARCH, etc.
    "(CASE #{month_num} " \
    "WHEN 1 THEN 'JANUARY' " \
    "WHEN 2 THEN 'FEBRUARY' " \
    "WHEN 3 THEN 'MARCH' " \
    "WHEN 4 THEN 'APRIL' " \
    "WHEN 5 THEN 'MAY' " \
    "WHEN 6 THEN 'JUNE' " \
    "WHEN 7 THEN 'JULY' " \
    "WHEN 8 THEN 'AUGUST' " \
    "WHEN 9 THEN 'SEPTEMBER' " \
    "WHEN 10 THEN 'OCTOBER' " \
    "WHEN 11 THEN 'NOVEMBER' " \
    "WHEN 12 THEN 'DECEMBER' " \
    'END)'
  end
end

#metadata(table, **qualifiers) ⇒ DWH::Table

Get the schema structure of a given a given table_name. Pass in optional catalog and schema info.

Example: metadata("public.big_table") metadata("big_table") metadata("big_table",schema: "public")

Parameters:

  • table (String)
    • table name
  • qualifiers (Hash)

    a customizable set of options

Options Hash (**qualifiers):

  • :catalog (String)

    optional catalog or equivalent name space. will be ignored if the adapter doesn't support

  • :schema (String)

    optional schema to scope to. will be ignored if the adapter doesn't support

Returns:



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/dwh/adapters/sqlite.rb', line 112

def (table, **qualifiers)
  db_table = Table.new table, **qualifiers

  # SQLite uses PRAGMA table_info for metadata
  sql = "PRAGMA table_info(#{db_table.physical_name})"

  cols = execute(sql)
  cols.each do |col|
    # PRAGMA table_info returns: cid, name, type, notnull, dflt_value, pk
    db_table << Column.new(
      name: col[1],
      data_type: col[2],
      precision: nil,
      scale: nil,
      max_char_length: nil
    )
  end

  db_table
end

#stats(table, date_column: nil, **qualifiers) ⇒ DWH::Table

Returns basic stats of a given table. Will typically include row_count, date_start, and date_end.

Examples:

stats("public.big_table", date_column: "fact_date")
stats("big_table")
stats("big_table",schema: "public")

Parameters:

  • table (String)

    table name

  • date_column (String) (defaults to: nil)

    optional date column to use to find range

  • qualifiers (Hash)

    a customizable set of options

Options Hash (**qualifiers):

  • :catalog (String)

    optional catalog or equivalent name space. will be ignored if the adapter doesn't support

  • :schema (String)

    optional schema to scope to. will be ignored if the adapter doesn't support

Returns:

Raises:



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/dwh/adapters/sqlite.rb', line 93

def stats(table, date_column: nil, **qualifiers)
  db_table = Table.new table, **qualifiers

  sql = <<-SQL
  SELECT count(*) AS ROW_COUNT
  #{date_column.nil? ? '' : ", min(#{date_column}) AS DATE_START"}
  #{date_column.nil? ? '' : ", max(#{date_column}) AS DATE_END"}
  FROM #{db_table.physical_name}
  SQL

  result = execute(sql)
  TableStats.new(
    row_count: result.first[0],
    date_start: date_column ? result.first[1] : nil,
    date_end: date_column ? result.first[2] : nil
  )
end

#stream(sql) {|chunk| ... } ⇒ Object

Executes the given sql and yields the streamed results to the given block.

Parameters:

  • sql (String)

    actual sql

Yields:

  • (chunk)

    Yields a streamed chunk as it streams in. The chunk type might vary depending on the target db and settings

Raises:



181
182
183
184
185
186
187
188
189
# File 'lib/dwh/adapters/sqlite.rb', line 181

def stream(sql, &block)
  with_debug(sql) do
    stmt = connection.prepare(sql)
    stmt.execute.each do |row|
      block.call(row)
    end
    stmt.close
  end
end

#tables(**qualifiers) ⇒ Array<String>

Get all tables available in the target db. It will use the default catalog and schema config only specified here.

Parameters:

  • qualifiers (Hash)

    a customizable set of options

Options Hash (**qualifiers):

  • :catalog (String)

    optional catalog or equivalent name space. will be ignored if the adapter doesn't support

  • :schema (String)

    optional schema to scope to. will be ignored if the adapter doesn't support

Returns:

  • (Array<String>)


85
86
87
88
89
90
# File 'lib/dwh/adapters/sqlite.rb', line 85

def tables(**qualifiers)
  sql = "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name"

  res = execute(sql)
  res.flatten
end

#test_connection(raise_exception: false) ⇒ Boolean

Tests the connection to the target database and returns true if successful, or raise Exception or false connection object or self

Returns:

Raises:



74
75
76
77
78
79
80
81
82
# File 'lib/dwh/adapters/sqlite.rb', line 74

def test_connection(raise_exception: false)
  connection
  connection.execute('SELECT 1')
  true
rescue StandardError => e
  raise ConnectionError, e.message if raise_exception

  false
end

#truncate_date(unit, exp) ⇒ Object

Custom date truncation implementation. SQLite doesn't offer a native DATE_TRUNC function. We use 'start of' modifiers for year, month, and day, and custom logic for quarter and week.



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/dwh/adapters/sqlite.rb', line 195

def truncate_date(unit, exp)
  unit = unit.strip.downcase

  case unit
  when 'year'
    "date(#{exp}, 'start of year')"
  when 'quarter'
    # Calculate quarter start using CASE statement
    # Q1: Jan-Mar (months 1-3) -> start of year
    # Q2: Apr-Jun (months 4-6) -> start of year + 3 months
    # Q3: Jul-Sep (months 7-9) -> start of year + 6 months
    # Q4: Oct-Dec (months 10-12) -> start of year + 9 months
    '(CASE ' \
    "WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 1 AND 3 THEN date(#{exp}, 'start of year') " \
    "WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 4 AND 6 THEN date(#{exp}, 'start of year', '+3 months') " \
    "WHEN CAST(strftime('%m', #{exp}) AS INTEGER) BETWEEN 7 AND 9 THEN date(#{exp}, 'start of year', '+6 months') " \
    "ELSE date(#{exp}, 'start of year', '+9 months') " \
    'END)'
  when 'month'
    "date(#{exp}, 'start of month')"
  when 'week'
    # Use week start day from settings
    gsk("#{settings[:week_start_day].downcase}_week_start_day")
      .gsub(/@exp/i, exp)
  when 'day', 'date'
    "date(#{exp})"
  when 'hour'
    # SQLite datetime returns timestamp, truncate to hour
    "datetime(strftime('%Y-%m-%d %H:00:00', #{exp}))"
  when 'minute'
    "datetime(strftime('%Y-%m-%d %H:%M:00', #{exp}))"
  when 'second'
    "datetime(strftime('%Y-%m-%d %H:%M:%S', #{exp}))"
  else
    raise UnsupportedCapability, "Currently not supporting truncation at #{unit} level"
  end
end

#valid_config?Boolean

Returns:



311
312
313
314
315
316
# File 'lib/dwh/adapters/sqlite.rb', line 311

def valid_config?
  super
  require 'sqlite3'
rescue LoadError
  raise ConfigError, "Required 'sqlite3' gem missing. Please add it to your Gemfile."
end