sql_support/
each_chunk.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use lazy_static::lazy_static;
use rusqlite::{self, limits::Limit, types::ToSql};
use std::iter::Map;
use std::slice::Iter;

/// Returns `SQLITE_LIMIT_VARIABLE_NUMBER` as reported by a throwaway in-memory connection.
///
/// The value is read once, on first use, and cached for the lifetime of the process. Because
/// it is not read from the connection you are actually using, it will be wrong for any
/// connection whose limit was lowered at runtime. Doing so is rare enough that it is
/// explicitly unsupported (why would you want to lower this at runtime?).
///
/// # Panics
///
/// Panics if the in-memory connection cannot be opened, or if the reported limit is zero or
/// negative (nothing prevents that beyond a warning in the SQLite documentation). It is
/// unlikely that useful queries could be run in that situation anyway.
pub fn default_max_variable_number() -> usize {
    lazy_static! {
        static ref MAX_VARIABLE_NUMBER: usize = {
            let memory_db = rusqlite::Connection::open_in_memory()
                .expect("Failed to initialize in-memory connection (out of memory?)");
            let reported = memory_db.limit(Limit::SQLITE_LIMIT_VARIABLE_NUMBER);

            // Reject non-positive limits up front so the cast below can never wrap.
            if reported <= 0 {
                panic!(
                    "Illegal value for SQLITE_LIMIT_VARIABLE_NUMBER (must be > 0) {}",
                    reported
                );
            }
            reported as usize
        };
    }
    *MAX_VARIABLE_NUMBER
}

/// Splits a `&[impl ToSql]` of arbitrary length into pieces that each fit within the bound
/// variable limit, i.e. no more than `default_max_variable_number()` items per piece. This is
/// useful when performing batched updates.
///
/// `do_chunk` is invoked once per piece. Its first argument is the sub-slice for that piece
/// and its second argument is the offset of that sub-slice from the start of `items`. The
/// first error returned by `do_chunk` stops the iteration and is returned to the caller.
///
/// See `each_chunk_mapped` for the case where `T` doesn't implement `ToSql`, but can be
/// converted to something that does.
pub fn each_chunk<'a, T, E, F>(items: &'a [T], do_chunk: F) -> Result<(), E>
where
    T: 'a,
    F: FnMut(&'a [T], usize) -> Result<(), E>,
{
    let max_per_chunk = default_max_variable_number();
    each_sized_chunk(items, max_per_chunk, do_chunk)
}

/// A version of `each_chunk` for the case where turning an item into something that
/// implements `ToSql` requires a custom intermediate step. For example, you might want to
/// grab a property off of each record in an array of records.
///
/// `to_sql` performs that conversion. `do_chunk` receives an iterator of converted items for
/// one chunk (at most `default_max_variable_number()` of them) together with the offset of
/// that chunk from the start of `items`.
pub fn each_chunk_mapped<'a, T, U, E, Mapper, DoChunk>(
    items: &'a [T],
    to_sql: Mapper,
    do_chunk: DoChunk,
) -> Result<(), E>
where
    T: 'a,
    U: ToSql + 'a,
    Mapper: Fn(&'a T) -> U,
    DoChunk: FnMut(Map<Iter<'a, T>, &'_ Mapper>, usize) -> Result<(), E>,
{
    let max_per_chunk = default_max_variable_number();
    each_sized_chunk_mapped(items, max_per_chunk, to_sql, do_chunk)
}

// Split out for testing, and kept separate from `each_sized_chunk_mapped` so
// that callers who need no mapping receive a real slice in their callback.
// The two could probably be unified with enough type system trickery, but a
// simpler declaration is one of the benefits of `each_chunk` over the mapped
// versions.
//
// `do_chunk` is called with consecutive sub-slices of at most `chunk_size`
// items plus the offset of each sub-slice within `items`. The first `Err`
// stops the iteration and is returned. An empty `items` never invokes the
// callback. Panics (inside `slice::chunks`) if `chunk_size` is 0 while `items`
// is non-empty.
pub fn each_sized_chunk<'a, T, E, F>(
    items: &'a [T],
    chunk_size: usize,
    mut do_chunk: F,
) -> Result<(), E>
where
    T: 'a,
    F: FnMut(&'a [T], usize) -> Result<(), E>,
{
    // Bail out before `chunks` is reached so that empty input is accepted
    // regardless of `chunk_size`.
    if items.is_empty() {
        return Ok(());
    }
    // Only the final chunk can be short, so the n-th chunk always starts at
    // `n * chunk_size`.
    items
        .chunks(chunk_size)
        .enumerate()
        .try_for_each(|(index, chunk)| do_chunk(chunk, index * chunk_size))
}

/// Utility to help perform batched updates, inserts, queries, etc. This is the low-level
/// form that `each_chunk_mapped` wraps; it lets the caller supply both the mapping function
/// and the chunk size.
///
/// `do_chunk` is called once per chunk of at most `chunk_size` items. It receives an iterator
/// that lazily applies `to_sql` to each item of the chunk, plus the offset of the chunk
/// within `items`. The first `Err` stops the iteration and is returned. An empty `items`
/// never invokes the callback.
///
/// Note: `mapped` just refers to translating each `T` into some `U` where `U: ToSql` by way
/// of the `to_sql` function. This is useful for e.g. inserting the IDs of a large list of
/// records.
///
/// # Panics
///
/// Panics (inside `slice::chunks`) if `chunk_size` is 0 while `items` is non-empty.
pub fn each_sized_chunk_mapped<'a, T, U, E, Mapper, DoChunk>(
    items: &'a [T],
    chunk_size: usize,
    to_sql: Mapper,
    mut do_chunk: DoChunk,
) -> Result<(), E>
where
    T: 'a,
    U: ToSql + 'a,
    Mapper: Fn(&'a T) -> U,
    DoChunk: FnMut(Map<Iter<'a, T>, &'_ Mapper>, usize) -> Result<(), E>,
{
    // Bail out before `chunks` is reached so that empty input is accepted regardless of
    // `chunk_size`.
    if items.is_empty() {
        return Ok(());
    }
    // Every chunk's iterator borrows the same mapper.
    let mapper = &to_sql;
    for (index, chunk) in items.chunks(chunk_size).enumerate() {
        // Only the final chunk can be short, so the n-th chunk starts at `n * chunk_size`.
        let start = index * chunk_size;
        do_chunk(chunk.iter().map(mapper), start)?;
    }
    Ok(())
}

/// Test helper: asserts that `items` yields exactly the values in `expect`, comparing each
/// pair through its `to_sql()` representation. `desc` is included in the failure message to
/// identify which chunk was being checked.
#[cfg(test)]
fn check_chunk<T, C>(items: C, expect: &[T], desc: &str)
where
    C: IntoIterator,
    <C as IntoIterator>::Item: ToSql,
    T: ToSql,
{
    let actual: Vec<_> = items.into_iter().collect();
    assert_eq!(actual.len(), expect.len());
    // Can't quite make the borrowing work out here w/o a loop, oh well.
    for (idx, (got, want)) in actual.iter().zip(expect.iter()).enumerate() {
        let got_sql = got.to_sql().unwrap();
        let want_sql = want.to_sql().unwrap();
        assert_eq!(got_sql, want_sql, "{}: Bad value at index {}", desc, idx);
    }
}

#[cfg(test)]
mod test_mapped {
    use super::*;

    /// Five items with a chunk size of three must arrive as `[1, 2, 3]` at offset 0 and then
    /// `[4, 5]` at offset 3, in that order.
    #[test]
    fn test_separate() {
        let mut calls = 0;
        let outcome = each_sized_chunk_mapped(
            &[1, 2, 3, 4, 5],
            3,
            |item| item as &dyn ToSql,
            |chunk, offset| {
                if offset == 0 {
                    assert_eq!(calls, 0);
                    check_chunk(chunk, &[1, 2, 3], "first chunk");
                } else if offset == 3 {
                    assert_eq!(calls, 1);
                    check_chunk(chunk, &[4, 5], "second chunk");
                } else {
                    panic!("Unexpected offset {}", offset);
                }
                calls += 1;
                Ok::<(), ()>(())
            },
        );
        outcome.unwrap();
    }

    /// When the input length is less than or equal to the chunk size, the callback runs
    /// exactly once and sees every item.
    #[test]
    fn test_leq_chunk_size() {
        for check_size in [5, 6].iter().copied() {
            let mut calls = 0;
            let outcome = each_sized_chunk_mapped(
                &[1, 2, 3, 4, 5],
                check_size,
                |item| item as &dyn ToSql,
                |chunk, offset| {
                    assert_eq!(calls, 0);
                    calls += 1;
                    assert_eq!(offset, 0);
                    check_chunk(chunk, &[1, 2, 3, 4, 5], "only iteration");
                    Ok::<(), ()>(())
                },
            );
            outcome.unwrap();
        }
    }

    /// Empty input must succeed without ever invoking the chunk callback.
    #[test]
    fn test_empty_chunk() {
        let empty: &[i64] = &[];
        let outcome: Result<(), ()> = each_sized_chunk_mapped(
            empty,
            100,
            |item| item as &dyn ToSql,
            |_, _| panic!("Should never be called"),
        );
        outcome.unwrap();
    }

    /// An error from the callback is returned as-is and no later chunk is processed.
    #[test]
    fn test_error() {
        let mut calls = 0;
        let failure = each_sized_chunk_mapped(
            &[1, 2, 3, 4, 5, 6, 7],
            3,
            |item| item as &dyn ToSql,
            |_, offset| match offset {
                0 => {
                    assert_eq!(calls, 0);
                    calls += 1;
                    Ok(())
                }
                3 => {
                    assert_eq!(calls, 1);
                    calls += 1;
                    Err("testing".to_string())
                }
                // Make sure we stopped after the error.
                other => panic!("Shouldn't get called with offset of {}", other),
            },
        )
        .expect_err("Should be an error");
        assert_eq!(failure, "testing");
    }
}

#[cfg(test)]
mod test_unmapped {
    use super::*;

    /// Five items with a chunk size of three must arrive as `[1, 2, 3]` at offset 0 and then
    /// `[4, 5]` at offset 3, in that order.
    #[test]
    fn test_separate() {
        let mut calls = 0;
        let outcome = each_sized_chunk(&[1, 2, 3, 4, 5], 3, |chunk, offset| {
            if offset == 0 {
                assert_eq!(calls, 0);
                check_chunk(chunk, &[1, 2, 3], "first chunk");
            } else if offset == 3 {
                assert_eq!(calls, 1);
                check_chunk(chunk, &[4, 5], "second chunk");
            } else {
                panic!("Unexpected offset {}", offset);
            }
            calls += 1;
            Ok::<(), ()>(())
        });
        outcome.unwrap();
    }

    /// When the input length is less than or equal to the chunk size, the callback runs
    /// exactly once and sees every item.
    #[test]
    fn test_leq_chunk_size() {
        for check_size in [5, 6].iter().copied() {
            let mut calls = 0;
            let outcome = each_sized_chunk(&[1, 2, 3, 4, 5], check_size, |chunk, offset| {
                assert_eq!(calls, 0);
                calls += 1;
                assert_eq!(offset, 0);
                check_chunk(chunk, &[1, 2, 3, 4, 5], "only iteration");
                Ok::<(), ()>(())
            });
            outcome.unwrap();
        }
    }

    /// Empty input must succeed without ever invoking the chunk callback.
    #[test]
    fn test_empty_chunk() {
        let empty: &[i64] = &[];
        let outcome: Result<(), ()> =
            each_sized_chunk(empty, 100, |_, _| panic!("Should never be called"));
        outcome.unwrap();
    }

    /// An error from the callback is returned as-is and no later chunk is processed.
    #[test]
    fn test_error() {
        let mut calls = 0;
        let failure = each_sized_chunk(&[1, 2, 3, 4, 5, 6, 7], 3, |_, offset| match offset {
            0 => {
                assert_eq!(calls, 0);
                calls += 1;
                Ok(())
            }
            3 => {
                assert_eq!(calls, 1);
                calls += 1;
                Err("testing".to_string())
            }
            // Make sure we stopped after the error.
            other => panic!("Shouldn't get called with offset of {}", other),
        })
        .expect_err("Should be an error");
        assert_eq!(failure, "testing");
    }
}