jiff/tz/db/concatenated/
enabled.rs

1use alloc::{
2    string::{String, ToString},
3    vec,
4    vec::Vec,
5};
6
7use std::{
8    ffi::OsString,
9    fs::File,
10    path::{Path, PathBuf},
11    sync::{Arc, RwLock},
12    time::Duration,
13};
14
15use crate::{
16    error::{err, Error},
17    timestamp::Timestamp,
18    tz::{concatenated::ConcatenatedTzif, TimeZone, TimeZoneNameIter},
19    util::{self, array_str::ArrayStr, cache::Expiration, utf8},
20};
21
/// How long cached data is trusted before it has to be checked against the
/// `tzdata` file again: five minutes.
///
/// Both the zone cache and the name cache use this as their default.
const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60);
23
24/// The places to look for a concatenated `tzdata` file.
25static TZDATA_LOCATIONS: &[TzdataLocation] = &[
26    TzdataLocation::Env {
27        name: "ANDROID_ROOT",
28        default: "/system",
29        suffix: "usr/share/zoneinfo/tzdata",
30    },
31    TzdataLocation::Env {
32        name: "ANDROID_DATA",
33        default: "/data/misc",
34        suffix: "zoneinfo/current/tzdata",
35    },
36];
37
38pub(crate) struct Database {
39    path: Option<PathBuf>,
40    names: Option<Names>,
41    zones: RwLock<CachedZones>,
42}
43
44impl Database {
45    pub(crate) fn from_env() -> Database {
46        let mut attempted = vec![];
47        for loc in TZDATA_LOCATIONS {
48            let path = loc.to_path_buf();
49            trace!(
50                "opening concatenated tzdata database at {}",
51                path.display()
52            );
53            match Database::from_path(&path) {
54                Ok(db) => return db,
55                Err(_err) => {
56                    trace!("failed opening {}: {_err}", path.display());
57                }
58            }
59            attempted.push(path.to_string_lossy().into_owned());
60        }
61        debug!(
62            "could not find concatenated tzdata database at any of the \
63             following paths: {}",
64            attempted.join(", "),
65        );
66        Database::none()
67    }
68
69    pub(crate) fn from_path(path: &Path) -> Result<Database, Error> {
70        let names = Some(Names::new(path)?);
71        let zones = RwLock::new(CachedZones::new());
72        Ok(Database { path: Some(path.to_path_buf()), names, zones })
73    }
74
75    /// Creates a "dummy" zoneinfo database in which all lookups fail.
76    pub(crate) fn none() -> Database {
77        let path = None;
78        let names = None;
79        let zones = RwLock::new(CachedZones::new());
80        Database { path, names, zones }
81    }
82
83    pub(crate) fn reset(&self) {
84        let mut zones = self.zones.write().unwrap();
85        if let Some(ref names) = self.names {
86            names.reset();
87        }
88        zones.reset();
89    }
90
91    pub(crate) fn get(&self, query: &str) -> Option<TimeZone> {
92        // We just always assume UTC exists and map it to our special const
93        // TimeZone::UTC value.
94        if query == "UTC" {
95            return Some(TimeZone::UTC);
96        }
97        // Similarly for the special `Etc/Unknown` value.
98        if query == "Etc/Unknown" {
99            return Some(TimeZone::unknown());
100        }
101        let path = self.path.as_ref()?;
102        // The fast path is when the query matches a pre-existing unexpired
103        // time zone.
104        {
105            let zones = self.zones.read().unwrap();
106            if let Some(czone) = zones.get(query) {
107                if !czone.is_expired() {
108                    trace!(
109                        "for time zone query `{query}`, \
110                         found cached zone `{}` \
111                         (expiration={}, last_modified={:?})",
112                        czone.tz.diagnostic_name(),
113                        czone.expiration,
114                        czone.last_modified,
115                    );
116                    return Some(czone.tz.clone());
117                }
118            }
119        }
120        // At this point, one of three possible cases is true:
121        //
122        // 1. The given query does not match any time zone in this database.
123        // 2. A time zone exists, but isn't cached.
124        // 3. A zime exists and is cached, but needs to be revalidated.
125        //
126        // While (3) is probably the common case since our TTLs are pretty
127        // short, both (2) and (3) require write access. Thus we rule out (1)
128        // before acquiring a write lock on the entire database. Plus, we'll
129        // need the zone info for case (2) and possibly for (3) if cache
130        // revalidation fails.
131        //
132        // I feel kind of bad about all this because it seems to me like there
133        // is too much work being done while holding on to the write lock.
134        // In particular, it seems like bad juju to do any I/O of any kind
135        // while holding any lock at all. I think I could design something
136        // that avoids doing I/O while holding a lock, but it seems a lot more
137        // complicated. (And what happens if the I/O becomes outdated by the
138        // time you acquire the lock?)
139        let mut zones = self.zones.write().unwrap();
140        let ttl = zones.ttl;
141        match zones.get_zone_index(query) {
142            Ok(i) => {
143                let czone = &mut zones.zones[i];
144                if czone.revalidate(path, ttl) {
145                    // Metadata on the file didn't change, so we assume the
146                    // file hasn't either.
147                    return Some(czone.tz.clone());
148                }
149                // Revalidation failed. Re-read the TZif data.
150                let (scratch1, scratch2) = zones.scratch();
151                let czone = match CachedTimeZone::new(
152                    path, query, ttl, scratch1, scratch2,
153                ) {
154                    Ok(Some(czone)) => czone,
155                    Ok(None) => return None,
156                    Err(_err) => {
157                        warn!(
158                            "failed to re-cache time zone {query} \
159                             from {path}: {_err}",
160                            path = path.display(),
161                        );
162                        return None;
163                    }
164                };
165                let tz = czone.tz.clone();
166                zones.zones[i] = czone;
167                Some(tz)
168            }
169            Err(i) => {
170                let (scratch1, scratch2) = zones.scratch();
171                let czone = match CachedTimeZone::new(
172                    path, query, ttl, scratch1, scratch2,
173                ) {
174                    Ok(Some(czone)) => czone,
175                    Ok(None) => return None,
176                    Err(_err) => {
177                        warn!(
178                            "failed to cache time zone {query} \
179                             from {path}: {_err}",
180                            path = path.display(),
181                        );
182                        return None;
183                    }
184                };
185                let tz = czone.tz.clone();
186                zones.zones.insert(i, czone);
187                Some(tz)
188            }
189        }
190    }
191
192    pub(crate) fn available<'d>(&'d self) -> TimeZoneNameIter<'d> {
193        let Some(path) = self.path.as_ref() else {
194            return TimeZoneNameIter::empty();
195        };
196        let Some(names) = self.names.as_ref() else {
197            return TimeZoneNameIter::empty();
198        };
199        TimeZoneNameIter::from_iter(names.available(path).into_iter())
200    }
201
202    pub(crate) fn is_definitively_empty(&self) -> bool {
203        self.names.is_none()
204    }
205}
206
207impl core::fmt::Debug for Database {
208    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
209        write!(f, "Concatenated(")?;
210        if let Some(ref path) = self.path {
211            write!(f, "{}", path.display())?;
212        } else {
213            write!(f, "unavailable")?;
214        }
215        write!(f, ")")
216    }
217}
218
219#[derive(Debug)]
220struct CachedZones {
221    zones: Vec<CachedTimeZone>,
222    ttl: Duration,
223    scratch1: Vec<u8>,
224    scratch2: Vec<u8>,
225}
226
227impl CachedZones {
228    const DEFAULT_TTL: Duration = DEFAULT_TTL;
229
230    fn new() -> CachedZones {
231        CachedZones {
232            zones: vec![],
233            ttl: CachedZones::DEFAULT_TTL,
234            scratch1: vec![],
235            scratch2: vec![],
236        }
237    }
238
239    fn get(&self, query: &str) -> Option<&CachedTimeZone> {
240        self.get_zone_index(query).ok().map(|i| &self.zones[i])
241    }
242
243    fn get_zone_index(&self, query: &str) -> Result<usize, usize> {
244        self.zones.binary_search_by(|zone| {
245            utf8::cmp_ignore_ascii_case(zone.name(), query)
246        })
247    }
248
249    fn reset(&mut self) {
250        self.zones.clear();
251    }
252
253    fn scratch(&mut self) -> (&mut Vec<u8>, &mut Vec<u8>) {
254        (&mut self.scratch1, &mut self.scratch2)
255    }
256}
257
258#[derive(Clone, Debug)]
259struct CachedTimeZone {
260    tz: TimeZone,
261    expiration: Expiration,
262    last_modified: Option<Timestamp>,
263}
264
265impl CachedTimeZone {
266    /// Create a new cached time zone.
267    ///
268    /// `path` should be a concatenated `tzdata` file. `query` is the IANA time
269    /// zone identifier we're looing for. The `ttl` says how long
270    /// the cached time zone should minimally remain fresh for.
271    ///
272    /// The `scratch1` and `scratch2` given are used to help amortize
273    /// allocation when deserializing TZif data from the concatenated `tzdata`
274    /// file.
275    ///
276    /// If no such time zone exists and no other error occurred, then
277    /// `Ok(None)` is returned.
278    fn new(
279        path: &Path,
280        query: &str,
281        ttl: Duration,
282        scratch1: &mut Vec<u8>,
283        scratch2: &mut Vec<u8>,
284    ) -> Result<Option<CachedTimeZone>, Error> {
285        let file = File::open(path).map_err(|e| Error::io(e).path(path))?;
286        let db = ConcatenatedTzif::open(&file)?;
287        let Some(tz) = db.get(query, scratch1, scratch2)? else {
288            return Ok(None);
289        };
290        let last_modified = util::fs::last_modified_from_file(path, &file);
291        let expiration = Expiration::after(ttl);
292        Ok(Some(CachedTimeZone { tz, expiration, last_modified }))
293    }
294
295    /// Returns true if this time zone has gone stale and should, at minimum,
296    /// be revalidated.
297    fn is_expired(&self) -> bool {
298        self.expiration.is_expired()
299    }
300
301    /// Returns the IANA time zone identifier of this cached time zone.
302    fn name(&self) -> &str {
303        // OK because `ConcatenatedTzif` guarantees all `TimeZone` values it
304        // returns have an IANA name.
305        self.tz.iana_name().unwrap()
306    }
307
308    /// Attempts to revalidate this cached time zone.
309    ///
310    /// Upon successful revalidation (that is, the cached time zone is still
311    /// fresh and okay to use), this returns true. Otherwise, the cached time
312    /// zone should be considered stale and must be re-created.
313    ///
314    /// Note that technically another layer of revalidation could be done.
315    /// For example, we could keep a checksum of the TZif data, and only
316    /// consider rebuilding the time zone when the checksum changes. But I
317    /// think the last modified metadata will in practice be good enough, and
318    /// parsing TZif data should be quite fast.
319    ///
320    /// `path` should be a concatenated `tzdata` file.
321    fn revalidate(&mut self, path: &Path, ttl: Duration) -> bool {
322        // If we started with no last modified timestamp, then I guess we
323        // should always fail revalidation? I suppose a case could be made to
324        // do the opposite: always pass revalidation.
325        let Some(old_last_modified) = self.last_modified else {
326            trace!(
327                "revalidation for {name} in {path} failed because \
328                 old last modified time is unavailable",
329                name = self.name(),
330                path = path.display(),
331            );
332            return false;
333        };
334        let Some(new_last_modified) = util::fs::last_modified_from_path(path)
335        else {
336            trace!(
337                "revalidation for {name} in {path} failed because \
338                 new last modified time is unavailable",
339                name = self.name(),
340                path = path.display(),
341            );
342            return false;
343        };
344        // We consider any change to invalidate cache.
345        if old_last_modified != new_last_modified {
346            trace!(
347                "revalidation for {name} in {path} failed because \
348                 last modified times do not match: old = {old} != {new} = new",
349                name = self.name(),
350                path = path.display(),
351                old = old_last_modified,
352                new = new_last_modified,
353            );
354            return false;
355        }
356        trace!(
357            "revalidation for {name} in {path} succeeded because \
358             last modified times match: old = {old} == {new} = new",
359            name = self.name(),
360            path = path.display(),
361            old = old_last_modified,
362            new = new_last_modified,
363        );
364        self.expiration = Expiration::after(ttl);
365        true
366    }
367}
368
369/// A collection of time zone names extracted from a concatenated tzdata file.
370///
371/// This type is responsible not just for providing the names, but also for
372/// updating them periodically.
373///
374/// Every name _should_ correspond to an entry in the data block of the
375/// corresponding `tzdata` file, but we generally don't take advantage of this.
376/// The reason is that the file could theoretically change. Between when we
377/// extract the names and when we do a TZif lookup later. This is all perfectly
378/// manageable, but it should only be done if there's a benchmark demanding
379/// more effort be spent here. As it stands, we do have a rudimentary caching
380/// mechanism, so not all time zone lookups go through this slower path. (This
381/// is also why `Names` has no lookup routine. There's just a routine to return
382/// all names.)
383#[derive(Debug)]
384struct Names {
385    inner: RwLock<NamesInner>,
386}
387
388#[derive(Debug)]
389struct NamesInner {
390    /// All available names from the `tzdata` file.
391    names: Vec<Arc<str>>,
392    /// The version string read from the `tzdata` file.
393    version: ArrayStr<5>,
394    /// Scratch space used to help amortize allocation when extracting names
395    /// from a `tzdata` file.
396    scratch: Vec<u8>,
397    /// The expiration time of these cached names.
398    ///
399    /// Note that this is a necessary but not sufficient criterion for
400    /// invalidating the cached value.
401    ttl: Duration,
402    /// The time at which the data in `names` becomes stale.
403    expiration: Expiration,
404}
405
406impl Names {
407    /// See commnents in `tz/db/zoneinfo/enabled.rs` about this. We just copied
408    /// it from there.
409    const DEFAULT_TTL: Duration = DEFAULT_TTL;
410
411    /// Create a new collection of names from the concatenated `tzdata` file
412    /// path given.
413    ///
414    /// If no names of time zones could be found in the given directory, then
415    /// an error is returned.
416    fn new(path: &Path) -> Result<Names, Error> {
417        let path = path.to_path_buf();
418        let mut scratch = vec![];
419        let (names, version) = read_names_and_version(&path, &mut scratch)?;
420        trace!(
421            "found concatenated tzdata at {path} \
422             with version {version} and {len} \
423             IANA time zone identifiers",
424            path = path.display(),
425            len = names.len(),
426        );
427        let ttl = Names::DEFAULT_TTL;
428        let expiration = Expiration::after(ttl);
429        let inner = NamesInner { names, version, scratch, ttl, expiration };
430        Ok(Names { inner: RwLock::new(inner) })
431    }
432
433    /// Returns all available time zone names after attempting a refresh of
434    /// the underlying data if it's stale.
435    fn available(&self, path: &Path) -> Vec<String> {
436        let mut inner = self.inner.write().unwrap();
437        inner.attempt_refresh(path);
438        inner.available()
439    }
440
441    fn reset(&self) {
442        self.inner.write().unwrap().reset();
443    }
444}
445
446impl NamesInner {
447    /// Returns all available time zone names.
448    fn available(&self) -> Vec<String> {
449        self.names.iter().map(|name| name.to_string()).collect()
450    }
451
452    /// Attempts a refresh, but only follows through if the TTL has been
453    /// exceeded.
454    ///
455    /// The caller must ensure that the other cache invalidation criteria
456    /// have been upheld. For example, this should only be called for a missed
457    /// zone name lookup.
458    fn attempt_refresh(&mut self, path: &Path) {
459        if self.expiration.is_expired() {
460            self.refresh(path);
461        }
462    }
463
464    /// Forcefully refreshes the cached names with possibly new data from disk.
465    /// If an error occurs when fetching the names, then no names are updated
466    /// (but the `expires_at` is updated). This will also emit a warning log on
467    /// failure.
468    fn refresh(&mut self, path: &Path) {
469        // PERF: Should we try to move this tzdb handling to run outside of a
470        // lock? It probably happens pretty rarely, so it might not matter.
471        let result = read_names_and_version(path, &mut self.scratch);
472        self.expiration = Expiration::after(self.ttl);
473        match result {
474            Ok((names, version)) => {
475                trace!(
476                    "refreshed concatenated tzdata at {path} \
477                     with version {version} and {len} \
478                     IANA time zone identifiers",
479                    path = path.display(),
480                    len = names.len(),
481                );
482                self.names = names;
483                self.version = version;
484            }
485            Err(_err) => {
486                warn!(
487                    "failed to refresh concatenated time zone name cache \
488                     for {path}: {_err}",
489                    path = path.display(),
490                )
491            }
492        }
493    }
494
495    /// Resets the state such that the next lookup is guaranteed to force a
496    /// cache refresh, and that it is impossible for any data to be stale.
497    fn reset(&mut self) {
498        // This will force the next lookup to fail.
499        self.names.clear();
500        // And this will force the next failed lookup to result in a refresh.
501        self.expiration = Expiration::expired();
502    }
503}
504
/// Describes where a `tzdata` file may be found.
///
/// Only an Android-centric lookup via environment variables is supported
/// right now. If we wanted to check a fixed path like we do for `ZoneInfo`,
/// then adding a `Fixed` variant here would be appropriate.
#[derive(Debug)]
enum TzdataLocation {
    /// The file lives at `suffix` below the directory named by the
    /// environment variable `name`, or below `default` when that variable
    /// is not set.
    Env { name: &'static str, default: &'static str, suffix: &'static str },
}

impl TzdataLocation {
    /// Resolves this location to a concrete path, which might involve an
    /// environment variable lookup.
    fn to_path_buf(&self) -> PathBuf {
        match *self {
            TzdataLocation::Env { name, default, suffix } => {
                let root: OsString = match std::env::var_os(name) {
                    Some(value) => value,
                    None => OsString::from(default),
                };
                PathBuf::from(root).join(suffix)
            }
        }
    }
}
529
530/// Reads only the IANA time zone identifiers from the given path (and the
531/// version of the database).
532///
533/// The `scratch` given is used to help amortize allocation when deserializing
534/// names from the concatenated `tzdata` file.
535///
536/// This returns an error if reading was successful but no names were found.
537fn read_names_and_version(
538    path: &Path,
539    scratch: &mut Vec<u8>,
540) -> Result<(Vec<Arc<str>>, ArrayStr<5>), Error> {
541    let file = File::open(path).map_err(|e| Error::io(e).path(path))?;
542    let db = ConcatenatedTzif::open(file)?;
543    let names: Vec<Arc<str>> =
544        db.available(scratch)?.into_iter().map(Arc::from).collect();
545    if names.is_empty() {
546        return Err(err!(
547            "found no IANA time zone identifiers in \
548             concatenated tzdata file at {path}",
549            path = path.display(),
550        ));
551    }
552    Ok((names, db.version()))
553}
554
#[cfg(test)]
mod tests {
    use super::*;

    /// DEBUG COMMAND
    ///
    /// Takes environment variable `JIFF_DEBUG_CONCATENATED_TZDATA` as input
    /// (the path to a concatenated `tzdata` file) and prints a list of all
    /// time zone names in that file (one per line). When the variable isn't
    /// set, this does nothing.
    ///
    /// Callers may also set `RUST_LOG` to get extra debugging output.
    #[test]
    fn debug_tzdata_list() -> anyhow::Result<()> {
        let _ = crate::logging::Logger::init();

        const ENV: &str = "JIFF_DEBUG_CONCATENATED_TZDATA";
        let path = match std::env::var_os(ENV) {
            Some(val) => PathBuf::from(val),
            None => return Ok(()),
        };
        let db = Database::from_path(&path)?;
        for name in db.available() {
            std::eprintln!("{name}");
        }
        Ok(())
    }
}