jiff/tz/db/zoneinfo/
enabled.rs

1use alloc::{
2    string::{String, ToString},
3    vec,
4    vec::Vec,
5};
6
7use std::{
8    fs::File,
9    io::Read,
10    path::{Path, PathBuf},
11    sync::{Arc, RwLock},
12    time::Duration,
13};
14
15use crate::{
16    error::{err, Error},
17    timestamp::Timestamp,
18    tz::{tzif::is_possibly_tzif, TimeZone, TimeZoneNameIter},
19    util::{self, cache::Expiration, parse, utf8},
20};
21
/// The default time-to-live used by the caches in this module: five minutes.
const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60);

/// Directories probed, in this order, when looking for a zoneinfo database
/// without (or after a failed) explicit `TZDIR` request.
static ZONEINFO_DIRECTORIES: &[&str] =
    &["/usr/share/zoneinfo", "/etc/zoneinfo"];
26
/// A time zone database backed by a zoneinfo directory on the file system.
///
/// When no usable directory could be opened, `dir` and `names` are both
/// `None` and only the special-cased names handled directly by `get` can be
/// resolved.
pub(crate) struct Database {
    /// The directory this database was opened from, if any.
    dir: Option<PathBuf>,
    /// The time zone names discovered under `dir`, if any.
    names: Option<ZoneInfoNames>,
    /// Cache of already-parsed time zones, guarded by a reader/writer lock.
    zones: RwLock<CachedZones>,
}
32
33impl Database {
34    pub(crate) fn from_env() -> Database {
35        if let Some(tzdir) = std::env::var_os("TZDIR") {
36            let tzdir = PathBuf::from(tzdir);
37            trace!("opening zoneinfo database at TZDIR={}", tzdir.display());
38            match Database::from_dir(&tzdir) {
39                Ok(db) => return db,
40                Err(_err) => {
41                    // This is a WARN because it represents a failure to
42                    // satisfy a more direct request, which should be louder
43                    // than failures related to auto-detection.
44                    warn!("failed opening TZDIR={}: {_err}", tzdir.display());
45                    // fall through to attempt default directories
46                }
47            }
48        }
49        for dir in ZONEINFO_DIRECTORIES {
50            let tzdir = Path::new(dir);
51            trace!("opening zoneinfo database at {}", tzdir.display());
52            match Database::from_dir(&tzdir) {
53                Ok(db) => return db,
54                Err(_err) => {
55                    trace!("failed opening {}: {_err}", tzdir.display());
56                }
57            }
58        }
59        debug!(
60            "could not find zoneinfo database at any of the following \
61             paths: {}",
62            ZONEINFO_DIRECTORIES.join(", "),
63        );
64        Database::none()
65    }
66
67    pub(crate) fn from_dir(dir: &Path) -> Result<Database, Error> {
68        let names = Some(ZoneInfoNames::new(dir)?);
69        let zones = RwLock::new(CachedZones::new());
70        Ok(Database { dir: Some(dir.to_path_buf()), names, zones })
71    }
72
73    /// Creates a "dummy" zoneinfo database in which all lookups fail.
74    pub(crate) fn none() -> Database {
75        let dir = None;
76        let names = None;
77        let zones = RwLock::new(CachedZones::new());
78        Database { dir, names, zones }
79    }
80
81    pub(crate) fn reset(&self) {
82        let mut zones = self.zones.write().unwrap();
83        if let Some(ref names) = self.names {
84            names.reset();
85        }
86        zones.reset();
87    }
88
89    pub(crate) fn get(&self, query: &str) -> Option<TimeZone> {
90        // We just always assume UTC exists and map it to our special const
91        // TimeZone::UTC value.
92        if query == "UTC" {
93            return Some(TimeZone::UTC);
94        }
95        // Similarly for the special `Etc/Unknown` value.
96        if query == "Etc/Unknown" {
97            return Some(TimeZone::unknown());
98        }
99        // If we couldn't build any time zone names, then every lookup will
100        // fail. So just bail now.
101        let names = self.names.as_ref()?;
102        // The fast path is when the query matches a pre-existing unexpired
103        // time zone.
104        {
105            let zones = self.zones.read().unwrap();
106            if let Some(czone) = zones.get(query) {
107                if !czone.is_expired() {
108                    trace!(
109                        "for time zone query `{query}`, \
110                         found cached zone `{}` \
111                         (expiration={}, last_modified={:?})",
112                        czone.tz.diagnostic_name(),
113                        czone.expiration,
114                        czone.last_modified,
115                    );
116                    return Some(czone.tz.clone());
117                }
118            }
119        }
120        // At this point, one of three possible cases is true:
121        //
122        // 1. The given query does not match any time zone in this database.
123        // 2. A time zone exists, but isn't cached.
124        // 3. A zime exists and is cached, but needs to be revalidated.
125        //
126        // While (3) is probably the common case since our TTLs are pretty
127        // short, both (2) and (3) require write access. Thus we rule out (1)
128        // before acquiring a write lock on the entire database. Plus, we'll
129        // need the zone info for case (2) and possibly for (3) if cache
130        // revalidation fails.
131        //
132        // I feel kind of bad about all this because it seems to me like there
133        // is too much work being done while holding on to the write lock.
134        // In particular, it seems like bad juju to do any I/O of any kind
135        // while holding any lock at all. I think I could design something
136        // that avoids doing I/O while holding a lock, but it seems a lot more
137        // complicated. (And what happens if the I/O becomes outdated by the
138        // time you acquire the lock?)
139        let info = names.get(query)?;
140        let mut zones = self.zones.write().unwrap();
141        let ttl = zones.ttl;
142        match zones.get_zone_index(query) {
143            Ok(i) => {
144                let czone = &mut zones.zones[i];
145                if czone.revalidate(&info, ttl) {
146                    // Metadata on the file didn't change, so we assume the
147                    // file hasn't either.
148                    return Some(czone.tz.clone());
149                }
150                // Revalidation failed. Re-read the TZif data.
151                let czone = match CachedTimeZone::new(&info, zones.ttl) {
152                    Ok(czone) => czone,
153                    Err(_err) => {
154                        warn!(
155                            "failed to re-cache time zone from file {}: {_err}",
156                            info.inner.full.display(),
157                        );
158                        return None;
159                    }
160                };
161                let tz = czone.tz.clone();
162                zones.zones[i] = czone;
163                Some(tz)
164            }
165            Err(i) => {
166                let czone = match CachedTimeZone::new(&info, ttl) {
167                    Ok(czone) => czone,
168                    Err(_err) => {
169                        warn!(
170                            "failed to cache time zone from file {}: {_err}",
171                            info.inner.full.display(),
172                        );
173                        return None;
174                    }
175                };
176                let tz = czone.tz.clone();
177                zones.zones.insert(i, czone);
178                Some(tz)
179            }
180        }
181    }
182
183    pub(crate) fn available<'d>(&'d self) -> TimeZoneNameIter<'d> {
184        let Some(names) = self.names.as_ref() else {
185            return TimeZoneNameIter::empty();
186        };
187        TimeZoneNameIter::from_iter(names.available().into_iter())
188    }
189
190    pub(crate) fn is_definitively_empty(&self) -> bool {
191        self.names.is_none()
192    }
193}
194
195impl core::fmt::Debug for Database {
196    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
197        write!(f, "ZoneInfo(")?;
198        if let Some(ref dir) = self.dir {
199            write!(f, "{}", dir.display())?;
200        } else {
201            write!(f, "unavailable")?;
202        }
203        write!(f, ")")
204    }
205}
206
/// The collection of time zones that have already been read and parsed.
#[derive(Debug)]
struct CachedZones {
    /// The cached time zones. Lookups use binary search, and new entries
    /// are inserted at the index a failed search reports.
    zones: Vec<CachedTimeZone>,
    /// The time-to-live given to each cached time zone.
    ttl: Duration,
}
212
213impl CachedZones {
214    const DEFAULT_TTL: Duration = DEFAULT_TTL;
215
216    fn new() -> CachedZones {
217        CachedZones { zones: vec![], ttl: CachedZones::DEFAULT_TTL }
218    }
219
220    fn get(&self, query: &str) -> Option<&CachedTimeZone> {
221        self.get_zone_index(query).ok().map(|i| &self.zones[i])
222    }
223
224    fn get_zone_index(&self, query: &str) -> Result<usize, usize> {
225        // The common case is that our query matches the time zone name case
226        // sensitively, so check for that first. It's a bit cheaper than doing
227        // a case insensitive search.
228        if let Ok(i) = self
229            .zones
230            .binary_search_by(|zone| zone.name.original().cmp(&query))
231        {
232            return Ok(i);
233        }
234        self.zones.binary_search_by(|zone| {
235            utf8::cmp_ignore_ascii_case(zone.name.lower(), query)
236        })
237    }
238
239    fn reset(&mut self) {
240        self.zones.clear();
241    }
242}
243
/// A parsed time zone together with the bookkeeping used to decide when it
/// must be revalidated against the file it was read from.
#[derive(Clone, Debug)]
struct CachedTimeZone {
    /// The time zone built from the TZif data.
    tz: TimeZone,
    /// The name entry (including the file path) this zone was built from.
    name: ZoneInfoName,
    /// When this entry stops being considered fresh.
    expiration: Expiration,
    /// The file's last modified time as recorded when the zone was loaded,
    /// or `None` if it could not be determined.
    last_modified: Option<Timestamp>,
}
251
252impl CachedTimeZone {
253    /// Create a new cached time zone.
254    ///
255    /// The `info` says which time zone to create and where to find it. The
256    /// `ttl` says how long the cached time zone should minimally remain fresh
257    /// for.
258    fn new(
259        info: &ZoneInfoName,
260        ttl: Duration,
261    ) -> Result<CachedTimeZone, Error> {
262        let path = &info.inner.full;
263        let mut file =
264            File::open(path).map_err(|e| Error::io(e).path(path))?;
265        let mut data = vec![];
266        file.read_to_end(&mut data).map_err(|e| Error::io(e).path(path))?;
267        let tz = TimeZone::tzif(&info.inner.original, &data)
268            .map_err(|e| e.path(path))?;
269        let name = info.clone();
270        let last_modified = util::fs::last_modified_from_file(path, &file);
271        let expiration = Expiration::after(ttl);
272        Ok(CachedTimeZone { tz, name, expiration, last_modified })
273    }
274
275    /// Returns true if this time zone has gone stale and should, at minimum,
276    /// be revalidated.
277    fn is_expired(&self) -> bool {
278        self.expiration.is_expired()
279    }
280
281    /// Attempts to revalidate this cached time zone.
282    ///
283    /// Upon successful revalidation (that is, the cached time zone is still
284    /// fresh and okay to use), this returns true. Otherwise, the cached time
285    /// zone should be considered stale and must be re-created.
286    ///
287    /// Note that technically another layer of revalidation could be done.
288    /// For example, we could keep a checksum of the TZif data, and only
289    /// consider rebuilding the time zone when the checksum changes. But I
290    /// think the last modified metadata will in practice be good enough, and
291    /// parsing a TZif file should be quite fast.
292    fn revalidate(&mut self, info: &ZoneInfoName, ttl: Duration) -> bool {
293        // If we started with no last modified timestamp, then I guess we
294        // should always fail revalidation? I suppose a case could be made to
295        // do the opposite: always pass revalidation.
296        let Some(old_last_modified) = self.last_modified else {
297            trace!(
298                "revalidation for {} failed because old last modified time \
299                 is unavailable",
300                info.inner.full.display(),
301            );
302            return false;
303        };
304        let Some(new_last_modified) =
305            util::fs::last_modified_from_path(&info.inner.full)
306        else {
307            trace!(
308                "revalidation for {} failed because new last modified time \
309                 is unavailable",
310                info.inner.full.display(),
311            );
312            return false;
313        };
314        // We consider any change to invalidate cache.
315        if old_last_modified != new_last_modified {
316            trace!(
317                "revalidation for {} failed because last modified times \
318                 do not match: old = {} != {} = new",
319                info.inner.full.display(),
320                old_last_modified,
321                new_last_modified,
322            );
323            return false;
324        }
325        trace!(
326            "revalidation for {} succeeded because last modified times \
327             match: old = {} == {} = new",
328            info.inner.full.display(),
329            old_last_modified,
330            new_last_modified,
331        );
332        self.expiration = Expiration::after(ttl);
333        true
334    }
335}
336
/// A collection of time zone names extracted from a zoneinfo directory.
///
/// Each time zone name maps to a full path on the file system corresponding
/// to the TZif formatted data file for that time zone.
///
/// This type is responsible not just for providing the names, but also for
/// updating them periodically.
#[derive(Debug)]
struct ZoneInfoNames {
    /// The names and their refresh state, behind a reader/writer lock so
    /// that lookups through `&self` can refresh them.
    inner: RwLock<ZoneInfoNamesInner>,
}
348
/// The lock-protected state of `ZoneInfoNames`: the names themselves plus
/// the data needed to decide when to re-scan the directory.
#[derive(Debug)]
struct ZoneInfoNamesInner {
    /// The directory from which we collected time zone names.
    dir: PathBuf,
    /// All available names from the `zoneinfo` directory.
    ///
    /// Each name corresponds to the suffix of a file path
    /// starting with `dir`. For example, `America/New_York` in
    /// `/usr/share/zoneinfo/America/New_York`. Each name also has a normalized
    /// lowercase version of the name for easy case insensitive lookup.
    names: Vec<ZoneInfoName>,
    /// The time-to-live used to compute a new `expiration` whenever the
    /// names are refreshed.
    ///
    /// Note that an expired TTL is a necessary but not sufficient criterion
    /// for invalidating the cached value.
    ttl: Duration,
    /// The time at which the data in `names` becomes stale.
    expiration: Expiration,
}
368
369impl ZoneInfoNames {
370    /// The default amount of time to wait before checking for added/removed
371    /// time zones.
372    ///
373    /// Note that this TTL is a necessary but not sufficient criterion to
374    /// provoke cache invalidation. Namely, since we don't expect the set of
375    /// possible time zone names to change often, we only invalidate the cache
376    /// under these circumstances:
377    ///
378    /// 1. The TTL or more has passed since the last time the names were
379    /// attempted to be refreshed (even if it wasn't successful).
380    /// 2. A name lookup is attempted and it isn't found. This is required
381    /// because otherwise there isn't much point in refreshing the names.
382    ///
383    /// This logic does not deal as well with removals from the underlying time
384    /// zone database. That in turn is covered by the TTL on constructing the
385    /// `TimeZone` values themselves.
386    ///
387    /// We could just use the second criterion on its own, but we require the
388    /// TTL to expire out of "good sense." Namely, if there is something borked
389    /// in the environment, the TTL will prevent doing a full scan of the
390    /// zoneinfo directory for every missed time zone lookup.
391    const DEFAULT_TTL: Duration = DEFAULT_TTL;
392
393    /// Create a new collection of names from the zoneinfo database directory
394    /// given.
395    ///
396    /// If no names of time zones with corresponding TZif data files could be
397    /// found in the given directory, then an error is returned.
398    fn new(dir: &Path) -> Result<ZoneInfoNames, Error> {
399        let names = walk(dir)?;
400        let dir = dir.to_path_buf();
401        let ttl = ZoneInfoNames::DEFAULT_TTL;
402        let expiration = Expiration::after(ttl);
403        let inner = ZoneInfoNamesInner { dir, names, ttl, expiration };
404        Ok(ZoneInfoNames { inner: RwLock::new(inner) })
405    }
406
407    /// Attempts to find the name entry for the given query using a case
408    /// insensitive search.
409    ///
410    /// If no match is found and the data is stale, then the time zone names
411    /// are refreshed from the file system before doing another check.
412    fn get(&self, query: &str) -> Option<ZoneInfoName> {
413        {
414            let inner = self.inner.read().unwrap();
415            if let Some(zone_info_name) = inner.get(query) {
416                return Some(zone_info_name);
417            }
418            drop(inner); // unlock
419        }
420        let mut inner = self.inner.write().unwrap();
421        inner.attempt_refresh();
422        inner.get(query)
423    }
424
425    /// Returns all available time zone names after attempting a refresh of
426    /// the underlying data if it's stale.
427    fn available(&self) -> Vec<String> {
428        let mut inner = self.inner.write().unwrap();
429        inner.attempt_refresh();
430        inner.available()
431    }
432
433    fn reset(&self) {
434        self.inner.write().unwrap().reset();
435    }
436}
437
438impl ZoneInfoNamesInner {
439    /// Attempts to find the name entry for the given query using a case
440    /// insensitive search.
441    ///
442    /// `None` is returned if one isn't found.
443    fn get(&self, query: &str) -> Option<ZoneInfoName> {
444        self.names
445            .binary_search_by(|n| {
446                utf8::cmp_ignore_ascii_case(&n.inner.lower, query)
447            })
448            .ok()
449            .map(|i| self.names[i].clone())
450    }
451
452    /// Returns all available time zone names.
453    fn available(&self) -> Vec<String> {
454        self.names.iter().map(|n| n.inner.original.clone()).collect()
455    }
456
457    /// Attempts a refresh, but only follows through if the TTL has been
458    /// exceeded.
459    ///
460    /// The caller must ensure that the other cache invalidation criteria
461    /// have been upheld. For example, this should only be called for a missed
462    /// zone name lookup.
463    fn attempt_refresh(&mut self) {
464        if self.expiration.is_expired() {
465            self.refresh();
466        }
467    }
468
469    /// Forcefully refreshes the cached names with possibly new data from disk.
470    /// If an error occurs when fetching the names, then no names are updated
471    /// (but the `expires_at` is updated). This will also emit a warning log on
472    /// failure.
473    fn refresh(&mut self) {
474        // PERF: Should we try to move this `walk` call to run outside of a
475        // lock? It probably happens pretty rarely, so it might not matter.
476        let result = walk(&self.dir);
477        self.expiration = Expiration::after(self.ttl);
478        match result {
479            Ok(names) => {
480                self.names = names;
481            }
482            Err(_err) => {
483                warn!(
484                    "failed to refresh zoneinfo time zone name cache \
485                     for {}: {_err}",
486                    self.dir.display(),
487                )
488            }
489        }
490    }
491
492    /// Resets the state such that the next lookup is guaranteed to force a
493    /// cache refresh, and that it is impossible for any data to be stale.
494    fn reset(&mut self) {
495        // This will force the next lookup to fail.
496        self.names.clear();
497        // And this will force the next failed lookup to result in a refresh.
498        self.expiration = Expiration::expired();
499    }
500}
501
/// A single TZif entry in a zoneinfo database directory.
///
/// The data lives behind an `Arc`, so cloning a name does not copy its
/// strings or path.
#[derive(Clone, Debug)]
struct ZoneInfoName {
    /// The shared path and name data for this entry.
    inner: Arc<ZoneInfoNameInner>,
}
507
/// The data shared by all clones of a `ZoneInfoName`.
#[derive(Clone, Debug)]
struct ZoneInfoNameInner {
    /// A file path resolvable to the corresponding file relative to the
    /// working directory of this program.
    ///
    /// Should we canonicalize this to an absolute path? I guess in practice
    /// it is an absolute path in most cases.
    full: PathBuf,
    /// The original name of this time zone taken from the file path with
    /// no additional changes.
    original: String,
    /// The ASCII-lowercased version of `original`. This is how we determine
    /// name equality.
    lower: String,
}
523
524impl ZoneInfoName {
525    /// Create a new time zone info name.
526    ///
527    /// `base` should corresponding to the zoneinfo directory from which the
528    /// suffix `time_zone_name` path was returned.
529    fn new(base: &Path, time_zone_name: &Path) -> Result<ZoneInfoName, Error> {
530        let full = base.join(time_zone_name);
531        let original = parse::os_str_utf8(time_zone_name.as_os_str())
532            .map_err(|err| err.path(base))?;
533        let lower = original.to_ascii_lowercase();
534        let inner =
535            ZoneInfoNameInner { full, original: original.to_string(), lower };
536        Ok(ZoneInfoName { inner: Arc::new(inner) })
537    }
538
539    /// Returns the original name of this time zone.
540    fn original(&self) -> &str {
541        &self.inner.original
542    }
543
544    /// Returns the lowercase name of this time zone.
545    fn lower(&self) -> &str {
546        &self.inner.lower
547    }
548}
549
550impl Eq for ZoneInfoName {}
551
552impl PartialEq for ZoneInfoName {
553    fn eq(&self, rhs: &ZoneInfoName) -> bool {
554        self.inner.lower == rhs.inner.lower
555    }
556}
557
558impl Ord for ZoneInfoName {
559    fn cmp(&self, rhs: &ZoneInfoName) -> core::cmp::Ordering {
560        self.inner.lower.cmp(&rhs.inner.lower)
561    }
562}
563
564impl PartialOrd for ZoneInfoName {
565    fn partial_cmp(&self, rhs: &ZoneInfoName) -> Option<core::cmp::Ordering> {
566        Some(self.cmp(rhs))
567    }
568}
569
570impl core::hash::Hash for ZoneInfoName {
571    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
572        self.inner.lower.hash(state);
573    }
574}
575
576/// Recursively walks the given directory and returns the names of all time
577/// zones found.
578///
579/// This is guaranteed to return either one or more time zone names OR an
580/// error. That is, `Ok(vec![])` is an impossible result.
581///
582/// This will attempt to collect as many names as possible, even if some I/O
583/// operations fail.
584///
585/// The names returned are sorted in lexicographic order according to the
586/// lowercase form of each name.
587fn walk(start: &Path) -> Result<Vec<ZoneInfoName>, Error> {
588    let mut first_err: Option<Error> = None;
589    let mut seterr = |path: &Path, err: Error| {
590        if first_err.is_none() {
591            first_err = Some(err.path(path));
592        }
593    };
594
595    let mut names = vec![];
596    let mut stack = vec![start.to_path_buf()];
597    while let Some(dir) = stack.pop() {
598        let readdir = match dir.read_dir() {
599            Ok(readdir) => readdir,
600            Err(err) => {
601                trace!(
602                    "error when reading {} as a directory: {err}",
603                    dir.display()
604                );
605                seterr(&dir, Error::io(err));
606                continue;
607            }
608        };
609        for result in readdir {
610            let dent = match result {
611                Ok(dent) => dent,
612                Err(err) => {
613                    trace!(
614                        "error when reading directory entry from {}: {err}",
615                        dir.display()
616                    );
617                    seterr(&dir, Error::io(err));
618                    continue;
619                }
620            };
621            let file_type = match dent.file_type() {
622                Ok(file_type) => file_type,
623                Err(err) => {
624                    let path = dent.path();
625                    trace!(
626                        "error when reading file type from {}: {err}",
627                        path.display()
628                    );
629                    seterr(&path, Error::io(err));
630                    continue;
631                }
632            };
633            let path = dent.path();
634            if file_type.is_dir() {
635                stack.push(path);
636                continue;
637            }
638            // We assume symlinks are files, although this may not be
639            // appropriate. If we need to also handle the case when they're
640            // directories, then we'll need to add symlink loop detection.
641            //
642            // Otherwise, at this point, we peek at the first few bytes of a
643            // file to do a low false positive and never false negative check
644            // for a TZif file.
645
646            let mut f = match File::open(&path) {
647                Ok(f) => f,
648                Err(err) => {
649                    trace!("failed to open {}: {err}", path.display());
650                    seterr(&path, Error::io(err));
651                    continue;
652                }
653            };
654            let mut buf = [0; 4];
655            if let Err(err) = f.read_exact(&mut buf) {
656                trace!(
657                    "failed to read first 4 bytes of {}: {err}",
658                    path.display()
659                );
660                seterr(&path, Error::io(err));
661                continue;
662            }
663            if !is_possibly_tzif(&buf) {
664                // This is a trace because it's perfectly normal for a
665                // non-TZif file to be in a zoneinfo directory. But it could
666                // still be potentially useful debugging info.
667                trace!(
668                    "found file {} that isn't TZif since its first \
669                     four bytes are {:?}",
670                    path.display(),
671                    crate::util::escape::Bytes(&buf),
672                );
673                continue;
674            }
675            let time_zone_name = match path.strip_prefix(start) {
676                Ok(time_zone_name) => time_zone_name,
677                Err(err) => {
678                    trace!(
679                        "failed to extract time zone name from {} \
680                         using {} as a base: {err}",
681                        path.display(),
682                        start.display(),
683                    );
684                    seterr(&path, Error::adhoc(err));
685                    continue;
686                }
687            };
688            let zone_info_name =
689                match ZoneInfoName::new(&start, time_zone_name) {
690                    Ok(zone_info_name) => zone_info_name,
691                    Err(err) => {
692                        seterr(&path, err);
693                        continue;
694                    }
695                };
696            names.push(zone_info_name);
697        }
698    }
699    if names.is_empty() {
700        let err = first_err
701            .take()
702            .unwrap_or_else(|| err!("{}: no TZif files", start.display()));
703        Err(err)
704    } else {
705        // If we found at least one valid name, then we declare success and
706        // drop any error we might have found. They do all get logged above
707        // though.
708        names.sort();
709        Ok(names)
710    }
711}
712
#[cfg(test)]
mod tests {
    use super::*;

    /// DEBUG COMMAND
    ///
    /// Reads a directory path from the `JIFF_DEBUG_ZONEINFO_DIR` environment
    /// variable and prints the name of every time zone found in it to
    /// stderr, one per line. When the variable is unset, this does nothing.
    ///
    /// Callers may also set `RUST_LOG` to get extra debugging output.
    #[test]
    fn debug_zoneinfo_walk() -> anyhow::Result<()> {
        let _ = crate::logging::Logger::init();

        const ENV: &str = "JIFF_DEBUG_ZONEINFO_DIR";
        let dir = match std::env::var_os(ENV) {
            Some(val) => PathBuf::from(val),
            None => return Ok(()),
        };
        for name in walk(&dir)? {
            std::eprintln!("{}", name.original());
        }
        Ok(())
    }
}
736}