jiff/tz/db/zoneinfo/enabled.rs
1use alloc::{
2 string::{String, ToString},
3 vec,
4 vec::Vec,
5};
6
7use std::{
8 fs::File,
9 io::Read,
10 path::{Path, PathBuf},
11 sync::{Arc, RwLock},
12 time::Duration,
13};
14
15use crate::{
16 error::{err, Error},
17 timestamp::Timestamp,
18 tz::{tzif::is_possibly_tzif, TimeZone, TimeZoneNameIter},
19 util::{self, cache::Expiration, parse, utf8},
20};
21
/// The time-to-live shared by the cached time zone names and the cached
/// time zones in this module: five minutes.
const DEFAULT_TTL: Duration = Duration::from_secs(5 * 60);

/// The directories probed, in order, by `Database::from_env` when looking
/// for a zoneinfo database.
static ZONEINFO_DIRECTORIES: &[&str] =
    &["/usr/share/zoneinfo", "/etc/zoneinfo"];
26
27pub(crate) struct Database {
28 dir: Option<PathBuf>,
29 names: Option<ZoneInfoNames>,
30 zones: RwLock<CachedZones>,
31}
32
33impl Database {
34 pub(crate) fn from_env() -> Database {
35 if let Some(tzdir) = std::env::var_os("TZDIR") {
36 let tzdir = PathBuf::from(tzdir);
37 trace!("opening zoneinfo database at TZDIR={}", tzdir.display());
38 match Database::from_dir(&tzdir) {
39 Ok(db) => return db,
40 Err(_err) => {
41 // This is a WARN because it represents a failure to
42 // satisfy a more direct request, which should be louder
43 // than failures related to auto-detection.
44 warn!("failed opening TZDIR={}: {_err}", tzdir.display());
45 // fall through to attempt default directories
46 }
47 }
48 }
49 for dir in ZONEINFO_DIRECTORIES {
50 let tzdir = Path::new(dir);
51 trace!("opening zoneinfo database at {}", tzdir.display());
52 match Database::from_dir(&tzdir) {
53 Ok(db) => return db,
54 Err(_err) => {
55 trace!("failed opening {}: {_err}", tzdir.display());
56 }
57 }
58 }
59 debug!(
60 "could not find zoneinfo database at any of the following \
61 paths: {}",
62 ZONEINFO_DIRECTORIES.join(", "),
63 );
64 Database::none()
65 }
66
67 pub(crate) fn from_dir(dir: &Path) -> Result<Database, Error> {
68 let names = Some(ZoneInfoNames::new(dir)?);
69 let zones = RwLock::new(CachedZones::new());
70 Ok(Database { dir: Some(dir.to_path_buf()), names, zones })
71 }
72
73 /// Creates a "dummy" zoneinfo database in which all lookups fail.
74 pub(crate) fn none() -> Database {
75 let dir = None;
76 let names = None;
77 let zones = RwLock::new(CachedZones::new());
78 Database { dir, names, zones }
79 }
80
81 pub(crate) fn reset(&self) {
82 let mut zones = self.zones.write().unwrap();
83 if let Some(ref names) = self.names {
84 names.reset();
85 }
86 zones.reset();
87 }
88
89 pub(crate) fn get(&self, query: &str) -> Option<TimeZone> {
90 // We just always assume UTC exists and map it to our special const
91 // TimeZone::UTC value.
92 if query == "UTC" {
93 return Some(TimeZone::UTC);
94 }
95 // Similarly for the special `Etc/Unknown` value.
96 if query == "Etc/Unknown" {
97 return Some(TimeZone::unknown());
98 }
99 // If we couldn't build any time zone names, then every lookup will
100 // fail. So just bail now.
101 let names = self.names.as_ref()?;
102 // The fast path is when the query matches a pre-existing unexpired
103 // time zone.
104 {
105 let zones = self.zones.read().unwrap();
106 if let Some(czone) = zones.get(query) {
107 if !czone.is_expired() {
108 trace!(
109 "for time zone query `{query}`, \
110 found cached zone `{}` \
111 (expiration={}, last_modified={:?})",
112 czone.tz.diagnostic_name(),
113 czone.expiration,
114 czone.last_modified,
115 );
116 return Some(czone.tz.clone());
117 }
118 }
119 }
120 // At this point, one of three possible cases is true:
121 //
122 // 1. The given query does not match any time zone in this database.
123 // 2. A time zone exists, but isn't cached.
124 // 3. A zime exists and is cached, but needs to be revalidated.
125 //
126 // While (3) is probably the common case since our TTLs are pretty
127 // short, both (2) and (3) require write access. Thus we rule out (1)
128 // before acquiring a write lock on the entire database. Plus, we'll
129 // need the zone info for case (2) and possibly for (3) if cache
130 // revalidation fails.
131 //
132 // I feel kind of bad about all this because it seems to me like there
133 // is too much work being done while holding on to the write lock.
134 // In particular, it seems like bad juju to do any I/O of any kind
135 // while holding any lock at all. I think I could design something
136 // that avoids doing I/O while holding a lock, but it seems a lot more
137 // complicated. (And what happens if the I/O becomes outdated by the
138 // time you acquire the lock?)
139 let info = names.get(query)?;
140 let mut zones = self.zones.write().unwrap();
141 let ttl = zones.ttl;
142 match zones.get_zone_index(query) {
143 Ok(i) => {
144 let czone = &mut zones.zones[i];
145 if czone.revalidate(&info, ttl) {
146 // Metadata on the file didn't change, so we assume the
147 // file hasn't either.
148 return Some(czone.tz.clone());
149 }
150 // Revalidation failed. Re-read the TZif data.
151 let czone = match CachedTimeZone::new(&info, zones.ttl) {
152 Ok(czone) => czone,
153 Err(_err) => {
154 warn!(
155 "failed to re-cache time zone from file {}: {_err}",
156 info.inner.full.display(),
157 );
158 return None;
159 }
160 };
161 let tz = czone.tz.clone();
162 zones.zones[i] = czone;
163 Some(tz)
164 }
165 Err(i) => {
166 let czone = match CachedTimeZone::new(&info, ttl) {
167 Ok(czone) => czone,
168 Err(_err) => {
169 warn!(
170 "failed to cache time zone from file {}: {_err}",
171 info.inner.full.display(),
172 );
173 return None;
174 }
175 };
176 let tz = czone.tz.clone();
177 zones.zones.insert(i, czone);
178 Some(tz)
179 }
180 }
181 }
182
183 pub(crate) fn available<'d>(&'d self) -> TimeZoneNameIter<'d> {
184 let Some(names) = self.names.as_ref() else {
185 return TimeZoneNameIter::empty();
186 };
187 TimeZoneNameIter::from_iter(names.available().into_iter())
188 }
189
190 pub(crate) fn is_definitively_empty(&self) -> bool {
191 self.names.is_none()
192 }
193}
194
195impl core::fmt::Debug for Database {
196 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
197 write!(f, "ZoneInfo(")?;
198 if let Some(ref dir) = self.dir {
199 write!(f, "{}", dir.display())?;
200 } else {
201 write!(f, "unavailable")?;
202 }
203 write!(f, ")")
204 }
205}
206
207#[derive(Debug)]
208struct CachedZones {
209 zones: Vec<CachedTimeZone>,
210 ttl: Duration,
211}
212
213impl CachedZones {
214 const DEFAULT_TTL: Duration = DEFAULT_TTL;
215
216 fn new() -> CachedZones {
217 CachedZones { zones: vec![], ttl: CachedZones::DEFAULT_TTL }
218 }
219
220 fn get(&self, query: &str) -> Option<&CachedTimeZone> {
221 self.get_zone_index(query).ok().map(|i| &self.zones[i])
222 }
223
224 fn get_zone_index(&self, query: &str) -> Result<usize, usize> {
225 // The common case is that our query matches the time zone name case
226 // sensitively, so check for that first. It's a bit cheaper than doing
227 // a case insensitive search.
228 if let Ok(i) = self
229 .zones
230 .binary_search_by(|zone| zone.name.original().cmp(&query))
231 {
232 return Ok(i);
233 }
234 self.zones.binary_search_by(|zone| {
235 utf8::cmp_ignore_ascii_case(zone.name.lower(), query)
236 })
237 }
238
239 fn reset(&mut self) {
240 self.zones.clear();
241 }
242}
243
244#[derive(Clone, Debug)]
245struct CachedTimeZone {
246 tz: TimeZone,
247 name: ZoneInfoName,
248 expiration: Expiration,
249 last_modified: Option<Timestamp>,
250}
251
252impl CachedTimeZone {
253 /// Create a new cached time zone.
254 ///
255 /// The `info` says which time zone to create and where to find it. The
256 /// `ttl` says how long the cached time zone should minimally remain fresh
257 /// for.
258 fn new(
259 info: &ZoneInfoName,
260 ttl: Duration,
261 ) -> Result<CachedTimeZone, Error> {
262 let path = &info.inner.full;
263 let mut file =
264 File::open(path).map_err(|e| Error::io(e).path(path))?;
265 let mut data = vec![];
266 file.read_to_end(&mut data).map_err(|e| Error::io(e).path(path))?;
267 let tz = TimeZone::tzif(&info.inner.original, &data)
268 .map_err(|e| e.path(path))?;
269 let name = info.clone();
270 let last_modified = util::fs::last_modified_from_file(path, &file);
271 let expiration = Expiration::after(ttl);
272 Ok(CachedTimeZone { tz, name, expiration, last_modified })
273 }
274
275 /// Returns true if this time zone has gone stale and should, at minimum,
276 /// be revalidated.
277 fn is_expired(&self) -> bool {
278 self.expiration.is_expired()
279 }
280
281 /// Attempts to revalidate this cached time zone.
282 ///
283 /// Upon successful revalidation (that is, the cached time zone is still
284 /// fresh and okay to use), this returns true. Otherwise, the cached time
285 /// zone should be considered stale and must be re-created.
286 ///
287 /// Note that technically another layer of revalidation could be done.
288 /// For example, we could keep a checksum of the TZif data, and only
289 /// consider rebuilding the time zone when the checksum changes. But I
290 /// think the last modified metadata will in practice be good enough, and
291 /// parsing a TZif file should be quite fast.
292 fn revalidate(&mut self, info: &ZoneInfoName, ttl: Duration) -> bool {
293 // If we started with no last modified timestamp, then I guess we
294 // should always fail revalidation? I suppose a case could be made to
295 // do the opposite: always pass revalidation.
296 let Some(old_last_modified) = self.last_modified else {
297 trace!(
298 "revalidation for {} failed because old last modified time \
299 is unavailable",
300 info.inner.full.display(),
301 );
302 return false;
303 };
304 let Some(new_last_modified) =
305 util::fs::last_modified_from_path(&info.inner.full)
306 else {
307 trace!(
308 "revalidation for {} failed because new last modified time \
309 is unavailable",
310 info.inner.full.display(),
311 );
312 return false;
313 };
314 // We consider any change to invalidate cache.
315 if old_last_modified != new_last_modified {
316 trace!(
317 "revalidation for {} failed because last modified times \
318 do not match: old = {} != {} = new",
319 info.inner.full.display(),
320 old_last_modified,
321 new_last_modified,
322 );
323 return false;
324 }
325 trace!(
326 "revalidation for {} succeeded because last modified times \
327 match: old = {} == {} = new",
328 info.inner.full.display(),
329 old_last_modified,
330 new_last_modified,
331 );
332 self.expiration = Expiration::after(ttl);
333 true
334 }
335}
336
337/// A collection of time zone names extracted from a zoneinfo directory.
338///
339/// Each time zone name maps to a full path on the file system corresponding
340/// to the TZif formatted data file for that time zone.
341///
342/// This type is responsible not just for providing the names, but also for
343/// updating them periodically.
344#[derive(Debug)]
345struct ZoneInfoNames {
346 inner: RwLock<ZoneInfoNamesInner>,
347}
348
349#[derive(Debug)]
350struct ZoneInfoNamesInner {
351 /// The directory from which we collected time zone names.
352 dir: PathBuf,
353 /// All available names from the `zoneinfo` directory.
354 ///
355 /// Each name corresponds to the suffix of a file path
356 /// starting with `dir`. For example, `America/New_York` in
357 /// `/usr/share/zoneinfo/America/New_York`. Each name also has a normalized
358 /// lowercase version of the name for easy case insensitive lookup.
359 names: Vec<ZoneInfoName>,
360 /// The expiration time of this cached value.
361 ///
362 /// Note that this is a necessary but not sufficient criterion for
363 /// invalidating the cached value.
364 ttl: Duration,
365 /// The time at which the data in `names` becomes stale.
366 expiration: Expiration,
367}
368
369impl ZoneInfoNames {
370 /// The default amount of time to wait before checking for added/removed
371 /// time zones.
372 ///
373 /// Note that this TTL is a necessary but not sufficient criterion to
374 /// provoke cache invalidation. Namely, since we don't expect the set of
375 /// possible time zone names to change often, we only invalidate the cache
376 /// under these circumstances:
377 ///
378 /// 1. The TTL or more has passed since the last time the names were
379 /// attempted to be refreshed (even if it wasn't successful).
380 /// 2. A name lookup is attempted and it isn't found. This is required
381 /// because otherwise there isn't much point in refreshing the names.
382 ///
383 /// This logic does not deal as well with removals from the underlying time
384 /// zone database. That in turn is covered by the TTL on constructing the
385 /// `TimeZone` values themselves.
386 ///
387 /// We could just use the second criterion on its own, but we require the
388 /// TTL to expire out of "good sense." Namely, if there is something borked
389 /// in the environment, the TTL will prevent doing a full scan of the
390 /// zoneinfo directory for every missed time zone lookup.
391 const DEFAULT_TTL: Duration = DEFAULT_TTL;
392
393 /// Create a new collection of names from the zoneinfo database directory
394 /// given.
395 ///
396 /// If no names of time zones with corresponding TZif data files could be
397 /// found in the given directory, then an error is returned.
398 fn new(dir: &Path) -> Result<ZoneInfoNames, Error> {
399 let names = walk(dir)?;
400 let dir = dir.to_path_buf();
401 let ttl = ZoneInfoNames::DEFAULT_TTL;
402 let expiration = Expiration::after(ttl);
403 let inner = ZoneInfoNamesInner { dir, names, ttl, expiration };
404 Ok(ZoneInfoNames { inner: RwLock::new(inner) })
405 }
406
407 /// Attempts to find the name entry for the given query using a case
408 /// insensitive search.
409 ///
410 /// If no match is found and the data is stale, then the time zone names
411 /// are refreshed from the file system before doing another check.
412 fn get(&self, query: &str) -> Option<ZoneInfoName> {
413 {
414 let inner = self.inner.read().unwrap();
415 if let Some(zone_info_name) = inner.get(query) {
416 return Some(zone_info_name);
417 }
418 drop(inner); // unlock
419 }
420 let mut inner = self.inner.write().unwrap();
421 inner.attempt_refresh();
422 inner.get(query)
423 }
424
425 /// Returns all available time zone names after attempting a refresh of
426 /// the underlying data if it's stale.
427 fn available(&self) -> Vec<String> {
428 let mut inner = self.inner.write().unwrap();
429 inner.attempt_refresh();
430 inner.available()
431 }
432
433 fn reset(&self) {
434 self.inner.write().unwrap().reset();
435 }
436}
437
438impl ZoneInfoNamesInner {
439 /// Attempts to find the name entry for the given query using a case
440 /// insensitive search.
441 ///
442 /// `None` is returned if one isn't found.
443 fn get(&self, query: &str) -> Option<ZoneInfoName> {
444 self.names
445 .binary_search_by(|n| {
446 utf8::cmp_ignore_ascii_case(&n.inner.lower, query)
447 })
448 .ok()
449 .map(|i| self.names[i].clone())
450 }
451
452 /// Returns all available time zone names.
453 fn available(&self) -> Vec<String> {
454 self.names.iter().map(|n| n.inner.original.clone()).collect()
455 }
456
457 /// Attempts a refresh, but only follows through if the TTL has been
458 /// exceeded.
459 ///
460 /// The caller must ensure that the other cache invalidation criteria
461 /// have been upheld. For example, this should only be called for a missed
462 /// zone name lookup.
463 fn attempt_refresh(&mut self) {
464 if self.expiration.is_expired() {
465 self.refresh();
466 }
467 }
468
469 /// Forcefully refreshes the cached names with possibly new data from disk.
470 /// If an error occurs when fetching the names, then no names are updated
471 /// (but the `expires_at` is updated). This will also emit a warning log on
472 /// failure.
473 fn refresh(&mut self) {
474 // PERF: Should we try to move this `walk` call to run outside of a
475 // lock? It probably happens pretty rarely, so it might not matter.
476 let result = walk(&self.dir);
477 self.expiration = Expiration::after(self.ttl);
478 match result {
479 Ok(names) => {
480 self.names = names;
481 }
482 Err(_err) => {
483 warn!(
484 "failed to refresh zoneinfo time zone name cache \
485 for {}: {_err}",
486 self.dir.display(),
487 )
488 }
489 }
490 }
491
492 /// Resets the state such that the next lookup is guaranteed to force a
493 /// cache refresh, and that it is impossible for any data to be stale.
494 fn reset(&mut self) {
495 // This will force the next lookup to fail.
496 self.names.clear();
497 // And this will force the next failed lookup to result in a refresh.
498 self.expiration = Expiration::expired();
499 }
500}
501
502/// A single TZif entry in a zoneinfo database directory.
503#[derive(Clone, Debug)]
504struct ZoneInfoName {
505 inner: Arc<ZoneInfoNameInner>,
506}
507
508#[derive(Clone, Debug)]
509struct ZoneInfoNameInner {
510 /// A file path resolvable to the corresponding file relative to the
511 /// working directory of this program.
512 ///
513 /// Should we canonicalize this to a absolute path? I guess in practice it
514 /// is an absolute path in most cases.
515 full: PathBuf,
516 /// The original name of this time zone taken from the file path with
517 /// no additional changes.
518 original: String,
519 /// The lowercase version of `original`. This is how we determine name
520 /// equality.
521 lower: String,
522}
523
524impl ZoneInfoName {
525 /// Create a new time zone info name.
526 ///
527 /// `base` should corresponding to the zoneinfo directory from which the
528 /// suffix `time_zone_name` path was returned.
529 fn new(base: &Path, time_zone_name: &Path) -> Result<ZoneInfoName, Error> {
530 let full = base.join(time_zone_name);
531 let original = parse::os_str_utf8(time_zone_name.as_os_str())
532 .map_err(|err| err.path(base))?;
533 let lower = original.to_ascii_lowercase();
534 let inner =
535 ZoneInfoNameInner { full, original: original.to_string(), lower };
536 Ok(ZoneInfoName { inner: Arc::new(inner) })
537 }
538
539 /// Returns the original name of this time zone.
540 fn original(&self) -> &str {
541 &self.inner.original
542 }
543
544 /// Returns the lowercase name of this time zone.
545 fn lower(&self) -> &str {
546 &self.inner.lower
547 }
548}
549
550impl Eq for ZoneInfoName {}
551
552impl PartialEq for ZoneInfoName {
553 fn eq(&self, rhs: &ZoneInfoName) -> bool {
554 self.inner.lower == rhs.inner.lower
555 }
556}
557
558impl Ord for ZoneInfoName {
559 fn cmp(&self, rhs: &ZoneInfoName) -> core::cmp::Ordering {
560 self.inner.lower.cmp(&rhs.inner.lower)
561 }
562}
563
564impl PartialOrd for ZoneInfoName {
565 fn partial_cmp(&self, rhs: &ZoneInfoName) -> Option<core::cmp::Ordering> {
566 Some(self.cmp(rhs))
567 }
568}
569
570impl core::hash::Hash for ZoneInfoName {
571 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
572 self.inner.lower.hash(state);
573 }
574}
575
576/// Recursively walks the given directory and returns the names of all time
577/// zones found.
578///
579/// This is guaranteed to return either one or more time zone names OR an
580/// error. That is, `Ok(vec![])` is an impossible result.
581///
582/// This will attempt to collect as many names as possible, even if some I/O
583/// operations fail.
584///
585/// The names returned are sorted in lexicographic order according to the
586/// lowercase form of each name.
587fn walk(start: &Path) -> Result<Vec<ZoneInfoName>, Error> {
588 let mut first_err: Option<Error> = None;
589 let mut seterr = |path: &Path, err: Error| {
590 if first_err.is_none() {
591 first_err = Some(err.path(path));
592 }
593 };
594
595 let mut names = vec![];
596 let mut stack = vec![start.to_path_buf()];
597 while let Some(dir) = stack.pop() {
598 let readdir = match dir.read_dir() {
599 Ok(readdir) => readdir,
600 Err(err) => {
601 trace!(
602 "error when reading {} as a directory: {err}",
603 dir.display()
604 );
605 seterr(&dir, Error::io(err));
606 continue;
607 }
608 };
609 for result in readdir {
610 let dent = match result {
611 Ok(dent) => dent,
612 Err(err) => {
613 trace!(
614 "error when reading directory entry from {}: {err}",
615 dir.display()
616 );
617 seterr(&dir, Error::io(err));
618 continue;
619 }
620 };
621 let file_type = match dent.file_type() {
622 Ok(file_type) => file_type,
623 Err(err) => {
624 let path = dent.path();
625 trace!(
626 "error when reading file type from {}: {err}",
627 path.display()
628 );
629 seterr(&path, Error::io(err));
630 continue;
631 }
632 };
633 let path = dent.path();
634 if file_type.is_dir() {
635 stack.push(path);
636 continue;
637 }
638 // We assume symlinks are files, although this may not be
639 // appropriate. If we need to also handle the case when they're
640 // directories, then we'll need to add symlink loop detection.
641 //
642 // Otherwise, at this point, we peek at the first few bytes of a
643 // file to do a low false positive and never false negative check
644 // for a TZif file.
645
646 let mut f = match File::open(&path) {
647 Ok(f) => f,
648 Err(err) => {
649 trace!("failed to open {}: {err}", path.display());
650 seterr(&path, Error::io(err));
651 continue;
652 }
653 };
654 let mut buf = [0; 4];
655 if let Err(err) = f.read_exact(&mut buf) {
656 trace!(
657 "failed to read first 4 bytes of {}: {err}",
658 path.display()
659 );
660 seterr(&path, Error::io(err));
661 continue;
662 }
663 if !is_possibly_tzif(&buf) {
664 // This is a trace because it's perfectly normal for a
665 // non-TZif file to be in a zoneinfo directory. But it could
666 // still be potentially useful debugging info.
667 trace!(
668 "found file {} that isn't TZif since its first \
669 four bytes are {:?}",
670 path.display(),
671 crate::util::escape::Bytes(&buf),
672 );
673 continue;
674 }
675 let time_zone_name = match path.strip_prefix(start) {
676 Ok(time_zone_name) => time_zone_name,
677 Err(err) => {
678 trace!(
679 "failed to extract time zone name from {} \
680 using {} as a base: {err}",
681 path.display(),
682 start.display(),
683 );
684 seterr(&path, Error::adhoc(err));
685 continue;
686 }
687 };
688 let zone_info_name =
689 match ZoneInfoName::new(&start, time_zone_name) {
690 Ok(zone_info_name) => zone_info_name,
691 Err(err) => {
692 seterr(&path, err);
693 continue;
694 }
695 };
696 names.push(zone_info_name);
697 }
698 }
699 if names.is_empty() {
700 let err = first_err
701 .take()
702 .unwrap_or_else(|| err!("{}: no TZif files", start.display()));
703 Err(err)
704 } else {
705 // If we found at least one valid name, then we declare success and
706 // drop any error we might have found. They do all get logged above
707 // though.
708 names.sort();
709 Ok(names)
710 }
711}
712
#[cfg(test)]
mod tests {
    use super::*;

    /// DEBUG COMMAND
    ///
    /// Reads the directory named by the `JIFF_DEBUG_ZONEINFO_DIR`
    /// environment variable and prints every time zone name found in it to
    /// stderr, one per line. Does nothing when the variable is unset.
    ///
    /// Callers may also set `RUST_LOG` to get extra debugging output.
    #[test]
    fn debug_zoneinfo_walk() -> anyhow::Result<()> {
        let _ = crate::logging::Logger::init();

        const ENV: &str = "JIFF_DEBUG_ZONEINFO_DIR";
        if let Some(val) = std::env::var_os(ENV) {
            let dir = PathBuf::from(val);
            for name in walk(&dir)? {
                std::eprintln!("{}", name.original());
            }
        }
        Ok(())
    }
}