Turi Create  4.0
file_download_cache.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_FILEIO_FILE_DOWNLOAD_CACHE_HPP
7 #define TURI_FILEIO_FILE_DOWNLOAD_CACHE_HPP
8 #include <string>
9 #include <unordered_map>
10 // this is a circular dependency that needs to be eliminated
11 #include <core/parallel/mutex.hpp>
12 namespace turi {
13 
14 /**
15  * \ingroup fileio
16  * Provides URL download and caching capabilities.
17  * \ref file_download_cache::get_instance() provides a singleton instance of
18  * the file_download_cache. Using the file_download_cache simply involves
19  * calling the \ref file_download_cache::get_file function which takes as its
20  * first argument, a general URL (s3, https, http, file, etc) and returns
21  * a local file name which can be used to access the file downloaded from the
22  * URL.
23  *
24  * The file_download_cache will cache all the temporary files and avoid
25  * re-downloading identical URLs. \ref file_download_cache::release_cache
26  * can be used to force a file to be uncached.
27  *
28  * \ref get_file() is safe to use concurrently. \ref release_cache() has to be
29  * used carefully since there are race condition concerns if the downloaded
30  * file is still being used by another thread.
31  *
32  * For s3 files, cache will be updated based on last modification time.
33  */
35  public:
36  /// deletes all downloaded temporary files
38 
39  /**
40  * Downloads the URL (it can be s3, https, http, file, or even a local file)
41  * and returns a local file name at which the contents at the URL can be
42  * read from.
43  *
44  * This function can be safely run in parallel, although if the same file is
45  * requested in two threads simultaneously, the file may be downloaded
46  * twice.
47  *
48  * May throw exceptions if the URL cannot be downloaded.
49  */
50  std::string get_file(const std::string& url);
51 
52  /**
53  * Releases the cached copy of the contents of a given URL.
54  *
55  * This function can be safely run in parallel, but there is the risk that
56  * the file may still be used / referenced by another thread. The caller must
57  * be careful to guarantee that the local file can be deleted.
58  */
59  void release_cache(const std::string& url);
60 
61  /**
62  * Releases all cached files.
63  */
64  void clear();
65 
66  /**
67  * Obtains the global singleton instance of the file download cache
68  */
70 
71  private:
72  struct file_metadata {  // per-URL cache record; this is the mapped type of url_to_file
73  std::string filename;  // presumably the local file name handed back by get_file() -- confirm in the implementation
74  std::string last_modified;  // presumably the remote last-modification stamp used to refresh s3 entries -- format not visible here
75  };
76 
77  private:
78  std::unordered_map<std::string, file_metadata> url_to_file;
79  mutex lock;
80 };
81 
82 
83 } // namespace turi
84 #endif
static file_download_cache & get_instance()
void release_cache(const std::string &url)
~file_download_cache()
deletes all downloaded temporary files
std::string get_file(const std::string &url)