Turi Create  4.0
fixed_size_cache_manager.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef FILEIO_FIXED_SIZE_CACHE_MANAGER_HPP
7 #define FILEIO_FIXED_SIZE_CACHE_MANAGER_HPP
8 
#include <cstddef>
#include <cstring>
#include <ios>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include <core/parallel/pthread_tools.hpp>
#include <core/parallel/atomic.hpp>
#include <core/storage/fileio/temp_files.hpp>
#include <core/storage/fileio/general_fstream_sink.hpp>
#include <core/storage/fileio/fileio_constants.hpp>
17 
18 namespace turi {
19 namespace fileio {
20 
21 // forward declaration
22 class fixed_size_cache_manager;
23 
24 typedef std::string cache_id_type;
25 /**
26  * \ingroup fileio
27  *
28  * A struct that contains either an array buffer or a file resource.
29  * This struct cannot be created by anything else other than the
30  * fixed_size_cache_manager.
31  *
32  * This struct is not generally thread safe.
33  */
34 struct cache_block {
35  private:
36  // Construct an in-memory cache block
37  cache_block(cache_id_type cache_id, size_t max_capacity,
38  fixed_size_cache_manager* owning_cache_manager);
39 
40  public:
41  cache_block(const cache_block&) = delete;
42  cache_block& operator=(const cache_block&) = delete;
43 
44  /**
45  * If this points to an in-memory cache, attempts to extend the in-memory
46  * cache have capacity at least new_capacity. Returns true on success,
47  * false on failure. If false, this means that the cache block size has
48  * reached the maximum capacity permitted.
49  */
50  bool extend_capacity(size_t new_capacity);
51 
52  inline cache_id_type get_cache_id() const { return cache_id; }
53 
54  /**
55  * Returns true if this points to an in memory cache.
56  */
57  inline bool is_pointer() const { return filename.empty(); }
58 
59  /**
60  * Returns true if this points to a file
61  */
62  inline bool is_file() const { return !filename.empty(); }
63 
64  /**
65  * Returns the pointer to the in memory cache.
66  */
67  inline char* get_pointer() const { return data; }
68 
69  /**
70  * Returns the total capacity of the in memory cache
71  */
72  inline const size_t get_pointer_capacity() const { return capacity; }
73 
74  /**
75  * Returns the used capacity of the in memory cache
76  */
77  inline const size_t get_pointer_size() const { return size; }
78 
79  /**
80  * Returns the disk backed filename.
81  */
82  inline const std::string& get_filename() const { return filename; }
83 
84  /**
85  * If this is an in memory cache, writes bufsize bytes to it. Returns true
86  * on success, false on failure.
87  */
88  inline bool write_bytes_to_memory_cache(const char* c,
89  std::streamsize bufsize) {
90  if (data == NULL) return false;
91  // either we have enough capacity
92  // or we are able to extend enough capacity to write it
93  if (size + bufsize <= capacity || extend_capacity(size + bufsize)) {
94  memcpy(data + size, c, bufsize);
95  size += bufsize;
96  return true;
97  } else {
98  return false;
99  }
100  }
101 
102  /**
103  * If this cache block is an in memory cache, dumps it to a file returning
104  * the output file handle. Fails if the cache block is not an in memory cache.
105  * Thread safe.
106  */
107  std::shared_ptr<fileio_impl::general_fstream_sink> write_to_file();
108 
109  /**
110  * Destructor. Clears all the memory in the cache block.
111  */
112  ~cache_block();
113 
114  private:
115  // id of the block
116  cache_id_type cache_id = 0;
117  // maximum capacity we are allows to resize to
118  size_t maximum_capacity = 0;
119  // current capacity of the data in memory
120  size_t capacity = 0;
121  // actual content size
122  size_t size = 0;
123  // begin of the data in memory
124  char* data = NULL;
125  // name of the file on disk
126  std::string filename;
127  // the cache manager which created this block
128  fixed_size_cache_manager* owning_cache_manager = NULL;
129 
130  /**
131  * Clears, and reinitializes the cache block with a new maximum capacity.
132  */
133  void initialize_memory(size_t max_capacity);
134  /**
135  * If this points to an in memory cache, release the memory in it.
136  */
137  void release_memory();
138 
139  /**
140  * If this points to an in memory cache, release the memory.
141  * If this points to a file, delete it.
142  */
143  void clear();
144 
145  friend class fixed_size_cache_manager;
146 };
147 
148 /**
149  * \ingroup fileio
150  *
151  * A global singleton object managing the allocation/deallocation
152  * of cache blocks. The basic mechanism of operation is such:
153  *
154  * - For every new cache block requested:
155  * - If there is FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE free bytes,
156  * a new cache block of FILEIO_INITIAL_CAPACITY_PER_FILE is allocated,
157  * where the new cache block is permitted to grow up to
 * FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE. The capacity is not charged as
 * utilization until it is actually used, i.e. utilization is initially only
 * incremented by FILEIO_INITIAL_CAPACITY_PER_FILE. As more memory is allocated
 * for the cache, utilization is incremented again.
162  * - If there is < FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE free bytes
163  * available: The largest cache block is evicted. If there is
164  * FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE free bytes, Goto the first case.
165  * Otherwise, create a new cache block with all the remaining free bytes.
166  *
 * The relevant constants are thus:
 *  - FILEIO_MAXIMUM_CACHE_CAPACITY : the maximum total size of all cache blocks
 *  - FILEIO_MAXIMUM_CACHE_CAPACITY_PER_FILE : the maximum size of each cache
 *    block
 *  - FILEIO_INITIAL_CAPACITY_PER_FILE : the initial size of each cache block
172  *
173  * Overcommit Behavior
174  * -------------------
175  * We try our best to maintain cache utilization below the maximum. However,
176  * it is possible to exceed maximum cache utilization under certain race
177  * conditions since we avoid locking on the cache utilization counter.
178  */
180  public:
  /**
   * Returns a reference to the cache manager singleton.
   */
  static fixed_size_cache_manager& get_instance();
  /**
   * Returns a shared pointer to a const fixed_size_cache_manager.
   * Provided for dependency injection.
   */
  static std::shared_ptr<const fixed_size_cache_manager> hold_instance();
184 
  /**
   * Returns a temporary cache id that is not yet used by the
   * manager.
   *
   * \param suffix Optional string, empty by default.
   *        NOTE(review): presumably appended to the generated id; confirm
   *        against the implementation.
   */
  cache_id_type get_temp_cache_id(std::string suffix = "");
190 
  /**
   * Allocates a new cache block of up to some maximum capacity.
   * If the cache_id already exists, the data will be lost.
   *
   * \param cache_id Id of the cache block to create.
   * \return A pointer to the cache block.
   *
   * Thread safe.
   */
  std::shared_ptr<cache_block> new_cache(cache_id_type cache_id);
199 
  /**
   * Returns the pointer to the cache_block associated with the cache_id.
   * Throws std::out_of_range if the cache_id does not exist.
   *
   * \param cache_id Id of the cache block to look up.
   *
   * Thread safe.
   */
  std::shared_ptr<cache_block> get_cache(cache_id_type cache_id);
207 
  /**
   * Frees the data in the cache block. Deletes the allocated memory or temp
   * file associated with the cache.
   *
   * \param block The cache block to free.
   *
   * NOT thread safe to call on the same block.
   */
  void free(std::shared_ptr<cache_block> block);
215 
  /**
   * Clears all cache blocks in the manager, resetting it to its initial
   * state.
   */
  void clear();
220 
221  /**
222  * Returns the amount of memory being used by the caches.
223  */
224  inline size_t get_cache_utilization() {
225  return current_cache_utilization.value;
226  }
227 
228  private:
230 
231  public:
233 
234  private:
  // The cache manager is not copyable: copy construction and copy assignment
  // are both deleted.
  fixed_size_cache_manager(const fixed_size_cache_manager& other) = delete;

  fixed_size_cache_manager& operator=(const fixed_size_cache_manager& other) =
      delete;
239 
240  private:
  // Counter starting at zero.
  // NOTE(review): presumably used by get_temp_cache_id() to generate unused
  // ids; confirm against the implementation.
  size_t temp_cache_counter = 0;

  // Utilization counter; its value is what get_cache_utilization() returns.
  atomic<size_t> current_cache_utilization;

  // Cache blocks held by the manager, keyed by string.
  // NOTE(review): the key is presumably the block's cache id; confirm
  // against the implementation.
  std::unordered_map<std::string, std::shared_ptr<cache_block> > cache_blocks;
247 
  /**
   * Increments the cache utilization counter.
   *
   * \param increment Amount to add to the counter.
   */
  void increment_utilization(ssize_t increment);
  /**
   * Decrements the cache utilization counter.
   *
   * \param decrement Amount to subtract from the counter.
   */
  void decrement_utilization(ssize_t decrement);
256 
  /**
   * Tries to evict some cache blocks out of the cache.
   * The lock must already be held by the caller when this function is
   * called.
   */
  void try_cache_evict();
262 
  // cache_block is granted access to the private members of this class.
  friend struct cache_block;
264 };
265 
266 } // namespace fileio
267 } // namespace turi
268 #endif
const std::string & get_filename() const
const size_t get_pointer_capacity() const
bool write_bytes_to_memory_cache(const char *c, std::streamsize bufsize)
bool extend_capacity(size_t new_capacity)
std::shared_ptr< fileio_impl::general_fstream_sink > write_to_file()