Turi Create  4.0
sgraph_fast_triple_apply.hpp
1 /* Copyright © 2017 Apple Inc. All rights reserved.
2  *
3  * Use of this source code is governed by a BSD-3-clause license that can
4  * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
5  */
6 #ifndef TURI_SGRAPH_SGRAPH_FAST_TRIPLE_APPLY
7 #define TURI_SGRAPH_SGRAPH_FAST_TRIPLE_APPLY
8 
9 #include<core/data/flexible_type/flexible_type.hpp>
10 #include<core/storage/sgraph_data/sgraph.hpp>
11 #include<core/storage/sgraph_data/sgraph_compute_vertex_block.hpp>
12 
13 namespace turi {
14 
15 
16 /**
17  * \ingroup sgraph_physical
18  * \addtogroup sgraph_compute SGraph Compute
19  * \{
20  */
21 
22 /**
23  * Graph Computation Functions
24  */
25 namespace sgraph_compute {
26 
/// Locates a vertex by the partition that holds it and by its position
/// inside that partition.
struct vertex_address {
  /// Id of the partition that contains the vertex.
  size_t partition_id;
  /// Index of the vertex local to that partition.
  size_t local_id;
};
32 
33 typedef std::vector<flexible_type> edge_data;
34 
35 /**
36  * Provides access to an edge scope (Vertex, Edge, Vertex).
37  * The scope object permits reading and modifying both the vertex data
38  * and the edge data. See \ref fast_triple_apply.
39  */
41  public:
42  /// Provide edge data access
43  edge_data& edge() { return *m_edge; }
44 
45  const edge_data& edge() const { return *m_edge; }
46 
47  vertex_address source_vertex_address() { return m_source_addr; }
48 
49  vertex_address target_vertex_address() { return m_target_addr; }
50 
51  /// Do not construct edge_scope directly. Used by triple_apply_impl.
52  fast_edge_scope(const vertex_address& source_addr,
53  const vertex_address& target_addr,
54  edge_data* edge) :
55  m_source_addr(source_addr), m_target_addr(target_addr), m_edge(edge) { }
56 
57  private:
58  vertex_address m_source_addr;
59  vertex_address m_target_addr;
60  edge_data* m_edge;
61 };
62 
63 typedef std::function<void(fast_edge_scope&)> fast_triple_apply_fn_type;
64 
65 /**
66  * A faster and simplified version of triple_apply.
67  *
68  * It is "faster" under the assumption that vertex data can be loaded entirely
69  * into memory and accessed by the apply function through addressing.
70  *
71  * The interface makes it possible for vertex data to stay in memory *across*
72  * multiple triple applies before committing to disk.
73  *
74  * Main interface difference:
75  * 1. Vertex data is referred to by vertex address, allowing the user to supply
76  * their own vertex data storage.
77  * 2. The user explicitly specifies which edge fields need to be
78  * read and which are mutated.
79  * 3. Vertex locking is omitted for simplicity (it can be added later if needed).
80  *
81  * \param g The target graph to perform the transformation.
82  * \param apply_fn The user defined function that will be applied on each edge scope.
83  * \param edge_fields A subset of edge data columns that the apply_fn will access.
84  * \param mutated_edge_fields A subset of columns in \ref edge_fields that the apply_fn will modify.
85  */
87  fast_triple_apply_fn_type apply_fn,
88  const std::vector<std::string>& edge_fields,
89  const std::vector<std::string>& mutated_edge_fields);
90 
91 
92 /**
93  * Utility function
94  */
95 template<typename T>
96 std::vector<std::vector<T>> create_vertex_data(const sgraph& g) {
97  std::vector<std::vector<T>> ret(g.get_num_partitions());
98  for (size_t i = 0; i < g.get_num_partitions(); ++i) {
99  ret[i] = std::vector<T>(g.vertex_partition(i).size());
100  }
101  return ret;
102 }
103 
104 template<typename T>
105 std::vector<std::vector<T>> create_vertex_data_from_const(const sgraph& g, const T& init) {
106  std::vector<std::vector<T>> ret(g.get_num_partitions());
107  for (size_t i = 0; i < g.get_num_partitions(); ++i) {
108  ret[i] = std::vector<T>(g.vertex_partition(i).size(), init);
109  }
110  return ret;
111 }
112 
113 
114 }
115 
116 /// \}
117 }
118 
119 #endif
fast_edge_scope(const vertex_address &source_addr, const vertex_address &target_addr, edge_data *edge)
Do not construct edge_scope directly. Used by triple_apply_impl.
size_t size() const
Definition: sframe.hpp:354
size_t get_num_partitions() const
Definition: sgraph.hpp:492
edge_data & edge()
Provide edge data access.
void fast_triple_apply(sgraph &g, fast_triple_apply_fn_type apply_fn, const std::vector< std::string > &edge_fields, const std::vector< std::string > &mutated_edge_fields)
std::vector< std::vector< T > > create_vertex_data(const sgraph &g)
sframe & vertex_partition(size_t partition, size_t groupid=0)
Definition: sgraph.hpp:330