Turi Create
4.0
|
#include <core/storage/sgraph_data/sgraph.hpp>
Public Member Functions | |
sframe | get_vertices (const std::vector< flexible_type > &vid_vec={}, const options_map_t &field_constraint=options_map_t(), size_t groupid=0) const |
sframe | get_edges (const std::vector< flexible_type > &source_vids={}, const std::vector< flexible_type > &target_vids={}, const options_map_t &field_constraint=options_map_t(), size_t groupa=0, size_t groupb=0) const |
std::vector< std::string > | get_vertex_fields (size_t groupid=0) const |
std::vector< flex_type_enum > | get_vertex_field_types (size_t groupid=0) const |
std::vector< std::string > | get_edge_fields (size_t groupa=0, size_t groupb=0) const |
std::vector< flex_type_enum > | get_edge_field_types (size_t groupa=0, size_t groupb=0) const |
bool | add_vertices (const dataframe_t &vertices, const std::string &id_field_name, size_t group=0) |
bool | add_vertices (sframe vertices, const std::string &id_field_name, size_t group=0) |
bool | add_edges (const dataframe_t &edges, const std::string &source_field_name, const std::string &target_field_name, size_t groupa=0, size_t groupb=0) |
bool | add_edges (sframe edges, const std::string &source_field_name, const std::string &target_field_name, size_t groupa=0, size_t groupb=0) |
bool | copy_vertex_field (const std::string &field, const std::string &new_field, size_t group=0) |
bool | copy_edge_field (const std::string &field, const std::string &new_field, size_t groupa=0, size_t groupb=0) |
bool | init_vertex_field (const std::string &field, const flexible_type &init_value, size_t group=0) |
bool | remove_vertex_field (const std::string &field, size_t group=0) |
bool | select_vertex_fields (const std::vector< std::string > &fields, size_t group=0) |
bool | remove_edge_field (const std::string &field, size_t groupa=0, size_t groupb=0) |
bool | init_edge_field (const std::string &field, const flexible_type &init_value, size_t groupa=0, size_t groupb=0) |
bool | select_edge_fields (const std::vector< std::string > &fields, size_t groupa=0, size_t groupb=0) |
bool | clear () |
std::vector< sframe > & | vertex_group (size_t groupid=0) |
const std::vector< sframe > & | vertex_group (size_t groupid=0) const |
std::vector< sframe > & | edge_group (size_t groupa=0, size_t groupb=0) |
const std::vector< sframe > & | edge_group (size_t groupa, size_t groupb) const |
sframe & | vertex_partition (size_t partition, size_t groupid=0) |
const sframe & | vertex_partition (size_t partition, size_t groupid=0) const |
sframe & | vertex_partition (vertex_partition_address part) |
const sframe & | vertex_partition (vertex_partition_address part) const |
sframe & | edge_partition (size_t partition1, size_t partition2, size_t groupa=0, size_t groupb=0) |
const sframe & | edge_partition (size_t partition1, size_t partition2, size_t groupa=0, size_t groupb=0) const |
sframe & | edge_partition (edge_partition_address address) |
const sframe & | edge_partition (edge_partition_address address) const |
std::string | get_vertex_group_name (size_t idx) const |
size_t | get_vertex_group_id (std::string name) const |
size_t | num_edges (size_t groupa, size_t groupb) const |
size_t | num_edges () const |
size_t | num_vertices (size_t group) const |
size_t | num_vertices () const |
bool | empty () const |
size_t | get_num_partitions () const |
size_t | get_num_groups () const |
void | save (oarchive &oarc) const |
void | save_reference (oarchive &oarc) const |
void | load (iarchive &iarc) |
bool | replace_vertex_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t group=0) |
template<typename T , typename FLEX_TYPE = T> | |
bool | replace_vertex_field (std::vector< std::vector< T >> &column, std::string column_name, size_t group=0) |
bool | replace_edge_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t groupa=0, size_t groupb=0) |
bool | add_vertex_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t group=0) |
template<typename T , typename FLEX_TYPE = T> | |
bool | add_vertex_field (std::vector< std::vector< T >> &column, std::string column_name, flex_type_enum column_type, size_t group=0) |
bool | add_edge_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t groupa=0, size_t groupb=0) |
std::vector< std::shared_ptr< sarray< flexible_type > > > | fetch_vertex_data_field (std::string column_name, size_t group=0) const |
std::vector< std::vector< flexible_type > > | fetch_vertex_data_field_in_memory (std::string column_name, size_t groupid=0) const |
std::vector< std::shared_ptr< sarray< flexible_type > > > | fetch_edge_data_field (std::string column_name, size_t groupa=0, size_t groupb=0) const |
size_t | get_vertex_field_id (std::string column_name, size_t group=0) const |
size_t | get_edge_field_id (std::string column_name, size_t groupa=0, size_t groupb=0) |
An On disk representation of a graph.
The actual on disk representation looks like the following:
Where the partition size (partition_size) is 4, we shuffle all the vertices into 4 SFrames, each of 1 segment. The shuffling is performed by simply hashing the vertex ID into one of the buckets.
The edges, however, are placed into 4*4=16 SFrames, each of 1 segment. Each edge (src,dst) is placed into the SFrame with index (hash(src) % 4) * 4 + (hash(dst) % 4). Essentially, the edge SFrames can be thought of as cutting the adjacency matrix into a 4x4 grid.
The result is that the edges in block (0,0) are adjacent only to the vertices in the first block (0), the edges in block (0,1) are adjacent only to the vertices in blocks 0 and 1, and so on.
    Vertices                  Edges
     +---+    +-------+-------+-------+-------+
     |   |    |       |       |       |       |
     | 0 |    | (0,0) | (0,1) | (0,2) | (0,3) |
     +---+    +-------+-------+-------+-------+
     |   |    |       |       |       |       |
     | 1 |    | (1,0) | (1,1) | (1,2) | (1,3) |
     +---+    +-------+-------+-------+-------+
     |   |    |       |       |       |       |
     | 2 |    | (2,0) | (2,1) | (2,2) | (2,3) |
     +---+    +-------+-------+-------+-------+
     |   |    |       |       |       |       |
     | 3 |    | (3,0) | (3,1) | (3,2) | (3,3) |
     +---+    +-------+-------+-------+-------+
Vertices are partitioned into user-defined semantic groups. Each vertex can only show up in one group, and each vertex is uniquely identified by the combination of the group ID and the Vertex ID. The vertex ID type MUST be consistent and identical across all groups.
Vertex grouping is implemented by having multiple sets of vertex blocks, one for each group. Thus m_vertex_groups[0] contains a vector of SFrames for vertex group 0, and so on.
Edges are not grouped and they may span any collection of vertices. However, to be able to efficiently slice vertices and edges across groups, there are g*g edge groups, where m_edge_groups[{a,b}] contains all the edges between group a and group b.
Definition at line 74 of file sgraph.hpp.
bool turi::sgraph::add_edge_field | ( | const std::vector< std::shared_ptr< sarray< flexible_type >>> & | column, |
std::string | column_name, | ||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Adds a particular column to all partitions of a particular group of edges. The column must not already exist. Returns true on success.
bool turi::sgraph::add_edges | ( | const dataframe_t & | edges, |
const std::string & | source_field_name, | ||
const std::string & | target_field_name, | ||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Adds edges to the graph.
Note: The dataframe must contain the {source, target}_field_name
bool turi::sgraph::add_edges | ( | sframe | edges, |
const std::string & | source_field_name, | ||
const std::string & | target_field_name, | ||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
bool turi::sgraph::add_vertex_field | ( | const std::vector< std::shared_ptr< sarray< flexible_type >>> & | column, |
std::string | column_name, | ||
size_t | group = 0 |
||
) |
Add a particular column in all partitions of a particular group of vertices. The column must not exist. Returns true on success.
bool turi::sgraph::add_vertices | ( | const dataframe_t & | vertices, |
const std::string & | id_field_name, | ||
size_t | group = 0 |
||
) |
Adds vertices to the graph.
Note: The dataframe must contain the id_field_name
bool turi::sgraph::add_vertices | ( | sframe | vertices, |
const std::string & | id_field_name, | ||
size_t | group = 0 |
||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
bool turi::sgraph::clear | ( | ) |
Resets the graph
bool turi::sgraph::copy_edge_field | ( | const std::string & | field, |
const std::string & | new_field, | ||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Similar to copy_vertex_field, but works on edge data. If new_field already exists, it will be replaced.
bool turi::sgraph::copy_vertex_field | ( | const std::string & | field, |
const std::string & | new_field, | ||
size_t | group = 0 |
||
) |
Copies data from "field" to a new field with name "new_field" for a vertex group. If the new_field already exists, it will be replaced.
|
inline |
Returns the collection of SFrames containing all the edges between vertex group groupa and vertex group groupb.
This function can be used as the left hand side of an assignment, i.e. g.edge_group(groupa, groupb) = blah;
The caller must guarantee that blah is of the right size. (i.e. blah.size() == get_num_partitions() * get_num_partitions() )
Definition at line 301 of file sgraph.hpp.
|
inline |
Returns the collection of SFrames containing all the edges between vertex group groupa and vertex group groupb.
Definition at line 312 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.
This function can be used as the left hand side of an assignment, i.e. g.edge_partition(partition1, partition2, groupa, groupb) = new_sframe;
Definition at line 381 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.
Definition at line 395 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.
This function can be used as the left hand side of an assignment, i.e. g.edge_partition(address) = new_sframe;
Definition at line 415 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.
Definition at line 425 of file sgraph.hpp.
|
inline |
Returns true if the graph is empty.
Definition at line 487 of file sgraph.hpp.
std::vector<std::shared_ptr<sarray<flexible_type> > > turi::sgraph::fetch_edge_data_field | ( | std::string | column_name, |
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) | const |
Extracts the data for a particular field of a group of edges. The column must exist. Assertion failure otherwise.
std::vector<std::shared_ptr<sarray<flexible_type> > > turi::sgraph::fetch_vertex_data_field | ( | std::string | column_name, |
size_t | group = 0 |
||
) | const |
Extracts the data for a particular field of a group of vertices. The column must exist. Assertion failure otherwise.
std::vector<std::vector<flexible_type> > turi::sgraph::fetch_vertex_data_field_in_memory | ( | std::string | column_name, |
size_t | groupid = 0 |
||
) | const |
Same as fetch_vertex_data_field, but stores all values in memory and returns a std::vector<std::vector<flexible_type>>. The column must exist. Assertion failure otherwise.
size_t turi::sgraph::get_edge_field_id | ( | std::string | column_name, |
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Gets the offset of the edge field. Throws an exception on failure.
std::vector<flex_type_enum> turi::sgraph::get_edge_field_types | ( | size_t | groupa = 0 , |
size_t | groupb = 0 |
||
) | const |
Returns a list of field types for given edge group in the graph.
std::vector<std::string> turi::sgraph::get_edge_fields | ( | size_t | groupa = 0 , |
size_t | groupb = 0 |
||
) | const |
Returns a list of fields for given edge group in the graph.
sframe turi::sgraph::get_edges | ( | const std::vector< flexible_type > & | source_vids = {} , |
const std::vector< flexible_type > & | target_vids = {} , |
||
const options_map_t & | field_constraint = options_map_t() , |
||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) | const |
Returns an sframe of edges satisfying the id and field constraints. source_vids may contain UNDEFINED as wildcards and target_vids may contain UNDEFINED as wildcards. Each edge will only be represented once in the output.
If source_vids and target_vids are both empty, a universal "UNDEFINED-->UNDEFINED" query is assumed.
|
inline |
Returns the number of vertex groups
Definition at line 499 of file sgraph.hpp.
|
inline |
Returns the number of vertex partitions
Definition at line 492 of file sgraph.hpp.
size_t turi::sgraph::get_vertex_field_id | ( | std::string | column_name, |
size_t | group = 0 |
||
) | const |
Gets the offset of the vertex field. Throws an exception on failure.
std::vector<flex_type_enum> turi::sgraph::get_vertex_field_types | ( | size_t | groupid = 0 | ) | const |
Returns a list of field types for given vertex group in the graph.
std::vector<std::string> turi::sgraph::get_vertex_fields | ( | size_t | groupid = 0 | ) | const |
Returns a list of fields for given vertex group in the graph.
|
inline |
Returns the id of the vertex group given the group name. Returns (size_t)(-1) on failure.
Definition at line 444 of file sgraph.hpp.
|
inline |
Returns the name of the vertex group given the group id. Assertion failure if the group does not exist.
Definition at line 435 of file sgraph.hpp.
sframe turi::sgraph::get_vertices | ( | const std::vector< flexible_type > & | vid_vec = {} , |
const options_map_t & | field_constraint = options_map_t() , |
||
size_t | groupid = 0 |
||
) | const |
Returns a sframe of vertices satisfying the id and field constraints.
bool turi::sgraph::init_edge_field | ( | const std::string & | field, |
const flexible_type & | init_value, | ||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Initializes an edge field of the given edge group with a constant value. Creates a new column if the field does not exist.
bool turi::sgraph::init_vertex_field | ( | const std::string & | field, |
const flexible_type & | init_value, | ||
size_t | group = 0 |
||
) |
Initializes a vertex field of the given vertex group with a constant value. Creates a new column if the field does not exist.
void turi::sgraph::load | ( | iarchive & | iarc | ) |
Load from a directory iarchive.
|
inline |
Returns number of edges from groupa to groupb.
Definition at line 453 of file sgraph.hpp.
|
inline |
Returns the total number of edges.
Definition at line 465 of file sgraph.hpp.
|
inline |
Returns the number of vertices in the group.
Definition at line 470 of file sgraph.hpp.
|
inline |
Returns the total number of vertices.
Definition at line 482 of file sgraph.hpp.
bool turi::sgraph::remove_edge_field | ( | const std::string & | field, |
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Deletes a field from edge data. Returns true on success. False on failure.
bool turi::sgraph::remove_vertex_field | ( | const std::string & | field, |
size_t | group = 0 |
||
) |
Deletes a field from vertex data. Returns true on success. False on failure.
bool turi::sgraph::replace_edge_field | ( | const std::vector< std::shared_ptr< sarray< flexible_type >>> & | column, |
std::string | column_name, | ||
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Replaces a particular column in all partitions of a particular group of edges. The column must exist. Returns true on success.
bool turi::sgraph::replace_vertex_field | ( | const std::vector< std::shared_ptr< sarray< flexible_type >>> & | column, |
std::string | column_name, | ||
size_t | group = 0 |
||
) |
Replaces a particular column in all partitions of a particular group of vertices. The column must exist. Returns true on success.
void turi::sgraph::save | ( | oarchive & | oarc | ) | const |
Save to a directory oarchive.
void turi::sgraph::save_reference | ( | oarchive & | oarc | ) | const |
Save to directory oarchive using sframe save reference.
bool turi::sgraph::select_edge_fields | ( | const std::vector< std::string > & | fields, |
size_t | groupa = 0 , |
||
size_t | groupb = 0 |
||
) |
Subselect fields in the edge sframe.
bool turi::sgraph::select_vertex_fields | ( | const std::vector< std::string > & | fields, |
size_t | group = 0 |
||
) |
Subselect fields in the vertex sframe.
|
inline |
Returns the collection of SFrames containing all the vertices in group groupid.
This function can be used as the left hand side of an assignment, i.e. g.vertex_group(groupid) = blah;
The caller must guarantee that blah is of the right size. (i.e. blah.size() == get_num_partitions() )
Definition at line 274 of file sgraph.hpp.
|
inline |
Returns the collection of SFrames containing all the vertices in group groupid
Definition at line 283 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all the vertices in a given partition of a group groupid
This function can be used as the left hand side of an assignment, i.e. g.vertex_partition(partition, groupid) = new_sframe;
Definition at line 330 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all the vertices in a given partition of a group groupid
Definition at line 341 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all the vertices in a given partition of a group groupid
This function can be used as the left hand side of an assignment, i.e. g.vertex_partition(part) = new_sframe;
Definition at line 358 of file sgraph.hpp.
|
inline |
Returns the SFrame containing all the vertices in a given partition of a group groupid
Definition at line 367 of file sgraph.hpp.