Turi Create  4.0
turi::sgraph Class Reference

#include <core/storage/sgraph_data/sgraph.hpp>

Public Member Functions

sframe get_vertices (const std::vector< flexible_type > &vid_vec={}, const options_map_t &field_constraint=options_map_t(), size_t groupid=0) const
 
sframe get_edges (const std::vector< flexible_type > &source_vids={}, const std::vector< flexible_type > &target_vids={}, const options_map_t &field_constraint=options_map_t(), size_t groupa=0, size_t groupb=0) const
 
std::vector< std::string > get_vertex_fields (size_t groupid=0) const
 
std::vector< flex_type_enum > get_vertex_field_types (size_t groupid=0) const
 
std::vector< std::string > get_edge_fields (size_t groupa=0, size_t groupb=0) const
 
std::vector< flex_type_enum > get_edge_field_types (size_t groupa=0, size_t groupb=0) const
 
bool add_vertices (const dataframe_t &vertices, const std::string &id_field_name, size_t group=0)
 
bool add_vertices (sframe vertices, const std::string &id_field_name, size_t group=0)
 
bool add_edges (const dataframe_t &edges, const std::string &source_field_name, const std::string &target_field_name, size_t groupa=0, size_t groupb=0)
 
bool add_edges (sframe edges, const std::string &source_field_name, const std::string &target_field_name, size_t groupa=0, size_t groupb=0)
 
bool copy_vertex_field (const std::string &field, const std::string &new_field, size_t group=0)
 
bool copy_edge_field (const std::string &field, const std::string &new_field, size_t groupa=0, size_t groupb=0)
 
bool init_vertex_field (const std::string &field, const flexible_type &init_value, size_t group=0)
 
bool remove_vertex_field (const std::string &field, size_t group=0)
 
bool select_vertex_fields (const std::vector< std::string > &fields, size_t group=0)
 
bool remove_edge_field (const std::string &field, size_t groupa=0, size_t groupb=0)
 
bool init_edge_field (const std::string &field, const flexible_type &init_value, size_t groupa=0, size_t groupb=0)
 
bool select_edge_fields (const std::vector< std::string > &fields, size_t groupa=0, size_t groupb=0)
 
bool clear ()
 
std::vector< sframe > & vertex_group (size_t groupid=0)
 
const std::vector< sframe > & vertex_group (size_t groupid=0) const
 
std::vector< sframe > & edge_group (size_t groupa=0, size_t groupb=0)
 
const std::vector< sframe > & edge_group (size_t groupa, size_t groupb) const
 
sframe & vertex_partition (size_t partition, size_t groupid=0)
 
const sframe & vertex_partition (size_t partition, size_t groupid=0) const
 
sframe & vertex_partition (vertex_partition_address part)
 
const sframe & vertex_partition (vertex_partition_address part) const
 
sframe & edge_partition (size_t partition1, size_t partition2, size_t groupa=0, size_t groupb=0)
 
const sframe & edge_partition (size_t partition1, size_t partition2, size_t groupa=0, size_t groupb=0) const
 
sframe & edge_partition (edge_partition_address address)
 
const sframe & edge_partition (edge_partition_address address) const
 
std::string get_vertex_group_name (size_t idx) const
 
size_t get_vertex_group_id (std::string name) const
 
size_t num_edges (size_t groupa, size_t groupb) const
 
size_t num_edges () const
 
size_t num_vertices (size_t group) const
 
size_t num_vertices () const
 
bool empty () const
 
size_t get_num_partitions () const
 
size_t get_num_groups () const
 
void save (oarchive &oarc) const
 
void save_reference (oarchive &oarc) const
 
void load (iarchive &iarc)
 
bool replace_vertex_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t group=0)
 
template<typename T , typename FLEX_TYPE = T>
bool replace_vertex_field (std::vector< std::vector< T >> &column, std::string column_name, size_t group=0)
 
bool replace_edge_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t groupa=0, size_t groupb=0)
 
bool add_vertex_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t group=0)
 
template<typename T , typename FLEX_TYPE = T>
bool add_vertex_field (std::vector< std::vector< T >> &column, std::string column_name, flex_type_enum column_type, size_t group=0)
 
bool add_edge_field (const std::vector< std::shared_ptr< sarray< flexible_type >>> &column, std::string column_name, size_t groupa=0, size_t groupb=0)
 
std::vector< std::shared_ptr< sarray< flexible_type > > > fetch_vertex_data_field (std::string column_name, size_t group=0) const
 
std::vector< std::vector< flexible_type > > fetch_vertex_data_field_in_memory (std::string column_name, size_t groupid=0) const
 
std::vector< std::shared_ptr< sarray< flexible_type > > > fetch_edge_data_field (std::string column_name, size_t groupa=0, size_t groupb=0) const
 
size_t get_vertex_field_id (std::string column_name, size_t group=0) const
 
size_t get_edge_field_id (std::string column_name, size_t groupa=0, size_t groupb=0)
 

Detailed Description

An on-disk representation of a graph.

The actual on disk representation looks like the following:

Where the partition size (partition_size) is 4, we shuffle all the vertices into 4 SFrames, each of 1 segment. The shuffling is performed by simply hashing the vertex ID into one of the buckets.

The edges, however, are placed into 4*4=16 SFrames, each of 1 segment. Each edge (src,dst) is placed into the SFrame with index (hash(src) % 4) * 4 + hash(dst) % 4. Essentially the Edge SFrame can be thought of as cutting the adjacency matrix into a 4x4 grid.

The result is that the edges in block (0,0) are adjacent only to the vertices in the first block (0), the edges in block (0,1) are adjacent only to the vertices in blocks 0 and 1, and so on.

*
* Vertices                 Edges
*  +---+      +-------+-------+-------+-------+
*  |   |      |       |       |       |       |
*  | 0 |      | (0,0) | (0,1) | (0,2) | (0,3) |
*  +---+      +-------+-------+-------+-------+
*  |   |      |       |       |       |       |
*  | 1 |      | (1,0) | (1,1) | (1,2) | (1,3) |
*  +---+      +-------+-------+-------+-------+
*  |   |      |       |       |       |       |
*  | 2 |      | (2,0) | (2,1) | (2,2) | (2,3) |
*  +---+      +-------+-------+-------+-------+
*  |   |      |       |       |       |       |
*  | 3 |      | (3,0) | (3,1) | (3,2) | (3,3) |
*  +---+      +-------+-------+-------+-------+
*
* 

Vertices are partitioned into user-defined semantic groups. Each vertex can only show up in one group, and each vertex is uniquely identified by the combination of the group ID and the Vertex ID. The vertex ID type MUST be consistent and identical across all groups.

Vertex grouping is implemented by having multiple sets of vertex blocks, one for each group. Thus m_vertex_groups[0] contains a vector of SFrames for vertex group 0 and so on.

Edges are not grouped and they may span any collection of vertices. However, to be able to efficiently slice vertices and edges across groups, there are g*g edge groups, where m_edge_groups[{a,b}] contains all the edges between group a and group b.

Definition at line 74 of file sgraph.hpp.

Member Function Documentation

◆ add_edge_field()

bool turi::sgraph::add_edge_field ( const std::vector< std::shared_ptr< sarray< flexible_type >>> &  column,
std::string  column_name,
size_t  groupa = 0,
size_t  groupb = 0 
)

Adds a particular column (edge field) to all partitions of a particular group of edges. The column must not exist. Returns true on success.

◆ add_edges() [1/2]

bool turi::sgraph::add_edges ( const dataframe_t &  edges,
const std::string &  source_field_name,
const std::string &  target_field_name,
size_t  groupa = 0,
size_t  groupb = 0 
)

Adds edges to the graph.

Note: The dataframe must contain the {source, target}_field_name

◆ add_edges() [2/2]

bool turi::sgraph::add_edges ( sframe  edges,
const std::string &  source_field_name,
const std::string &  target_field_name,
size_t  groupa = 0,
size_t  groupb = 0 
)

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ add_vertex_field()

bool turi::sgraph::add_vertex_field ( const std::vector< std::shared_ptr< sarray< flexible_type >>> &  column,
std::string  column_name,
size_t  group = 0 
)

Add a particular column in all partitions of a particular group of vertices. The column must not exist. Returns true on success.

◆ add_vertices() [1/2]

bool turi::sgraph::add_vertices ( const dataframe_t &  vertices,
const std::string &  id_field_name,
size_t  group = 0 
)

Adds vertices to the graph.

Note: The dataframe must contain the id_field_name

◆ add_vertices() [2/2]

bool turi::sgraph::add_vertices ( sframe  vertices,
const std::string &  id_field_name,
size_t  group = 0 
)

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

◆ clear()

bool turi::sgraph::clear ( )

Resets the graph

◆ copy_edge_field()

bool turi::sgraph::copy_edge_field ( const std::string &  field,
const std::string &  new_field,
size_t  groupa = 0,
size_t  groupb = 0 
)

Similar to copy_vertex_field, but works on edge data. If the new_field already exists, it will be replaced.

◆ copy_vertex_field()

bool turi::sgraph::copy_vertex_field ( const std::string &  field,
const std::string &  new_field,
size_t  group = 0 
)

Copies data from "field" to a new field with name "new_field" for a vertex group. If the new_field already exists, it will be replaced.

◆ edge_group() [1/2]

std::vector<sframe>& turi::sgraph::edge_group ( size_t  groupa = 0,
size_t  groupb = 0 
)
inline

Returns the collection of SFrames containing all the edges between vertex group groupa and vertex group groupb.

This function can be used as the left hand side of an assignment, i.e. edge_group(groupa, groupb) = blah

The caller must guarantee that blah is of the right size. (i.e. blah.size() == get_num_partitions() * get_num_partitions() )

Definition at line 301 of file sgraph.hpp.

◆ edge_group() [2/2]

const std::vector<sframe>& turi::sgraph::edge_group ( size_t  groupa,
size_t  groupb 
) const
inline

Returns the collection of SFrames containing all the edges between vertex group groupa and vertex group groupb.

Definition at line 312 of file sgraph.hpp.

◆ edge_partition() [1/4]

sframe& turi::sgraph::edge_partition ( size_t  partition1,
size_t  partition2,
size_t  groupa = 0,
size_t  groupb = 0 
)
inline

Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.

This function can be used as the left hand side of an assignment. i.e.

edge_partition(part1, part2) = sframe

Definition at line 381 of file sgraph.hpp.

◆ edge_partition() [2/4]

const sframe& turi::sgraph::edge_partition ( size_t  partition1,
size_t  partition2,
size_t  groupa = 0,
size_t  groupb = 0 
) const
inline

Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.

Definition at line 395 of file sgraph.hpp.

◆ edge_partition() [3/4]

sframe& turi::sgraph::edge_partition ( edge_partition_address  address)
inline

Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.

This function can be used as the left hand side of an assignment. i.e.

edge_partition(part1, part2) = sframe

Definition at line 415 of file sgraph.hpp.

◆ edge_partition() [4/4]

const sframe& turi::sgraph::edge_partition ( edge_partition_address  address) const
inline

Returns the SFrame containing all edges in the partition (partition1, partition2) between vertex group groupa and vertex group groupb.

Definition at line 425 of file sgraph.hpp.

◆ empty()

bool turi::sgraph::empty ( ) const
inline

Returns true if the graph is empty.

Definition at line 487 of file sgraph.hpp.

◆ fetch_edge_data_field()

std::vector<std::shared_ptr<sarray<flexible_type> > > turi::sgraph::fetch_edge_data_field ( std::string  column_name,
size_t  groupa = 0,
size_t  groupb = 0 
) const

Extracts the data for a particular field of a group of edges. The column must exist. Assertion failure otherwise.

◆ fetch_vertex_data_field()

std::vector<std::shared_ptr<sarray<flexible_type> > > turi::sgraph::fetch_vertex_data_field ( std::string  column_name,
size_t  group = 0 
) const

Extracts the data for a particular field of a group of vertices. The column must exist. Assertion failure otherwise.

◆ fetch_vertex_data_field_in_memory()

std::vector<std::vector<flexible_type> > turi::sgraph::fetch_vertex_data_field_in_memory ( std::string  column_name,
size_t  groupid = 0 
) const

Same as fetch_vertex_data_field, but stores all values in memory and returns a std::vector<std::vector<flexible_type>>. The column must exist. Assertion failure otherwise.

◆ get_edge_field_id()

size_t turi::sgraph::get_edge_field_id ( std::string  column_name,
size_t  groupa = 0,
size_t  groupb = 0 
)

Gets the offset of the edge field. Throws an exception on failure.

◆ get_edge_field_types()

std::vector<flex_type_enum> turi::sgraph::get_edge_field_types ( size_t  groupa = 0,
size_t  groupb = 0 
) const

Returns a list of field types for given edge group in the graph.

◆ get_edge_fields()

std::vector<std::string> turi::sgraph::get_edge_fields ( size_t  groupa = 0,
size_t  groupb = 0 
) const

Returns a list of fields for given edge group in the graph.

◆ get_edges()

sframe turi::sgraph::get_edges ( const std::vector< flexible_type > &  source_vids = {},
const std::vector< flexible_type > &  target_vids = {},
const options_map_t &  field_constraint = options_map_t(),
size_t  groupa = 0,
size_t  groupb = 0 
) const

Returns an sframe of edges satisfying the id and field constraints. source_vids may contain UNDEFINED as wildcards and target_vids may contain UNDEFINED as wildcards. Each edge will only be represented once in the output.

If source_vids and target_vids are empty, a universal "UNDEFINED-->UNDEFINED" query is assumed

◆ get_num_groups()

size_t turi::sgraph::get_num_groups ( ) const
inline

Returns the number of vertex groups

Definition at line 499 of file sgraph.hpp.

◆ get_num_partitions()

size_t turi::sgraph::get_num_partitions ( ) const
inline

Returns the number of vertex partitions

Definition at line 492 of file sgraph.hpp.

◆ get_vertex_field_id()

size_t turi::sgraph::get_vertex_field_id ( std::string  column_name,
size_t  group = 0 
) const

Gets the offset of the vertex field. Throws an exception on failure.

◆ get_vertex_field_types()

std::vector<flex_type_enum> turi::sgraph::get_vertex_field_types ( size_t  groupid = 0) const

Returns a list of field types for given vertex group in the graph.

◆ get_vertex_fields()

std::vector<std::string> turi::sgraph::get_vertex_fields ( size_t  groupid = 0) const

Returns a list of fields for given vertex group in the graph.

◆ get_vertex_group_id()

size_t turi::sgraph::get_vertex_group_id ( std::string  name) const
inline

Returns the id of the vertex group given the group name. Returns (size_t)(-1) on failure.

Definition at line 444 of file sgraph.hpp.

◆ get_vertex_group_name()

std::string turi::sgraph::get_vertex_group_name ( size_t  idx) const
inline

Returns the name of the vertex group given the group id. Assertion failure if the group does not exist.

Definition at line 435 of file sgraph.hpp.

◆ get_vertices()

sframe turi::sgraph::get_vertices ( const std::vector< flexible_type > &  vid_vec = {},
const options_map_t &  field_constraint = options_map_t(),
size_t  groupid = 0 
) const

Returns an sframe of vertices satisfying the id and field constraints.

◆ init_edge_field()

bool turi::sgraph::init_edge_field ( const std::string &  field,
const flexible_type &  init_value,
size_t  groupa = 0,
size_t  groupb = 0 
)

Initialize an edge field of group with const value. Creates a new column if the field does not exist.

◆ init_vertex_field()

bool turi::sgraph::init_vertex_field ( const std::string &  field,
const flexible_type &  init_value,
size_t  group = 0 
)

Initialize a vertex field of group with const value. Creates a new column if the field does not exist.

◆ load()

void turi::sgraph::load ( iarchive &  iarc)

Load from a directory iarchive.

◆ num_edges() [1/2]

size_t turi::sgraph::num_edges ( size_t  groupa,
size_t  groupb 
) const
inline

Returns number of edges from groupa to groupb.

Definition at line 453 of file sgraph.hpp.

◆ num_edges() [2/2]

size_t turi::sgraph::num_edges ( ) const
inline

Returns the total number of edges.

Definition at line 465 of file sgraph.hpp.

◆ num_vertices() [1/2]

size_t turi::sgraph::num_vertices ( size_t  group) const
inline

Returns the number of vertices in the group.

Definition at line 470 of file sgraph.hpp.

◆ num_vertices() [2/2]

size_t turi::sgraph::num_vertices ( ) const
inline

Returns the total number of vertices.

Definition at line 482 of file sgraph.hpp.

◆ remove_edge_field()

bool turi::sgraph::remove_edge_field ( const std::string &  field,
size_t  groupa = 0,
size_t  groupb = 0 
)

Deletes a field from edge data. Returns true on success. False on failure.

◆ remove_vertex_field()

bool turi::sgraph::remove_vertex_field ( const std::string &  field,
size_t  group = 0 
)

Deletes a field from vertex data. Returns true on success. False on failure.

◆ replace_edge_field()

bool turi::sgraph::replace_edge_field ( const std::vector< std::shared_ptr< sarray< flexible_type >>> &  column,
std::string  column_name,
size_t  groupa = 0,
size_t  groupb = 0 
)

Replaces a particular column (edge field) in all partitions of a particular group of edges. The column must exist. Returns true on success.

◆ replace_vertex_field()

bool turi::sgraph::replace_vertex_field ( const std::vector< std::shared_ptr< sarray< flexible_type >>> &  column,
std::string  column_name,
size_t  group = 0 
)

Replaces a particular column in all partitions of a particular group of vertices. The column must exist. Returns true on success.

◆ save()

void turi::sgraph::save ( oarchive &  oarc) const

Save to a directory oarchive.

◆ save_reference()

void turi::sgraph::save_reference ( oarchive &  oarc) const

Save to directory oarchive using sframe save reference.

See also
sframe_save_weak_reference

◆ select_edge_fields()

bool turi::sgraph::select_edge_fields ( const std::vector< std::string > &  fields,
size_t  groupa = 0,
size_t  groupb = 0 
)

Subselect fields in the edge sframe.

◆ select_vertex_fields()

bool turi::sgraph::select_vertex_fields ( const std::vector< std::string > &  fields,
size_t  group = 0 
)

Subselect fields in the vertex sframe.

◆ vertex_group() [1/2]

std::vector<sframe>& turi::sgraph::vertex_group ( size_t  groupid = 0)
inline

Returns the collection of SFrames containing all the vertices in group groupid.

This function can be used as the left hand side of an assignment, i.e. vertex_group(groupid) = blah

The caller must guarantee that blah is of the right size. (i.e. blah.size() == get_num_partitions() )

Definition at line 274 of file sgraph.hpp.

◆ vertex_group() [2/2]

const std::vector<sframe>& turi::sgraph::vertex_group ( size_t  groupid = 0) const
inline

Returns the collection of SFrames containing all the vertices in group groupid

Definition at line 283 of file sgraph.hpp.

◆ vertex_partition() [1/4]

sframe& turi::sgraph::vertex_partition ( size_t  partition,
size_t  groupid = 0 
)
inline

Returns the SFrame containing all the vertices in a given partition of a group groupid

This function can be used as the left hand side of an assignment. i.e.

vertex_partition(part, group) = sframe

Definition at line 330 of file sgraph.hpp.

◆ vertex_partition() [2/4]

const sframe& turi::sgraph::vertex_partition ( size_t  partition,
size_t  groupid = 0 
) const
inline

Returns the SFrame containing all the vertices in a given partition of a group groupid

Definition at line 341 of file sgraph.hpp.

◆ vertex_partition() [3/4]

sframe& turi::sgraph::vertex_partition ( vertex_partition_address  part)
inline

Returns the SFrame containing all the vertices in a given partition of a group groupid

This function can be used as the left hand side of an assignment. i.e.

vertex_partition(part, group) = sframe

Definition at line 358 of file sgraph.hpp.

◆ vertex_partition() [4/4]

const sframe& turi::sgraph::vertex_partition ( vertex_partition_address  part) const
inline

Returns the SFrame containing all the vertices in a given partition of a group groupid

Definition at line 367 of file sgraph.hpp.


The documentation for this class was generated from the following file: