Turi Create  4.0
turi::gl_sgraph Class Reference

#include <core/data/sframe/gl_sgraph.hpp>

Public Member Functions

 gl_sgraph (const gl_sframe &vertices, const gl_sframe &edges, const std::string &vid_field="__id", const std::string &src_field="__src_id", const std::string &dst_field="__dst_id")
 
 gl_sgraph (const std::string &directory)
 
gl_sframe get_edges (const std::vector< vid_pair > &ids=std::vector< vid_pair >(), const std::map< std::string, flexible_type > &fields=std::map< std::string, flexible_type >()) const
 
gl_sframe get_vertices (const std::vector< flexible_type > &ids=std::vector< flexible_type >(), const std::map< std::string, flexible_type > &fields=std::map< std::string, flexible_type >()) const
 
std::map< std::string, flexible_type > summary () const
 
size_t num_vertices () const
 
size_t num_edges () const
 
std::vector< std::string > get_fields () const
 
std::vector< std::string > get_vertex_fields () const
 
std::vector< std::string > get_edge_fields () const
 
std::vector< flex_type_enum > get_vertex_field_types () const
 
std::vector< flex_type_enum > get_edge_field_types () const
 
gl_sgraph add_vertices (const gl_sframe &vertices, const std::string &vid_field) const
 
gl_sgraph add_edges (const gl_sframe &edges, const std::string &src_field, const std::string &dst_field) const
 
gl_sgraph select_vertex_fields (const std::vector< std::string > &fields) const
 
gl_sgraph select_edge_fields (const std::vector< std::string > &fields) const
 
gl_sgraph select_fields (const std::vector< std::string > &fields) const
 
gl_gframe vertices ()
 
gl_gframe edges ()
 
gl_sgraph triple_apply (const lambda_triple_apply_fn &lambda, const std::vector< std::string > &mutated_fields) const
 
void save (const std::string &directory) const
 
void save_reference (const std::string &directory) const
 
void add_vertex_field (gl_sarray column_data, const std::string &field)
 
void add_vertex_field (const flexible_type &column_data, const std::string &field)
 
void remove_vertex_field (const std::string &field)
 
void rename_vertex_fields (const std::vector< std::string > &oldnames, const std::vector< std::string > &newnames)
 
void add_edge_field (gl_sarray column_data, const std::string &field)
 
void add_edge_field (const flexible_type &column_data, const std::string &field)
 
void remove_edge_field (const std::string &field)
 
void rename_edge_fields (const std::vector< std::string > &oldnames, const std::vector< std::string > &newnames)
 
virtual std::shared_ptr< unity_sgraph > get_proxy () const
 

Detailed Description

A scalable graph data structure backed by persistent storage (gl_sframe).

The SGraph (gl_sgraph) data structure allows arbitrary dictionary attributes on vertices and edges, provides flexible vertex and edge query functions, and seamless transformation to and from SFrame.

Construction

There are several ways to create an SGraph. The simplest way is to make an empty graph then add vertices and edges with the add_vertices and add_edges methods.

gl_sframe vertices { {"vid", {1,2,3}} };
gl_sframe edges { {"src", {1, 3}}, {"dst", {2, 2}} };
gl_sgraph g = gl_sgraph().add_vertices(vertices, "vid").add_edges(edges, "src", "dst");

Columns in the gl_sframes that are not used as id fields are assumed to be vertex or edge attributes.

gl_sgraph objects can also be created from vertex and edge lists stored in gl_sframe.

gl_sframe vertices { {"vid", {1,2,3}}, {"vdata", {"foo", "bar", "foobar"}} };
gl_sframe edges { {"src", {1, 3}}, {"dst", {2, 2}}, {"edata", {0., 0.}} };
gl_sgraph g = gl_sgraph(vertices, edges, "vid", "src", "dst");

Usage

The most convenient way to access vertex and edge data is through the vertices() and edges() functions. Both return a GFrame (gl_gframe) object. A GFrame is like an SFrame but is bound to the host SGraph, so that modifications to the GFrame are applied to the SGraph, and vice versa.

For instance, the following code shows how to add/remove columns to/from the vertex and edge data. The changes are applied to the SGraph.

// add a new edge attribute with const value.
g.edges().add_column("likes", 0);
// remove a vertex attribute.
g.vertices().remove_column("age");
// transforms one attribute to the other
g.vertices()["likes_fish"] = g.vertices()["__id"] == "cat";

You can also query for specific vertices and edges using the get_vertices and get_edges functionality.

For instance,

gl_sframe selected_edges = g.get_edges(
{ {0, FLEX_UNDEFINED}, {FLEX_UNDEFINED, 1}, {2, 3} },
{ {"likes_fish", 1} });

selects outgoing edges of 0, incoming edges of 1, and edge 2->3, such that the edge attribute "likes_fish" evaluates to 1.

In addition, you can perform other non-mutating gl_sframe operations like groupby, join, logical_filter in the same way, and the returned object will be gl_sframe.

In the case where you want to perform vertex-specified operations, such as "gather"/"scatter" over the neighborhood of each vertex, we provide triple_apply which is essentially a "parallel for" over (Vertex, Edge, Vertex) triplets.

For instance, the following code shows how to implement the update function for synchronous pagerank.

const double RESET_PROB = 0.15;
void pr_update(edge_triple& triple) {
triple.target["pagerank"] += triple.source["pagerank_prev"] / triple.source["out_degree"];
}
gl_sgraph pagerank(const gl_sgraph& g, size_t iters) {
// Count the out degree of each vertex into an gl_sframe.
gl_sframe out_degree = g.get_edges().groupby("__src_id", {{"out_degree", aggregate::COUNT()}});
// Add the computed "out_degree" to the graph as vertex attribute.
// We exploit that adding the same vertex will overwrite the vertex data.
gl_sgraph g2 = g.add_vertices(out_degree, "__src_id");
// Initialize pagerank value
g2.vertices()["pagerank"] = 0.0;
g2.vertices()["pagerank_prev"] = 1.0;
for (size_t i = 0; i < iters; ++i) {
g2.vertices()["pagerank"] = 0.0;
g2 = g2.triple_apply(pr_update, {"pagerank"});
g2.vertices()["pagerank"] = RESET_PROB + (1 - RESET_PROB) * g2.vertices()["pagerank"];
g2.vertices()["pagerank_prev"] = g2.vertices()["pagerank"];
}
g2.vertices().remove_column("pagerank_prev");
g2.vertices().remove_column("out_degree");
return g2;
}

Mutability

gl_sgraph is structurally immutable but data (or field) mutable. You can add new vertices and edges, but the operation returns a new gl_sgraph.

Example

Please check out turicreate/sdk_example/sgraph_example.cpp for a concrete example.

Definition at line 148 of file gl_sgraph.hpp.

Constructor & Destructor Documentation

◆ gl_sgraph() [1/2]

turi::gl_sgraph::gl_sgraph ( const gl_sframe &  vertices,
const gl_sframe &  edges,
const std::string &  vid_field = "__id",
const std::string &  src_field = "__src_id",
const std::string &  dst_field = "__dst_id" 
)

Construct gl_sgraph with given vertex data and edge data.

Parameters
vertices: Vertex data. Must include an ID column with the name specified by "vid_field". Additional columns are treated as vertex attributes.
edges: Edge data. Must include source and destination ID columns as specified by "src_field" and "dst_field". Additional columns are treated as edge attributes.
vid_field: Optional. The name of the vertex ID column in the "vertices" gl_sframe.
src_field: Optional. The name of the source ID column in the "edges" gl_sframe.
dst_field: Optional. The name of the destination ID column in the "edges" gl_sframe.

Example

gl_sframe vertices { {"vid", {"cat", "dog", "foosa"}} };
gl_sframe edges { {"source", {"cat", "dog", "dog"}},
{"dest", {"dog", "cat", "foosa"}},
{"relationship", {"dislikes", "likes", "likes"}} };
gl_sgraph g = gl_sgraph(vertices, edges, "vid", "source", "dest");
std::cout << g.vertices() << std::endl;
std::cout << g.edges() << std::endl;
// This following code is equivalent.
// gl_sgraph g = gl_sgraph().add_vertices(vertices, "vid") \
// .add_edges(edges, "source", "dest");

Produces output:

vertices of the gl_sgraph
+-------+
| __id |
+-------+
| cat |
| dog |
| foosa |
+-------+
edges of the gl_sgraph
+----------+----------+--------------+
| __src_id | __dst_id | relationship |
+----------+----------+--------------+
| cat | dog | dislikes |
| dog | cat | likes |
| dog | foosa | likes |
+----------+----------+--------------+
See also
gl_sframe

◆ gl_sgraph() [2/2]

turi::gl_sgraph::gl_sgraph ( const std::string &  directory)
explicit

Constructs from a saved gl_sgraph.

See also
save

Member Function Documentation

◆ add_edge_field() [1/2]

void turi::gl_sgraph::add_edge_field ( gl_sarray  column_data,
const std::string &  field 
)

Add a new edge field with given field name and column_data. Using edges() is preferred.

Parameters
column_data: gl_sarray of size equal to num_edges. The order of column_data is aligned with the order in which edges are stored.
field: Name of the new edge field.

◆ add_edge_field() [2/2]

void turi::gl_sgraph::add_edge_field ( const flexible_type &  column_data,
const std::string &  field 
)

Add a new edge field filled with constant data. Using edges() is preferred.

Parameters
column_data: The constant data to fill the new field column.
field: Name of the new edge field.

◆ add_edges()

gl_sgraph turi::gl_sgraph::add_edges ( const gl_sframe &  edges,
const std::string &  src_field,
const std::string &  dst_field 
) const

Add edges to the gl_sgraph and return the new graph.

Input edges should be in the form of a gl_sframe, and "src_field" and "dst_field" specify which two columns contain the source and target vertex IDs. Remaining columns are assumed to hold additional edge attributes. If these attributes are not already present in the graph's edge data, they are added, with existing edges acquiring the missing value FLEX_UNDEFINED.

Parameters
edges: Edge data. A gl_sframe whose "src_field" and "dst_field" columns contain the source and target vertex IDs. Additional columns are treated as edge attributes.
src_field: Optional. Specifies the source id column in the edges gl_sframe.
dst_field: Optional. Specifies the target id column in the edges gl_sframe.

Example:

gl_sframe edges { {"source", {"cat", "fish"}},
{"dest", {"fish", "cat"}},
{"relation", {"eat", "eaten"}} };
gl_sgraph g = gl_sgraph().add_edges(edges, "source", "dest");
gl_sgraph g2 = g.add_edges(gl_sframe { {"source", {"cat"}},
{"dest", {"fish"}},
{"relation", {"like"}} },
"source", "dest");
std::cout << g.get_edges() << std::endl;
std::cout << g2.get_edges() << std::endl;

Produces output:

edges of g
+----------+----------+----------+
| __src_id | __dst_id | relation |
+----------+----------+----------+
| cat | fish | eat |
| fish | cat | eaten |
+----------+----------+----------+
edges of g2
+----------+----------+----------+
| __src_id | __dst_id | relation |
+----------+----------+----------+
| cat | fish | eat |
| cat | fish | like |
| fish | cat | eaten |
+----------+----------+----------+
Note
The columns specified by "src_field" and "dst_field" will be renamed to "__src_id" and "__dst_id" respectively as the special edge attributes.
If an edge (identified by src and dst id) already exists in the graph, adding a new edge with the same src and dst will NOT overwrite the existing edge. The same edge is DUPLICATED.
If an edge contains new vertices, the new vertices will be automatically added to the graph with all attributes default to FLEX_UNDEFINED.
If adding edges to a non-empty graph, the types of the existing columns must be the same as those of the existing edges.
This function returns a new graph, and does not modify the current graph.
See also
gl_sframe
edges
get_edges
add_vertices

◆ add_vertex_field() [1/2]

void turi::gl_sgraph::add_vertex_field ( gl_sarray  column_data,
const std::string &  field 
)

Add a new vertex field with given field name and column_data. Using vertices() is preferred.

Parameters
column_data: gl_sarray of size equal to num_vertices. The order of column_data is aligned with the order in which vertices are stored.
field: Name of the new vertex field.

◆ add_vertex_field() [2/2]

void turi::gl_sgraph::add_vertex_field ( const flexible_type &  column_data,
const std::string &  field 
)

Add a new vertex field filled with constant data. Using vertices() is preferred.

Parameters
column_data: The constant data to fill the new field column.
field: Name of the new vertex field.

◆ add_vertices()

gl_sgraph turi::gl_sgraph::add_vertices ( const gl_sframe &  vertices,
const std::string &  vid_field 
) const

Add vertices to the gl_sgraph and return the new graph.

Input vertices should be in the form of gl_sframe and "vid_field" specifies which column contains the vertex ID. Remaining columns are assumed to hold additional vertex attributes. If these attributes are not already present in the graph's vertex data, they are added, with existing vertices acquiring the missing value FLEX_UNDEFINED.

Parameters
vertices: Vertex data. A gl_sframe whose "vid_field" column contains the vertex IDs. Additional columns are treated as vertex attributes.
vid_field: Optional. Specifies the vertex id column in the vertices gl_sframe.

Example:

// Add three vertices to an empty graph
gl_sframe vertices { {"vid", {0, 1, 2}},
{"breed": {"labrador", "labrador", "vizsla"}} };
gl_sgraph g = gl_sgraph().add_vertices( vertices, "vid" );
// Overwrite existing vertex
gl_sgraph g2 = g.add_vertices ( gl_sframe { {"vid", {0}}, {"breed", "poodle"} },
"vid" );
// Add vertices with new attributes
gl_sgraph g3 = g2.add_vertices (gl_sframe { {"vid", {3}},
{"weight", "20 pounds"} },
"vid" );
std::cout << g.get_vertices() << std::endl;
std::cout << g2.get_vertices() << std::endl;
std::cout << g3.get_vertices() << std::endl;

Produces output:

vertices of g
+------+----------+
| __id | breed |
+------+----------+
| 0 | labrador |
| 2 | vizsla |
| 1 | labrador |
+------+----------+
vertices of g2
+------+----------+
| __id | breed |
+------+----------+
| 0 | poodle |
| 2 | vizsla |
| 1 | labrador |
+------+----------+
vertices of g3
+------+----------+-----------+
| __id | breed | weight |
+------+----------+-----------+
| 0 | poodle | None |
| 2 | vizsla | None |
| 1 | labrador | None |
| 3 | None | 20 pounds |
+------+----------+-----------+
Note
The column specified by vid_field will be renamed to "__id" as the special vertex attribute.
If a vertex id already exists in the graph, adding a new vertex with the same id will overwrite the entire vertex attributes.
If adding vertices to a non-empty graph, the types of the existing columns must be the same as those of the existing vertices.
This function returns a new graph, and does not modify the current graph.
See also
gl_sframe
vertices
get_vertices
add_edges

◆ edges()

gl_gframe turi::gl_sgraph::edges ( )

Returns a convenient "SFrame like" handler for the edges in this gl_sgraph.

While a regular gl_sframe is independent of any gl_sgraph, a gl_gframe is bound (or points) to a gl_sgraph. Modifying fields of the returned gl_gframe changes the edge data of the gl_sgraph. Also, modifications to the fields in the gl_sgraph will be reflected in the gl_gframe.

Example:

gl_sframe vertices { {"vid", {"cat", "dog", "foosa"}} };
gl_sframe edges { {"source", {"cat", "dog", "dog"}},
{"dest", {"dog", "cat", "foosa"}},
{"relationship", {"dislikes", "likes", "likes"}} };
gl_sgraph g = gl_sgraph(vertices, edges, "vid", "source", "dest");
// Add a new edge field "size":
g.edges()["size"] = { {"smaller than", "larger than", "equal to"} };
std::cout << g.edges() << std::endl;

Produces output:

Add a new edge field "size"
+----------+----------+--------------+--------------+
| __src_id | __dst_id | relationship | size |
+----------+----------+--------------+--------------+
| cat | dog | dislikes | smaller than |
| dog | cat | likes | larger than |
| dog | foosa | likes | equal to |
+----------+----------+--------------+--------------+
Note
To preserve the graph structure, the "__src_id" and "__dst_id" columns of this gl_gframe are read-only.
See also
vertices

◆ get_edge_field_types()

std::vector<flex_type_enum> turi::gl_sgraph::get_edge_field_types ( ) const

Return the types of edge fields in the graph.

◆ get_edge_fields()

std::vector<std::string> turi::gl_sgraph::get_edge_fields ( ) const

Return the names of edge fields in the graph.

◆ get_edges()

gl_sframe turi::gl_sgraph::get_edges ( const std::vector< vid_pair > &  ids = std::vector< vid_pair >(),
const std::map< std::string, flexible_type > &  fields = std::map< std::string, flexible_type >() 
) const

Return a collection of edges and their attributes.

This function is used to find edges by vertex IDs, filter on edge attributes, or list in-out neighbors of vertex sets.

Parameters
ids: Optional. Array of pairs of source and target vertices, each corresponding to an edge to fetch. Only edges in this list are returned. FLEX_UNDEFINED can be used to designate a wild card. For instance, {{1,3}, {2,FLEX_UNDEFINED}, {FLEX_UNDEFINED, 5}} will fetch the edge 1->3, all outgoing edges of 2 and all incoming edges of 5. ids may be left empty, which implies an array of all wild cards.
fields: Optional. Dictionary specifying equality constraints on field values. For example, { {"relationship", "following"} }, returns only edges whose 'relationship' field equals 'following'. FLEX_UNDEFINED can be used as a value to designate a wild card. e.g. { {"relationship", FLEX_UNDEFINED} } will find all edges with the field 'relationship' regardless of the value.

Example:

gl_sframe edges{ {"__src_id", {0, 0, 1}},
{"__dst_id", {1, 2, 2}},
{"rating", {5, 2, FLEX_UNDEFINED}} };
gl_sgraph g = gl_sgraph().add_edges(edges, "__src_id", "__dst_id");
std::cout << g.get_edges() << std::endl;
std::cout << g.get_edges({}, { {"rating", 5} }) << std::endl;
std::cout << g.get_edges({ {0, 1}, {1, 2} }) << std::endl;

Produces output:

Return all edges in the graph.
+----------+----------+--------+
| __src_id | __dst_id | rating |
+----------+----------+--------+
| 0 | 2 | 2 |
| 0 | 1 | 5 |
| 1 | 2 | None |
+----------+----------+--------+
Return edges with the attribute "rating" of 5.
+----------+----------+--------+
| __src_id | __dst_id | rating |
+----------+----------+--------+
| 0 | 1 | 5 |
+----------+----------+--------+
Return edges 0 --> 1 and 1 --> 2 (if present in the graph).
+----------+----------+--------+
| __src_id | __dst_id | rating |
+----------+----------+--------+
| 0 | 1 | 5 |
| 1 | 2 | None |
+----------+----------+--------+
See also
edges
get_vertices

◆ get_fields()

std::vector<std::string> turi::gl_sgraph::get_fields ( ) const

Return the names of both vertex fields and edge fields in the graph.

◆ get_proxy()

virtual std::shared_ptr<unity_sgraph> turi::gl_sgraph::get_proxy ( ) const
virtual

Retrieves a pointer to the underlying unity_sgraph

◆ get_vertex_field_types()

std::vector<flex_type_enum> turi::gl_sgraph::get_vertex_field_types ( ) const

Return the types of vertex fields in the graph.

◆ get_vertex_fields()

std::vector<std::string> turi::gl_sgraph::get_vertex_fields ( ) const

Return the names of vertex fields in the graph.

◆ get_vertices()

gl_sframe turi::gl_sgraph::get_vertices ( const std::vector< flexible_type > &  ids = std::vector< flexible_type >(),
const std::map< std::string, flexible_type > &  fields = std::map< std::string, flexible_type >() 
) const

Return a collection of vertices and their attributes.

Parameters
ids: List of vertex IDs to retrieve. Only vertices in this list will be returned.
fields: Dictionary specifying equality constraint on field values. For example { {"gender", "M"} }, returns only vertices whose 'gender' field is 'M'. FLEX_UNDEFINED can be used to designate a wild card. For example, { {"relationship", FLEX_UNDEFINED } } will find all vertices with the field 'relationship' regardless of the value.

Example:

gl_sframe vertices { {"__id", {0, 1, 2}},
{"gender", {"M", "F", "F"}} };
gl_sgraph g = gl_sgraph().add_vertices(vertices, "__id");
std::cout << g.get_vertices() << std::endl;
std::cout << g.get_vertices({0, 2}) << std::endl;
std::cout << g.get_vertices({}, { {"gender", "M"} }) << std::endl;

Produces output:

Return all vertices in the graph.
+------+--------+
| __id | gender |
+------+--------+
| 0 | M |
| 2 | F |
| 1 | F |
+------+--------+
Return vertices 0 and 2.
+------+--------+
| __id | gender |
+------+--------+
| 0 | M |
| 2 | F |
+------+--------+
Return vertices with the vertex attribute "gender" equal to "M".
+------+--------+
| __id | gender |
+------+--------+
| 0 | M |
+------+--------+
See also
vertices
get_edges

◆ num_edges()

size_t turi::gl_sgraph::num_edges ( ) const

Return the number of edges in the graph.

◆ num_vertices()

size_t turi::gl_sgraph::num_vertices ( ) const

Return the number of vertices in the graph.

◆ remove_edge_field()

void turi::gl_sgraph::remove_edge_field ( const std::string &  field)

Removes the edge field

Parameters
field: Name of the field to be removed.

◆ remove_vertex_field()

void turi::gl_sgraph::remove_vertex_field ( const std::string &  field)

Removes the vertex field

Parameters
field: Name of the field to be removed.

◆ rename_edge_fields()

void turi::gl_sgraph::rename_edge_fields ( const std::vector< std::string > &  oldnames,
const std::vector< std::string > &  newnames 
)

Renames the edge fields

Parameters
oldnames: List of names of the fields to be renamed.
newnames: List of new names of the fields, aligned with oldnames.

◆ rename_vertex_fields()

void turi::gl_sgraph::rename_vertex_fields ( const std::vector< std::string > &  oldnames,
const std::vector< std::string > &  newnames 
)

Renames the vertex fields

Parameters
oldnames: List of names of the fields to be renamed.
newnames: List of new names of the fields, aligned with oldnames.

◆ save()

void turi::gl_sgraph::save ( const std::string &  directory) const

Save the sgraph into a directory.

◆ save_reference()

void turi::gl_sgraph::save_reference ( const std::string &  directory) const

Save the sgraph using reference to other SFrames.

See also
gl_sframe::save_reference

◆ select_edge_fields()

gl_sgraph turi::gl_sgraph::select_edge_fields ( const std::vector< std::string > &  fields) const

Return a new gl_sgraph with only the selected edge fields. Other edge fields are discarded, while fields that do not exist in the gl_sgraph are ignored. Vertex fields remain the same in the new graph.

Parameters
fields: A list of field names to select.

Example:

gl_sframe edges { {"source", {"Alice", "Bob"}},
{"dest", {"Bob", "Alice"}},
{"follows", {0, 1}},
{"likes", {5, 3}} };
gl_sgraph g = gl_sgraph().add_edges(edges, "source", "dest");
gl_sgraph g2 = g.select_edge_fields({"follows"});
std::cout << g.edges() << std::endl;
std::cout << g2.edges() << std::endl;

Produces output:

vertices of g2
Note
"__src_id" and "__dst_id" will always be selected.
See also
get_fields
get_vertex_fields
get_edge_fields
select_vertex_fields
select_fields

◆ select_fields()

gl_sgraph turi::gl_sgraph::select_fields ( const std::vector< std::string > &  fields) const

Return a new gl_sgraph with only the selected fields (both vertex and edge fields). Other fields are discarded, while fields that do not exist in the gl_sgraph are ignored.

Parameters
fields: A list of field names to select.
Note
"__id", "__src_id" and "__dst_id" will always be selected.
See also
select_vertex_fields
select_edge_fields

◆ select_vertex_fields()

gl_sgraph turi::gl_sgraph::select_vertex_fields ( const std::vector< std::string > &  fields) const

Return a new gl_sgraph with only the selected vertex fields. Other vertex fields are discarded, while fields that do not exist in the gl_sgraph are ignored. Edge fields remain the same in the new graph.

Parameters
fields: A list of field names to select.

Example:

gl_sframe vertices { {"vid", {0, 1, 2}},
{"breed", {"labrador", "labrador", "vizsla"}},
{"age", {5, 3, 8}} };
gl_sgraph g = gl_sgraph().add_vertices(vertices, "vid");
gl_sgraph g2 = g.select_vertex_fields({"breed"});
std::cout << g.vertices() << std::endl;
std::cout << g2.vertices() << std::endl;

Produces output:

+------+----------+-----+
| __id | breed | age |
+------+----------+-----+
| 0 | labrador | 5 |
| 2 | vizsla | 8 |
| 1 | labrador | 3 |
+------+----------+-----+
vertices of g2
+------+----------+
| __id | breed |
+------+----------+
| 0 | labrador |
| 2 | vizsla |
| 1 | labrador |
+------+----------+
Note
"__id" will always be selected.
See also
get_fields
get_vertex_fields
get_edge_fields
select_fields
select_edge_fields

◆ summary()

std::map<std::string, flexible_type> turi::gl_sgraph::summary ( ) const

Return the number of vertices and edges as a dictionary.

Example:

gl_sgraph g = gl_sgraph();
std::cout << g.summary()["num_vertices"] << "\n"
<< g.summary()["num_edges"] << std::endl;

Produces output:

0
0
See also
num_vertices
num_edges

◆ triple_apply()

gl_sgraph turi::gl_sgraph::triple_apply ( const lambda_triple_apply_fn &  lambda,
const std::vector< std::string > &  mutated_fields 
) const

Apply a user defined lambda function on each of the edge triples, and returns the new graph.

An edge_triple is a simple struct containing source, edge and target of type std::map<std::string, flexible_type>. The lambda function is applied once on each of the edge_triple in parallel, with locking on both source and target vertices to prevent race conditions. The following pseudo code describes the effect of the function:

INPUT: G
OUTPUT: G'
G' = copy(G)
PARALLEL FOR (source, edge, target) in G':
LOCK (source, target)
edge_triple triple(source, edge, target)
lambda(triple)
FOR f in mutated_fields:
source[f] = triple.source[f] // if f in source
target[f] = triple.target[f] // if f in target
edge[f] = triple.edge[f] // if f in edge
END FOR
UNLOCK (source, target)
END PARALLEL FOR
RETURN G'

This function enables super easy implementations of common graph computations like degree_count, weighted_pagerank, connected_component, etc.

Example

gl_sframe edges { {"source", {0,1,2,3,4}},
{"dest", {1,2,3,4,0}} };
gl_sgraph g = gl_sgraph().add_edges(edges, "source", "dest");
g.vertices()["degree"] = 0;
std::cout << g.vertices() << std::endl;
auto degree_count_fn = [](edge_triple& triple)->void {
triple.source["degree"] += 1;
triple.target["degree"] += 1;
};
gl_sgraph g2 = g.triple_apply(degree_count_fn, {"degree"});
std::cout << g2.vertices() << std::endl;

Produces output:

Vertices of g
+------+--------+
| __id | degree |
+------+--------+
| 0 | 0 |
| 2 | 0 |
| 3 | 0 |
| 1 | 0 |
| 4 | 0 |
+------+--------+
Vertices of g2
+------+--------+
| __id | degree |
+------+--------+
| 0 | 2 |
| 2 | 2 |
| 3 | 2 |
| 1 | 2 |
| 4 | 2 |
+------+--------+
Note
mutated fields must be pre-allocated before triple_apply.
See also
edge_triple
lambda_triple_apply_fn

◆ vertices()

gl_gframe turi::gl_sgraph::vertices ( )

Returns a convenient "SFrame like" handler for the vertices in this gl_sgraph.

While a regular gl_sframe is independent of any gl_sgraph, a gl_gframe is bound (or points) to a gl_sgraph. Modifying fields of the returned gl_gframe changes the vertex data of the gl_sgraph. Also, modifications to the fields in the gl_sgraph will be reflected in the gl_gframe.

Example:

gl_sframe vertices { {"vid", {"cat", "dog", "hippo"}},
{"fluffy", {1, 1, FLEX_UNDEFINED}},
{"woof", {FLEX_UNDEFINED, 1, FLEX_UNDEFINED}} };
gl_sgraph g = gl_sgraph().add_vertices(vertices, "vid");
// Let's modify the vertex data by operating on g.vertices():
// Copy the 'woof' vertex field into a new 'bark' vertex field.
g.vertices()["bark"] = g.vertices()["woof"];
std::cout << g.vertices() << std::endl;
// Remove the 'woof' field.
g.vertices().remove_column("woof");
std::cout << g.vertices() << std::endl;
// Create a new field 'likes_fish'.
g.vertices()["likes_fish"] = g.vertices()["__id"] == "cat";
std::cout << g.vertices() << std::endl;
// Replace missing values with zeros.
for (const auto& col : g.vertices().column_names()) {
if (col != "__id") {
g.vertices().fillna(col, 0);
}
}
std::cout << g.vertices() << std::endl;

Produces output:

Copy the 'woof' vertex attribute into a new 'bark' vertex attribute:
+-------+--------+------+------+
| __id | fluffy | woof | bark |
+-------+--------+------+------+
| dog | 1.0 | 1.0 | 1.0 |
| cat | 1.0 | NA | NA |
| hippo | NA | NA | NA |
+-------+--------+------+------+
Remove the 'woof' attribute:
+-------+--------+------+
| __id | fluffy | bark |
+-------+--------+------+
| dog | 1.0 | 1.0 |
| cat | 1.0 | NA |
| hippo | NA | NA |
+-------+--------+------+
Create a new field 'likes_fish':
+-------+--------+------+------------+
| __id | fluffy | bark | likes_fish |
+-------+--------+------+------------+
| dog | 1.0 | 1.0 | 0 |
| cat | 1.0 | NA | 1 |
| hippo | NA | NA | 0 |
+-------+--------+------+------------+
Replace missing values with zeros:
+-------+--------+------+------------+
| __id | fluffy | bark | likes_fish |
+-------+--------+------+------------+
| dog | 1.0 | 1.0 | 0 |
| cat | 1.0 | 0.0 | 1 |
| hippo | 0.0 | 0.0 | 0 |
+-------+--------+------+------------+
Note
To preserve the graph structure, the "__id" column of this gl_gframe is read-only.
See also
edges

The documentation for this class was generated from the following file: