5 min read
Mastering Gremlin Query Language
Gremlin is the graph traversal language of Apache TinkerPop, and it powers Azure Cosmos DB’s graph database capabilities. This comprehensive guide covers Gremlin from basics to advanced patterns.
Gremlin Fundamentals
Basic Vertex Operations
// Create a vertex
g.addV('person')
.property('id', 'person-1')
.property('name', 'Alice')
.property('age', 30)
.property('pk', 'seattle')
// Read a vertex by ID
g.V('person-1')
// Read with filters
g.V().hasLabel('person').has('name', 'Alice')
// Update a vertex property
g.V('person-1').property('age', 31)
// Delete a vertex (and its edges)
g.V('person-1').drop()
Basic Edge Operations
// Create an edge between vertices
g.V('person-1').addE('knows').to(g.V('person-2')).property('since', 2020)
// Read edges from a vertex
g.V('person-1').outE('knows')
// Read the target of edges
g.V('person-1').out('knows')
// Update edge property
// Edges have no 'to' property, so pick the edge by its target vertex (inV) before setting the property
g.V('person-1').outE('knows').where(inV().hasId('person-2')).property('strength', 'strong')
// Delete an edge
g.V('person-1').outE('knows').where(inV().hasId('person-2')).drop()
Traversal Steps
// Outgoing edges and vertices
g.V('person-1').out('knows') // Get adjacent vertices via 'knows' edges
g.V('person-1').outE('knows') // Get outgoing 'knows' edges
g.V('person-1').outE().inV() // Get edges then their target vertices
// Incoming edges and vertices
g.V('person-1').in('knows') // Get vertices with 'knows' edges to person-1
g.V('person-1').inE('knows') // Get incoming 'knows' edges
g.V('person-1').inE().outV() // Get edges then their source vertices
// Both directions
g.V('person-1').both('knows') // Adjacent vertices in both directions
g.V('person-1').bothE('knows') // Edges in both directions
Filtering and Predicates
// Has step with predicates
g.V().has('age', gt(25)) // Greater than
g.V().has('age', gte(25)) // Greater than or equal
g.V().has('age', lt(40)) // Less than
g.V().has('age', lte(40)) // Less than or equal
g.V().has('age', between(25, 40)) // Between (inclusive start, exclusive end)
g.V().has('age', inside(25, 40)) // Inside range (exclusive both)
g.V().has('age', outside(25, 40)) // Outside range (exclusive both)
g.V().has('age', within(25, 30, 35)) // In list
g.V().has('age', without(25, 30, 35)) // Not in list
g.V().has('name', startingWith('Al')) // String starts with
g.V().has('name', endingWith('ce')) // String ends with
g.V().has('name', containing('lic')) // String contains
// Multiple filters
g.V().hasLabel('person')
.has('age', gt(25))
.has('city', 'Seattle')
// Or conditions
g.V().or(
has('city', 'Seattle'),
has('city', 'Portland')
)
// And conditions (chained has() steps are already AND-ed; and() just makes it explicit)
g.V().and(
has('age', gt(25)),
has('city', 'Seattle')
)
// Not condition
g.V().hasLabel('person').not(has('verified', true))
// Where with traversals
g.V().hasLabel('person')
.where(out('knows').count().is(gt(5))) // People with more than 5 connections
Aggregation and Grouping
// Count
g.V().hasLabel('person').count()
// Group by property
g.V().hasLabel('person')
.group()
.by('city')
.by(count())
// Group with value aggregation
g.V().hasLabel('person')
.group()
.by('city')
.by(values('age').mean())
// Multiple aggregations
g.V().hasLabel('person')
.group()
.by('city')
.by(
fold()
.project('count', 'avgAge', 'names')
.by(count(local))
.by(unfold().values('age').mean())
.by(unfold().values('name').fold())
)
// Sum, min, max, mean
g.V().hasLabel('order').values('total').sum()
g.V().hasLabel('order').values('total').min()
g.V().hasLabel('order').values('total').max()
g.V().hasLabel('order').values('total').mean()
Path Traversals
// Simple path
g.V('person-1')
.repeat(out('knows'))
.until(hasId('person-5'))
.path()
// Path showing each vertex's name instead of the full vertex
g.V('person-1')
.repeat(out('knows'))
.until(hasId('person-5'))
.path()
.by('name')
// Shortest path (limit depth and results)
g.V('person-1')
.repeat(out('knows').simplePath())
.until(hasId('person-5').or().loops().is(6))
.hasId('person-5')
.path()
.by('name')
.limit(1)
// All paths up to depth N
g.V('person-1')
.repeat(out('knows').simplePath())
.times(3)
.path()
.by('name')
Projection and Transformation
// Project to custom shape
g.V().hasLabel('person')
.project('name', 'age', 'friendCount', 'company')
.by('name')
.by('age')
.by(out('knows').count())
.by(out('works_at').values('name').fold())
// Select labeled steps
g.V('person-1').as('source')
.out('knows').as('friend')
.out('knows').as('fof')
.select('source', 'friend', 'fof')
.by('name')
// Coalesce for default values
g.V('person-1')
.project('name', 'nickname')
.by('name')
.by(coalesce(values('nickname'), constant('N/A')))
// Map transformation
g.V().hasLabel('person')
.map(
project('fullName', 'location')
.by(values('firstName', 'lastName').fold())
.by('city')
)
Advanced Patterns
// Recommendation: Products bought by similar users
// 'self' labels the starting user so other buyers can be compared against it;
// aggregate('owned') collects ALL of the user's purchases so they can be excluded later
g.V('user-1').as('self')
.out('purchased').aggregate('owned')
.in('purchased')
.where(neq('self')) // other users who bought the same products
.out('purchased')
.where(without('owned')) // drop products the user already owns
.groupCount()
.by('name')
.order(local)
.by(values, desc)
.limit(local, 10)
// Mutual friends
g.V('person-1').as('p1')
.out('knows').as('mutual')
.in('knows')
.hasId('person-2')
.select('mutual')
.values('name')
// Influence score (followers of followers)
g.V('person-1')
.project('direct', 'secondary', 'tertiary')
.by(in('follows').count())
.by(in('follows').in('follows').dedup().count())
.by(repeat(in('follows')).times(3).dedup().count())
// Community detection (shared connections)
g.V('person-1').as('start')
.out('knows').aggregate('friends')
.out('knows')
.where(within('friends'))
.path()
.by('name')
// Cycle detection (cycles of 2 to 5 'knows' edges)
// simplePath() keeps the walk from revisiting any vertex, so the start vertex can
// never be reached inside repeat(); the final out('knows') hop closes the cycle instead
g.V().hasLabel('person')
.as('start')
.repeat(out('knows').simplePath())
.emit() // consider every intermediate depth, not only the deepest
.times(4)
.out('knows')
.where(eq('start')) // keep only walks whose closing hop returns to the start
.path()
.by('name')
C# Gremlin Examples
/// <summary>
/// Fluent helper that assembles a Gremlin traversal as a query string.
/// </summary>
/// <remarks>
/// Every step method appends text to an internal buffer and returns the same
/// instance so calls can be chained; <see cref="Build"/> returns the accumulated text.
/// Names and string values are emitted as single-quoted Gremlin literals with
/// backslashes, single quotes and line breaks escaped, so an embedded quote cannot
/// terminate the literal early. <see cref="ByTraversal"/> is the exception: it
/// appends its argument verbatim and must only receive trusted Gremlin text.
/// </remarks>
public class GremlinQueryBuilder
{
    private readonly StringBuilder _query = new();

    /// <summary>Appends <c>g.V()</c>, or <c>g.V('id')</c> when <paramref name="id"/> is given.</summary>
    /// <param name="id">Optional vertex id; <c>null</c> selects all vertices.</param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder V(string id = null)
    {
        _query.Append(id != null ? $"g.V({Quote(id)})" : "g.V()");
        return this;
    }

    /// <summary>Appends a <c>.hasLabel('label')</c> filter.</summary>
    /// <param name="label">Label to match.</param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder HasLabel(string label)
    {
        _query.Append($".hasLabel({Quote(label)})");
        return this;
    }

    /// <summary>Appends a <c>.has('property', value)</c> equality filter.</summary>
    /// <param name="property">Property key to test.</param>
    /// <param name="value">
    /// Value to compare against. Strings and chars are quoted and escaped, booleans
    /// are written as <c>true</c>/<c>false</c>, and formattable values (numbers) are
    /// written with the invariant culture so the decimal separator is always a dot.
    /// </param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="value"/> is <c>null</c>.</exception>
    public GremlinQueryBuilder Has(string property, object value)
    {
        if (value == null)
        {
            throw new System.ArgumentNullException(nameof(value));
        }

        _query.Append($".has({Quote(property)}, {FormatLiteral(value)})");
        return this;
    }

    /// <summary>Appends <c>.out()</c>, or <c>.out('edge')</c> when <paramref name="edge"/> is given.</summary>
    /// <param name="edge">Optional edge label to follow.</param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder Out(string edge = null)
    {
        _query.Append(edge != null ? $".out({Quote(edge)})" : ".out()");
        return this;
    }

    /// <summary>Appends <c>.in()</c>, or <c>.in('edge')</c> when <paramref name="edge"/> is given.</summary>
    /// <param name="edge">Optional edge label to follow.</param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder In(string edge = null)
    {
        _query.Append(edge != null ? $".in({Quote(edge)})" : ".in()");
        return this;
    }

    /// <summary>Appends a <c>.values(...)</c> step for the given property keys.</summary>
    /// <param name="properties">Property keys; when none are given, <c>.values()</c> is emitted.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="properties"/> is <c>null</c>.</exception>
    public GremlinQueryBuilder Values(params string[] properties)
    {
        _query.Append($".values({QuoteAll(properties, nameof(properties))})");
        return this;
    }

    /// <summary>Appends a <c>.project(...)</c> step with the given field names.</summary>
    /// <param name="fields">Names of the projected fields, in order.</param>
    /// <returns>This builder, for chaining.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="fields"/> is <c>null</c>.</exception>
    public GremlinQueryBuilder Project(params string[] fields)
    {
        _query.Append($".project({QuoteAll(fields, nameof(fields))})");
        return this;
    }

    /// <summary>Appends a <c>.by('property')</c> modulator.</summary>
    /// <param name="property">Property key used by the modulator.</param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder By(string property)
    {
        _query.Append($".by({Quote(property)})");
        return this;
    }

    /// <summary>Appends a <c>.by(traversal)</c> modulator.</summary>
    /// <param name="traversal">
    /// Gremlin text inserted verbatim (no quoting or escaping). Never pass
    /// untrusted input here.
    /// </param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder ByTraversal(string traversal)
    {
        _query.Append($".by({traversal})");
        return this;
    }

    /// <summary>Appends a <c>.count()</c> step.</summary>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder Count()
    {
        _query.Append(".count()");
        return this;
    }

    /// <summary>Appends a <c>.limit(n)</c> step.</summary>
    /// <param name="n">Value written into the limit step.</param>
    /// <returns>This builder, for chaining.</returns>
    public GremlinQueryBuilder Limit(int n)
    {
        // Invariant formatting keeps the digits culture-independent.
        _query.Append($".limit({n.ToString(System.Globalization.CultureInfo.InvariantCulture)})");
        return this;
    }

    /// <summary>Returns the query text accumulated so far.</summary>
    public string Build() => _query.ToString();

    // Renders a non-null value as a Gremlin literal.
    private static string FormatLiteral(object value) => value switch
    {
        string text => Quote(text),
        char ch => Quote(ch.ToString()),
        // bool.ToString() yields "True"/"False", which Gremlin does not accept.
        bool flag => flag ? "true" : "false",
        // Avoid culture-specific output such as "1,5" that would split the argument list.
        System.IFormattable formattable => formattable.ToString(null, System.Globalization.CultureInfo.InvariantCulture),
        _ => value.ToString(),
    };

    // Wraps text in single quotes, escaping characters that would end or break the literal.
    // A null input is rendered as the empty literal ''.
    private static string Quote(string text)
    {
        var literal = new StringBuilder("'");
        foreach (var ch in text ?? string.Empty)
        {
            switch (ch)
            {
                case '\\':
                    literal.Append("\\\\");
                    break;
                case '\'':
                    literal.Append("\\'");
                    break;
                case '\n':
                    literal.Append("\\n");
                    break;
                case '\r':
                    literal.Append("\\r");
                    break;
                default:
                    literal.Append(ch);
                    break;
            }
        }

        return literal.Append('\'').ToString();
    }

    // Quotes each name and joins them with ", "; an empty array yields an empty string.
    private static string QuoteAll(string[] names, string paramName)
    {
        if (names == null)
        {
            throw new System.ArgumentNullException(paramName);
        }

        var list = new StringBuilder();
        for (var i = 0; i < names.Length; i++)
        {
            if (i > 0)
            {
                list.Append(", ");
            }

            list.Append(Quote(names[i]));
        }

        return list.ToString();
    }
}
// Usage: each call appends one step to the same builder, so the query can be
// assembled in stages before it is rendered with Build()
var builder = new GremlinQueryBuilder();

// Stage 1: pick the starting vertices
builder.V().HasLabel("person").Has("city", "Seattle");

// Stage 2: walk to their contacts
builder.Out("knows");

// Stage 3: shape and cap the output
builder.Project("name", "connections").By("name").ByTraversal("out('knows').count()");
builder.Limit(10);

var query = builder.Build();
// Result: g.V().hasLabel('person').has('city', 'Seattle').out('knows').project('name', 'connections').by('name').by(out('knows').count()).limit(10)
Best Practices
- Use simplePath() - Prevents cycles in traversals
- Set limits - Always limit unbounded queries
- Filter early - Apply filters as early as possible
- Use indexes - Create indexes for filtered properties
- Project only needed data - Reduce data transfer
Mastering Gremlin opens up powerful graph analytics capabilities in Azure Cosmos DB.