5 min read
Graph Database Fundamentals for Azure Developers
Graph databases represent data as nodes (vertices) and relationships (edges), making them ideal for scenarios where connections between data points are as important as the data itself. This post explores graph database fundamentals for Azure developers.
Understanding Graph Data Models
Core Concepts
Graph Structure:
[Vertex/Node] ---(Edge/Relationship)---> [Vertex/Node]
Properties:
- Vertices have labels and properties
- Edges have labels, direction, and properties
- Both can store key-value pairs
When to Use Graph Databases
// Example: Social Network Model
// Relational approach requires expensive JOINs
/*
SELECT DISTINCT fof.name
FROM users u
JOIN friendships f1 ON u.id = f1.user_id
JOIN friendships f2 ON f1.friend_id = f2.user_id
JOIN users fof ON f2.friend_id = fof.id
WHERE u.name = 'Alice'
AND fof.id <> u.id
AND f2.friend_id NOT IN (SELECT friend_id FROM friendships WHERE user_id = u.id)
*/
// Graph approach - natural traversal
/*
g.V().has('name', 'Alice').as('me')
.out('knows').aggregate('friends')
.out('knows')
.where(neq('me'))
.where(without('friends'))
.dedup()
.values('name')
*/
Modeling Entities as Vertices
/// <summary>
/// A vertex that can render itself as a Gremlin <c>g.addV(...)</c> script.
/// </summary>
public class GraphVertex
{
    /// <summary>Value written to the vertex's <c>id</c> property.</summary>
    public string Id { get; set; }

    /// <summary>Label passed to <c>addV</c>.</summary>
    public string Label { get; set; }

    /// <summary>Value written to the vertex's <c>pk</c> property.</summary>
    public string PartitionKey { get; set; }

    /// <summary>Additional key/value pairs, each emitted as a <c>.property(key, value)</c> step. May be null.</summary>
    public Dictionary<string, object> Properties { get; set; }

    /// <summary>
    /// Builds the <c>addV</c> script for this vertex: label, <c>id</c>, <c>pk</c>, then every entry of
    /// <see cref="Properties"/>.
    /// </summary>
    /// <returns>The Gremlin script text.</returns>
    public string ToGremlinQuery()
    {
        // A vertex with no extra properties is legitimate, so null is treated as "none"
        // (the same way GraphEdge treats its own Properties).
        var props = Properties is null
            ? string.Empty
            : string.Concat(
                Properties.Select(p => $".property('{Escape(p.Key)}', {FormatValue(p.Value)})"));

        return $"g.addV('{Escape(Label)}').property('id', '{Escape(Id)}').property('pk', '{Escape(PartitionKey)}'){props}";
    }

    /// <summary>
    /// Renders a property value as a Gremlin literal. Numbers and booleans are emitted bare,
    /// everything else as an escaped, single-quoted string.
    /// </summary>
    private static string FormatValue(object value)
    {
        // Numeric literals must always use '.' as the decimal separator, whatever the thread culture is.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return value switch
        {
            string s => $"'{Escape(s)}'",
            int i => i.ToString(invariant),
            long l => l.ToString(invariant),
            double d => d.ToString(invariant),
            float f => f.ToString(invariant),
            decimal m => m.ToString(invariant),
            bool b => b ? "true" : "false",
            DateTime dt => $"'{dt:O}'",
            _ => $"'{Escape(value?.ToString())}'"
        };
    }

    /// <summary>
    /// Escapes backslashes and single quotes so the text cannot terminate the surrounding
    /// single-quoted literal. Null becomes the empty string.
    /// </summary>
    private static string Escape(string text)
    {
        return text is null
            ? string.Empty
            : text.Replace("\\", "\\\\").Replace("'", "\\'");
    }
}
// Domain-specific vertices
/// <summary>
/// Vertex labelled <c>person</c>. The city doubles as the partition key.
/// </summary>
public class PersonVertex : GraphVertex
{
    /// <summary>
    /// Creates a person vertex carrying <c>name</c>, <c>age</c>, <c>city</c> and a <c>createdAt</c>
    /// stamp taken from <see cref="DateTime.UtcNow"/> at construction time.
    /// </summary>
    /// <param name="id">Vertex id.</param>
    /// <param name="name">Stored under the <c>name</c> property.</param>
    /// <param name="age">Stored under the <c>age</c> property.</param>
    /// <param name="city">Stored under the <c>city</c> property and used as the partition key.</param>
    public PersonVertex(string id, string name, int age, string city)
    {
        var attributes = new Dictionary<string, object>();
        attributes["name"] = name;
        attributes["age"] = age;
        attributes["city"] = city;
        attributes["createdAt"] = DateTime.UtcNow;

        Properties = attributes;
        PartitionKey = city;
        Label = "person";
        Id = id;
    }
}
/// <summary>
/// Vertex labelled <c>product</c>. The category doubles as the partition key.
/// </summary>
public class ProductVertex : GraphVertex
{
    /// <summary>
    /// Creates a product vertex carrying <c>name</c>, <c>price</c> and <c>category</c>.
    /// </summary>
    /// <param name="id">Vertex id.</param>
    /// <param name="name">Stored under the <c>name</c> property.</param>
    /// <param name="price">Stored under the <c>price</c> property.</param>
    /// <param name="category">Stored under the <c>category</c> property and used as the partition key.</param>
    public ProductVertex(string id, string name, decimal price, string category)
    {
        Label = "product";
        Id = id;
        PartitionKey = category;
        Properties = new Dictionary<string, object>
        {
            { "name", name },
            { "price", price },
            { "category", category }
        };
    }
}
Modeling Relationships as Edges
/// <summary>
/// A directed edge that can render itself as a Gremlin <c>addE</c> script.
/// </summary>
public class GraphEdge
{
    /// <summary>Id of the vertex the edge starts from.</summary>
    public string FromVertexId { get; set; }

    /// <summary>Id of the vertex the edge points to.</summary>
    public string ToVertexId { get; set; }

    /// <summary>Label passed to <c>addE</c>.</summary>
    public string Label { get; set; }

    /// <summary>Optional key/value pairs, each emitted as a <c>.property(key, value)</c> step. May be null.</summary>
    public Dictionary<string, object> Properties { get; set; }

    /// <summary>
    /// Builds the <c>g.V(from).addE(label).to(g.V(to))</c> script followed by one
    /// <c>.property</c> step per entry of <see cref="Properties"/>.
    /// </summary>
    /// <returns>The Gremlin script text.</returns>
    public string ToGremlinQuery()
    {
        var props = Properties is null
            ? string.Empty
            : string.Concat(
                Properties.Select(p => $".property('{Escape(p.Key)}', {FormatValue(p.Value)})"));

        return $"g.V('{Escape(FromVertexId)}').addE('{Escape(Label)}').to(g.V('{Escape(ToVertexId)}')){props}";
    }

    /// <summary>
    /// Renders a property value as a Gremlin literal, using the same rules as vertex properties:
    /// numbers and booleans bare, everything else as an escaped, single-quoted string.
    /// Previously every value was quoted, so numeric edge weights were written as strings.
    /// </summary>
    private static string FormatValue(object value)
    {
        // Numeric literals must use '.' as the decimal separator regardless of the thread culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return value switch
        {
            string s => $"'{Escape(s)}'",
            int i => i.ToString(invariant),
            long l => l.ToString(invariant),
            double d => d.ToString(invariant),
            float f => f.ToString(invariant),
            decimal m => m.ToString(invariant),
            bool b => b ? "true" : "false",
            DateTime dt => $"'{dt:O}'",
            _ => $"'{Escape(value?.ToString())}'"
        };
    }

    /// <summary>
    /// Escapes backslashes and single quotes so the text cannot terminate the surrounding
    /// single-quoted literal. Null becomes the empty string.
    /// </summary>
    private static string Escape(string text)
    {
        return text is null
            ? string.Empty
            : text.Replace("\\", "\\\\").Replace("'", "\\'");
    }
}
// Common relationship types
/// <summary>
/// Edge label constants, grouped by the domain they are used in.
/// </summary>
public static class RelationshipTypes
{
    // ---- Social ----

    /// <summary>Edge label <c>knows</c>.</summary>
    public const string Knows = "knows";

    /// <summary>Edge label <c>follows</c>.</summary>
    public const string Follows = "follows";

    /// <summary>Edge label <c>blocked_by</c>.</summary>
    public const string BlockedBy = "blocked_by";

    // ---- Commerce ----

    /// <summary>Edge label <c>purchased</c>.</summary>
    public const string Purchased = "purchased";

    /// <summary>Edge label <c>viewed</c>.</summary>
    public const string Viewed = "viewed";

    /// <summary>Edge label <c>added_to_cart</c>.</summary>
    public const string AddedToCart = "added_to_cart";

    /// <summary>Edge label <c>reviewed</c>.</summary>
    public const string Reviewed = "reviewed";

    // ---- Organizational ----

    /// <summary>Edge label <c>reports_to</c>.</summary>
    public const string ReportsTo = "reports_to";

    /// <summary>Edge label <c>member_of</c>.</summary>
    public const string MemberOf = "member_of";

    /// <summary>Edge label <c>works_at</c>.</summary>
    public const string WorksAt = "works_at";

    // ---- Content ----

    /// <summary>Edge label <c>created_by</c>.</summary>
    public const string CreatedBy = "created_by";

    /// <summary>Edge label <c>tagged_with</c>.</summary>
    public const string TaggedWith = "tagged_with";

    /// <summary>Edge label <c>related_to</c>.</summary>
    public const string RelatedTo = "related_to";
}
Graph Database Design Patterns
/// <summary>
/// Example traversals for common graph query shapes, executed through a <c>GremlinClient</c>.
/// </summary>
public class GraphDesignPatterns
{
    private readonly GremlinClient _client;

    /// <summary>
    /// Creates the pattern helper around an existing client. Without this constructor the
    /// read-only <c>_client</c> field could never be assigned and every method would fail.
    /// </summary>
    /// <param name="client">Client used to submit the Gremlin scripts.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="client"/> is null.</exception>
    public GraphDesignPatterns(GremlinClient client)
    {
        _client = client ?? throw new System.ArgumentNullException(nameof(client));
    }

    // Pattern 1: Fan-out queries (one-to-many)
    /// <summary>Returns the <c>name</c> of every <c>product</c> reached from the user over outgoing <c>purchased</c> edges.</summary>
    /// <param name="userId">Id of the starting <c>user</c> vertex.</param>
    public async Task<List<string>> GetUserPurchasesAsync(string userId)
    {
        var query = $@"
g.V('{Escape(userId)}')
.hasLabel('user')
.out('purchased')
.hasLabel('product')
.values('name')";
        var result = await _client.SubmitAsync<string>(query);
        return result.ToList();
    }

    // Pattern 2: Fan-in queries (many-to-one)
    /// <summary>Returns the <c>name</c> of every <c>user</c> that has an outgoing <c>purchased</c> edge to the product.</summary>
    /// <param name="productId">Id of the starting <c>product</c> vertex.</param>
    public async Task<List<string>> GetProductBuyersAsync(string productId)
    {
        var query = $@"
g.V('{Escape(productId)}')
.hasLabel('product')
.in('purchased')
.hasLabel('user')
.values('name')";
        var result = await _client.SubmitAsync<string>(query);
        return result.ToList();
    }

    // Pattern 3: Path finding
    /// <summary>
    /// Returns up to five simple paths (rendered by <c>name</c>) from one vertex to another,
    /// following edges in either direction and giving up after six hops.
    /// </summary>
    /// <param name="fromId">Id of the start vertex.</param>
    /// <param name="toId">Id of the target vertex.</param>
    public async Task<List<dynamic>> FindConnectionPathAsync(string fromId, string toId)
    {
        var target = Escape(toId);
        // The second hasId discards traversers that stopped because of the hop limit
        // rather than because they reached the target.
        var query = $@"
g.V('{Escape(fromId)}')
.repeat(both().simplePath())
.until(hasId('{target}').or().loops().is(6))
.hasId('{target}')
.path()
.by('name')
.limit(5)";
        var result = await _client.SubmitAsync<dynamic>(query);
        return result.ToList();
    }

    // Pattern 4: Recommendation through common connections
    /// <summary>
    /// Ranks products bought by other buyers of the user's products, excluding what the user
    /// already bought, and returns the ten most frequent by <c>name</c>.
    /// </summary>
    /// <param name="userId">Id of the user to recommend for.</param>
    public async Task<List<dynamic>> GetRecommendationsAsync(string userId)
    {
        // without() accepts a collection or the label of a side-effect, not a traversal, so the
        // user's own purchases are collected with aggregate('owned') and referenced by label.
        var query = $@"
g.V('{Escape(userId)}')
.as('user')
.out('purchased')
.aggregate('owned')
.in('purchased')
.where(neq('user'))
.out('purchased')
.where(without('owned'))
.groupCount()
.by('name')
.order(local)
.by(values, desc)
.limit(local, 10)";
        var result = await _client.SubmitAsync<dynamic>(query);
        return result.ToList();
    }

    // Pattern 5: Aggregation across paths
    /// <summary>
    /// Projects three follower-reach counts for the user: <c>direct</c> (one <c>follows</c> hop),
    /// <c>indirect</c> (distinct vertices at two hops) and <c>total</c> (distinct vertices within three hops).
    /// </summary>
    /// <param name="userId">Id of the user vertex.</param>
    /// <returns>The first result of the projection, or the default value when there is none.</returns>
    public async Task<dynamic> GetInfluenceScoreAsync(string userId)
    {
        // emit() makes 'total' include vertices found at depths 1 and 2 as well; without it
        // only the vertices at exactly depth 3 would be counted.
        var query = $@"
g.V('{Escape(userId)}')
.project('direct', 'indirect', 'total')
.by(out('follows').count())
.by(out('follows').out('follows').dedup().count())
.by(repeat(out('follows')).emit().times(3).dedup().count())";
        var result = await _client.SubmitAsync<dynamic>(query);
        return result.FirstOrDefault();
    }

    /// <summary>
    /// Escapes backslashes and single quotes so an id cannot terminate the single-quoted
    /// literal it is spliced into. Null becomes the empty string.
    /// </summary>
    private static string Escape(string text)
    {
        return text is null
            ? string.Empty
            : text.Replace("\\", "\\\\").Replace("'", "\\'");
    }
}
Performance Optimization
/// <summary>
/// Builders for Gremlin scripts that illustrate common performance techniques.
/// Every method only produces query text; nothing is executed here.
/// </summary>
public class GraphPerformanceOptimizer
{
    // 1. Use partition keys effectively
    /// <summary>
    /// Builds a query that selects a vertex by <c>pk</c> and id, follows outgoing <c>knows</c> edges,
    /// keeps only neighbours with the same <c>pk</c>, and returns their <c>name</c>.
    /// </summary>
    /// <param name="partitionKey">Value matched against the <c>pk</c> property.</param>
    /// <param name="vertexId">Id of the starting vertex.</param>
    public string OptimizedLocalQuery(string partitionKey, string vertexId)
    {
        var pk = Escape(partitionKey);
        return $@"
g.V()
.has('pk', '{pk}')
.hasId('{Escape(vertexId)}')
.out('knows')
.has('pk', '{pk}')
.values('name')";
    }

    // 2. Limit traversal depth
    /// <summary>
    /// Builds a query returning up to 100 simple outgoing paths of exactly <paramref name="maxDepth"/> hops.
    /// </summary>
    /// <param name="startId">Id of the starting vertex.</param>
    /// <param name="maxDepth">Number of hops; must be at least 1.</param>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="maxDepth"/> is less than 1.</exception>
    public string BoundedTraversal(string startId, int maxDepth)
    {
        // repeat()...until() checks the condition after each iteration, when loops() is already >= 1.
        // A depth below 1 would therefore never match and the traversal would not be bounded at all.
        if (maxDepth < 1)
        {
            throw new System.ArgumentOutOfRangeException(
                nameof(maxDepth), maxDepth, "Traversal depth must be at least 1.");
        }

        return $@"
g.V('{Escape(startId)}')
.repeat(out().simplePath())
.until(loops().is({maxDepth}))
.path()
.limit(100)";
    }

    // 3. Use projections to reduce data transfer
    /// <summary>
    /// Builds a query projecting <c>id</c>, <c>name</c> and outgoing-edge count for every vertex
    /// the user <c>knows</c>.
    /// </summary>
    /// <param name="userId">Id of the starting vertex.</param>
    public string EfficientProjection(string userId)
    {
        return $@"
g.V('{Escape(userId)}')
.out('knows')
.project('id', 'name', 'connectionCount')
.by(id())
.by('name')
.by(out().count())";
    }

    // 4. Batch operations
    /// <summary>
    /// Builds one <c>addV('person')</c> statement per tuple and joins them with <c>"; "</c>.
    /// An empty list yields an empty string.
    /// </summary>
    /// <param name="vertices">Id, name and partition key of each vertex to create.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="vertices"/> is null.</exception>
    public string BatchVertexCreation(List<(string id, string name, string pk)> vertices)
    {
        if (vertices is null)
        {
            throw new System.ArgumentNullException(nameof(vertices));
        }

        // NOTE(review): whether the target server accepts several ';'-separated statements in one
        // request is not visible from here — confirm before relying on this for real batches.
        var statements = vertices.Select(v =>
            $"g.addV('person').property('id', '{Escape(v.id)}').property('name', '{Escape(v.name)}').property('pk', '{Escape(v.pk)}')");
        return string.Join("; ", statements);
    }

    /// <summary>
    /// Escapes backslashes and single quotes so a value cannot terminate the single-quoted
    /// literal it is spliced into. Null becomes the empty string.
    /// </summary>
    private static string Escape(string text)
    {
        return text is null
            ? string.Empty
            : text.Replace("\\", "\\\\").Replace("'", "\\'");
    }
}
Common Graph Algorithms
from gremlin_python.driver import client, serializer
class GraphAlgorithms:
    """Approximate graph algorithms expressed as Gremlin scripts.

    Every public method builds a script and runs it through the client passed to
    the constructor, returning whatever rows the server sends back.
    """

    def __init__(self, gremlin_client):
        # The client only needs a `submit(query)` method whose result offers `.all().result()`.
        self.client = gremlin_client

    def pagerank_approximation(self, iterations=3):
        """Approximate PageRank using iterative neighbor counting.

        For each `person`, counts the distinct vertices reached after `iterations`
        hops over `knows` edges (either direction) and orders by that count, descending.
        """
        # int() keeps a stray float or string from producing an invalid `times(...)` argument.
        iterations = int(iterations)
        query = f"""
g.V().hasLabel('person')
.project('name', 'score')
.by('name')
.by(
repeat(both('knows'))
.times({iterations})
.dedup()
.count()
)
.order()
.by(select('score'), desc)
"""
        return self._execute(query)

    def find_clusters(self, min_size=3):
        """Find tightly connected groups.

        Returns each `person` with at least `min_size` outgoing `knows` edges,
        together with the names of the people they know.
        """
        min_size = int(min_size)
        query = f"""
g.V().hasLabel('person')
.where(out('knows').count().is(gte({min_size})))
.project('name', 'group')
.by('name')
.by(out('knows').values('name').fold())
"""
        return self._execute(query)

    def detect_bridges(self):
        """Find vertices that connect different communities.

        A person `p` qualifies when some vertex two `knows` hops away is also known
        by someone who is neither `p` nor one of the people `p` knows directly.
        """
        # `without(...)` takes a collection or a side-effect label, not a traversal, and `p`
        # labels a single vertex, so the exclusions are written with neq('p') and a not(...)
        # filter that rejects anyone with an incoming `knows` edge from `p`.
        query = """
g.V().hasLabel('person')
.as('p')
.out('knows')
.out('knows')
.where(neq('p'))
.in('knows')
.where(neq('p'))
.not(__.in('knows').where(eq('p')))
.select('p')
.dedup()
.values('name')
"""
        return self._execute(query)

    def _execute(self, query):
        """Submit `query` and block until every result row has arrived."""
        # `submit` is the stable blocking entry point; the camelCase `submitAsync`
        # alias used before is deprecated in newer driver releases.
        return self.client.submit(query).all().result()
Key Takeaways
- Model relationships first - Think in terms of connections
- Choose partition keys wisely - Affects query performance
- Limit traversal depth - Unbounded queries can be expensive
- Use projections - Return only needed data
- Leverage indexes - Create indexes for frequently filtered properties
Graph databases unlock powerful relationship-based queries that would be complex or impractical to express with traditional relational approaches.