一、为什么需要图数据库
传统关系型数据库处理图关系时存在瓶颈:
关系型数据库的问题:
- 多层JOIN查询性能差
- 递归查询复杂
- 社交网络查询困难
- 路径分析效率低
图数据库的优势:
- 无需JOIN,直接遍历关系
- 递归查询天然支持
- 适合社交网络分析
- 路径算法高效
二、Neo4j核心概念
1. 基本元素
节点(Node)
├── 属性(Property):key-value
├── 标签(Label):类型标记
└── 关系(Relationship)
关系(Relationship)
├── 类型(Type)
├── 方向(Direction)
└── 属性(Property)
2. 数据模型
cypher
// Create the user node.
// MERGE keeps the script idempotent: re-running it does not create a second user 1001.
MERGE (u:User {userId: '1001'})
ON CREATE SET u.name = '张三', u.age = 28, u.city = '北京';

// Create a relationship between two users.
// The nodes are merged first: a single CREATE over the whole pattern would create
// brand-new User nodes with the same userId instead of linking the existing ones.
MERGE (u1:User {userId: '1001'})
MERGE (u2:User {userId: '1002'})
MERGE (u1)-[r:FOLLOWS]->(u2)
ON CREATE SET r.since = '2024-01-01';

// Create a chain of relationships: 1001 -> 1002 -> 1003 -> post 2001
MERGE (u1:User {userId: '1001'})
MERGE (u2:User {userId: '1002'})
MERGE (u3:User {userId: '1003'})
MERGE (p:Post {postId: '2001'})
MERGE (u1)-[:FOLLOWS]->(u2)
MERGE (u2)-[:FOLLOWS]->(u3)
MERGE (u3)-[:LIKES]->(p);
三、Spring Data Neo4j实战
1. 依赖配置
xml
<!-- Spring Boot starter for Spring Data Neo4j. -->
<!-- No <version> is declared here; presumably it is managed by the Spring Boot parent/BOM (confirm in the enclosing pom.xml). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-neo4j</artifactId>
</dependency>
yaml
# Spring Boot 2.4+ (Spring Data Neo4j 6+, i.e. the @Node-based mapping):
# driver connection settings live under spring.neo4j.*;
# only the target database name stays under spring.data.neo4j.*.
spring:
  neo4j:
    uri: bolt://localhost:7687
    authentication:
      username: neo4j
      password: password
  data:
    neo4j:
      database: neo4j
2. 实体定义
java
/**
 * Graph node labelled {@code User}.
 *
 * <p>Equality is based on the business key {@code userId}, so two instances that were
 * loaded by separate queries but represent the same user compare equal. This is what
 * makes {@code Set<User>} membership checks and removals behave as expected.
 */
@Node(labels = "User")
public class User {

    /** Generated identifier of the node. */
    @Id
    @GeneratedValue
    private Long id;

    /** Business identifier, stored in the {@code userId} property. */
    @Property("userId")
    private String userId;

    private String name;
    private Integer age;
    private String city;

    // NOTE(review): @CreatedDate is presumably only populated when auditing is enabled — confirm.
    @CreatedDate
    private LocalDateTime createTime;

    /** Users this user follows (outgoing FOLLOWS relationships). */
    @Relationship(type = "FOLLOWS", direction = Relationship.Direction.OUTGOING)
    private Set<User> following = new HashSet<>();

    /** Users following this user (incoming FOLLOWS relationships). */
    @Relationship(type = "FOLLOWS", direction = Relationship.Direction.INCOMING)
    private Set<User> followers = new HashSet<>();

    /** Posts this user likes (LIKES relationships; no direction is specified on the annotation). */
    @Relationship(type = "LIKES")
    private Set<Post> likedPosts = new HashSet<>();

    public Long getId() {
        return id;
    }

    public String getUserId() {
        return userId;
    }

    /**
     * Sets the business identifier. Because equality and hashing use this value,
     * change it only before the instance is placed into a hash-based collection.
     */
    public void setUserId(String userId) {
        this.userId = userId;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public LocalDateTime getCreateTime() {
        return createTime;
    }

    public Set<User> getFollowing() {
        return following;
    }

    public Set<User> getFollowers() {
        return followers;
    }

    public Set<Post> getLikedPosts() {
        return likedPosts;
    }

    /**
     * Two users are equal when they carry the same non-null {@code userId}.
     * Relationship collections are deliberately excluded so that comparing users
     * never walks the graph.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof User)) {
            return false;
        }
        User that = (User) other;
        return userId != null && userId.equals(that.userId);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hashCode(userId);
    }
}
/**
 * Graph node labelled {@code Post}.
 */
@Node("Post")
public class Post {

    /** Generated identifier of the node. */
    @Id @GeneratedValue
    private Long id;

    /** Business identifier of the post. */
    private String postId;

    private String content;

    private Integer likes;

    @CreatedDate
    private LocalDateTime createTime;

    /** Author of the post, mapped from an incoming {@code AUTHOR} relationship. */
    @Relationship(direction = Relationship.Direction.INCOMING, type = "AUTHOR")
    private User author;
}
/**
 * Property holder for a follow relationship: carries the {@code since} timestamp
 * and points at the user on the other end.
 *
 * <p>NOTE(review): no entity in the visible source declares a field of this type
 * ({@code User.following} is a {@code Set<User>}), so this mapping looks unused — confirm.
 */
@RelationshipProperties
public class Follows {

    /** Generated identifier of the relationship. */
    @Id @GeneratedValue
    private Long id;

    /** When the follow relationship started. */
    private LocalDateTime since;

    /** The user at the target end of the relationship. */
    @TargetNode
    private User user;
}
3. Repository定义
java
/**
 * Repository for {@link User} nodes, keyed by the generated {@code Long} id.
 */
public interface UserRepository extends Neo4jRepository<User, Long> {

    /**
     * Looks a user up by its {@code userId} property.
     *
     * @param userId business identifier of the user
     * @return the matching user, or an empty Optional
     */
    Optional<User> findByUserId(String userId);

    /**
     * Returns the users that the given user follows.
     *
     * @param userId business identifier of the follower
     */
    @Query("MATCH (u:User {userId: $userId})-[:FOLLOWS]->(f:User) RETURN f")
    List<User> findFollowing(
            @Param("userId") String userId);

    /**
     * Returns the users that follow the given user.
     *
     * @param userId business identifier of the followed user
     */
    @Query("MATCH (u:User {userId: $userId})<-[:FOLLOWS]-(f:User) RETURN f")
    List<User> findFollowers(
            @Param("userId") String userId);

    /**
     * Returns the users that both given users follow.
     *
     * @param userId1 business identifier of the first user
     * @param userId2 business identifier of the second user
     */
    @Query("MATCH (u1:User {userId: $userId1})-[:FOLLOWS]->(f:User)<-[:FOLLOWS]-(u2:User {userId: $userId2}) RETURN f")
    List<User> findCommonFollowing(
            @Param("userId1") String userId1,
            @Param("userId2") String userId2);
}
4. 业务服务
java
/**
 * Follow / unfollow operations and friend recommendations on the user graph.
 */
@Service
@Slf4j
public class SocialService {

    /**
     * Friends-of-friends query: users followed by the people {@code $userId} follows,
     * excluding the user itself and anyone already followed, ranked by the number of
     * connecting paths. Aggregation and ordering happen in WITH so that only the node
     * is returned and can be mapped to {@link User}.
     */
    private static final String RECOMMEND_FRIENDS_QUERY = """
            MATCH (me:User {userId: $userId})-[:FOLLOWS]->()-[:FOLLOWS]->(fof:User)
            WHERE NOT (me)-[:FOLLOWS]->(fof)
              AND me <> fof
            WITH fof, COUNT(*) AS commonFriends
            ORDER BY commonFriends DESC
            LIMIT 10
            RETURN fof
            """;

    @Autowired
    private UserRepository userRepository;

    @Autowired
    private Neo4jTemplate neo4jTemplate;

    /**
     * Makes {@code followerId} follow {@code followeeId}.
     * Does nothing (apart from logging a warning) when either user does not exist.
     *
     * @param followerId business id of the user who follows
     * @param followeeId business id of the user being followed
     */
    @Transactional
    public void follow(String followerId, String followeeId) {
        Optional<User> follower = userRepository.findByUserId(followerId);
        Optional<User> followee = userRepository.findByUserId(followeeId);
        if (follower.isEmpty() || followee.isEmpty()) {
            log.warn("follow skipped, user not found: followerId={}, followeeId={}", followerId, followeeId);
            return;
        }
        User source = follower.get();
        source.getFollowing().add(followee.get());
        userRepository.save(source);
    }

    /**
     * Removes the follow relationship from {@code followerId} to {@code followeeId}.
     * Does nothing when the follower does not exist or is not following that user.
     *
     * @param followerId business id of the user who stops following
     * @param followeeId business id of the user being unfollowed
     */
    @Transactional
    public void unfollow(String followerId, String followeeId) {
        Optional<User> follower = userRepository.findByUserId(followerId);
        if (follower.isEmpty()) {
            log.warn("unfollow skipped, user not found: followerId={}", followerId);
            return;
        }
        User source = follower.get();
        // Match on the business key rather than Set.remove(entity): instances loaded by
        // separate queries are different objects, so an identity-based remove never matches.
        boolean removed = source.getFollowing()
                .removeIf(candidate -> followeeId != null && followeeId.equals(candidate.getUserId()));
        if (removed) {
            userRepository.save(source);
        }
    }

    /**
     * Recommends people the user may know: users followed by the people this user
     * follows (friends of friends), at most 10, most connections first.
     *
     * <p>Implemented with {@link Neo4jTemplate} because {@code @Query} is only
     * processed on repository interface methods, not on service methods.
     *
     * @param userId business id of the user to recommend for
     * @return recommended users, possibly empty
     */
    @Transactional(readOnly = true)
    public List<User> recommendFriends(@Param("userId") String userId) {
        java.util.Map<String, Object> parameters = java.util.Map.of("userId", userId);
        return neo4jTemplate.findAll(RECOMMEND_FRIENDS_QUERY, parameters, User.class);
    }
}
四、社交网络分析
1. 好友推荐
cypher
// Recommend by common friends: candidates followed by the people I follow,
// scored by how many of my friends follow them.
// candidate <> me is required, otherwise a friend who follows me back recommends me to myself.
MATCH (me:User {userId: '1001'})-[:FOLLOWS]->(friend)-[:FOLLOWS]->(candidate)
WHERE candidate <> me
  AND NOT (me)-[:FOLLOWS]->(candidate)
RETURN candidate, count(*) AS score
ORDER BY score DESC
LIMIT 5;

// Recommend by second-degree connections.
// The path is bound to a variable because length() works on paths, not on the
// relationship list that a variable-length pattern binds.
// With a fixed length of 2 every distance is 2; widen the range (e.g. *2..3)
// if the ordering by distance should be meaningful.
MATCH path = (me:User {userId: '1001'})-[:FOLLOWS*2]->(candidate)
WHERE candidate <> me
  AND NOT (me)-[:FOLLOWS]->(candidate)
WITH candidate, min(length(path)) AS distance
ORDER BY distance
LIMIT 5
RETURN candidate;
2. 影响力分析
cypher
// User influence = direct followers + second-degree followers (followers of followers).
// Followers are reached over INCOMING FOLLOWS relationships, and the counts are aggregated
// so the query returns a single row per user.
MATCH (user:User {userId: '1001'})
OPTIONAL MATCH (user)<-[:FOLLOWS]-(f1:User)
OPTIONAL MATCH (f1)<-[:FOLLOWS]-(f2:User)
WHERE f2 <> user
WITH user,
     count(DISTINCT f1) AS followers,
     count(DISTINCT f2) AS second_degree_followers
RETURN user.name AS user,
       followers,
       second_degree_followers,
       followers + second_degree_followers AS influence_score;
3. 社区发现
cypher
// Community detection with the Louvain algorithm.
// Uses the Graph Data Science (GDS) plugin; the older algo.* procedures belong to the
// retired Graph Algorithms library and are not available on Neo4j 5.

// 1. Project User nodes and FOLLOWS relationships (treated as undirected) into an in-memory graph
CALL gds.graph.project(
  'socialGraph',
  'User',
  {FOLLOWS: {orientation: 'UNDIRECTED'}}
);

// 2. Stream the community of every node and list the 10 largest communities
CALL gds.louvain.stream('socialGraph')
YIELD nodeId, communityId
RETURN communityId AS community, collect(gds.util.asNode(nodeId).name) AS members
ORDER BY size(members) DESC
LIMIT 10;

// 3. Release the in-memory graph
CALL gds.graph.drop('socialGraph');
4. 最短路径
cypher
// Shortest FOLLOWS path (either direction, at most 5 hops) between two users
MATCH (a:User {userId: '1001'}), (b:User {userId: '9999'})
MATCH path = shortestPath((a)-[:FOLLOWS*1..5]-(b))
RETURN path, length(path) AS distance
LIMIT 1
// All FOLLOWS paths of up to 3 hops between the two users, shortest first
MATCH (a:User {userId: '1001'}), (b:User {userId: '9999'})
MATCH path = (a)-[:FOLLOWS*1..3]-(b)
RETURN path
ORDER BY length(path)
LIMIT 10
五、性能优化
1. 索引优化
cypher
// Indexes on frequently queried User properties
CREATE INDEX user_userId IF NOT EXISTS FOR (u:User) ON (u.userId);
CREATE INDEX user_name IF NOT EXISTS FOR (u:User) ON (u.name);
// Composite index over city and age
CREATE INDEX user_city_age IF NOT EXISTS FOR (u:User) ON (u.city, u.age);
// Relationship property index on the since property of FOLLOWS relationships
CREATE INDEX follows_since IF NOT EXISTS FOR ()-[r:FOLLOWS]-() ON (r.since);
2. 查询优化
cypher
// Analyse the query plan with PROFILE
PROFILE
MATCH (me:User {userId: '1001'})-[:FOLLOWS]->(friend:User)-[:FOLLOWS]->(fof:User)
RETURN fof.name, count(*) AS times
ORDER BY times DESC
// Optimised variant: aggregate, sort and LIMIT before projecting the name
PROFILE
MATCH (me:User {userId: '1001'})-[:FOLLOWS]->(friend:User)-[:FOLLOWS]->(fof:User)
WITH fof, count(*) AS times
ORDER BY times DESC
LIMIT 10
RETURN fof.name, times
3. 图拆分策略(按数据库拆分新老数据)
cypher
// Split old data into separate databases.
// Database administration commands run against the system database, and multiple
// databases require Neo4j Enterprise Edition.
// Database names may only contain ASCII letters, digits, dots and dashes, so no underscore is used.
:use system
CREATE DATABASE social2023 IF NOT EXISTS;
CREATE DATABASE social2024 IF NOT EXISTS;

// :use is a Neo4j Browser / cypher-shell client command, not a Cypher statement
:use social2024
// In the new database, keep a pointer to the old data. Relationships cannot cross
// databases, so the reference is a local ExternalRef node naming the other database.
// MERGE avoids creating a duplicate user when the script is run again.
MERGE (u:User {userId: '1001'})
MERGE (ref:ExternalRef {ref: '1001'})
MERGE (u)-[:ALSO_EXISTS_IN {database: 'social2023'}]->(ref);
六、集群部署
1. Docker部署
yaml
version: '3'
services:
  neo4j:
    image: neo4j:5.11-community
    container_name: neo4j
    ports:
      - "7474:7474"  # HTTP
      - "7687:7687"  # Bolt
    volumes:
      - ./data:/data
      - ./logs:/logs
      - ./conf:/conf
    environment:
      - NEO4J_AUTH=neo4j/password
      # Neo4j 5 renamed dbms.memory.heap.* to server.memory.heap.*
      - NEO4J_server_memory_heap_initial__size=2g
      - NEO4J_server_memory_heap_max__size=4g
2. 集群模式(Neo4j 4.x 称为 Causal Cluster,5.x 起统称 Cluster)
yaml
# docker-compose.yml
# Neo4j 5 cluster member. The 4.x settings dbms.mode and causal_clustering.* no longer
# exist in 5.x; the equivalents below use the 5.x setting names.
# neo4j-core-2 and neo4j-core-3 must be defined the same way (with their own
# advertised address and host ports) for the cluster to form.
version: '3'
services:
  neo4j-core-1:
    image: neo4j:5.11-enterprise
    environment:
      # The enterprise image refuses to start unless the license is accepted explicitly
      - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
      - NEO4J_initial_server_mode__constraint=PRIMARY
      - NEO4J_dbms_cluster_minimum__initial__system__primaries__count=3
      - NEO4J_dbms_cluster_discovery_endpoints=neo4j-core-1:5000,neo4j-core-2:5000,neo4j-core-3:5000
      # Advertise the compose service name so the other members can reach this one
      - NEO4J_server_default__advertised__address=neo4j-core-1
      - NEO4J_server_memory_heap_initial__size=2g
      - NEO4J_server_memory_heap_max__size=4g
    ports:
      - "7474:7474"
      - "7687:7687"
七、总结
Neo4j是处理社交网络关系的利器:
- 图模型:节点+关系,直观表达社交网络
- Cypher查询:声明式语法,简洁强大
- 好友推荐:基于共同好友、二度好友
- 社区发现:Louvain等算法
适用场景:
- 社交网络
- 推荐系统
- 知识图谱
- 欺诈检测
个人观点,仅供参考