【架构实战】图数据库Neo4j在社交系统中的应用

一、为什么需要图数据库

传统关系型数据库处理图关系时存在瓶颈：

关系型数据库的问题：

多层JOIN查询性能差
递归查询复杂
社交网络查询困难
路径分析效率低

图数据库的优势：

无需JOIN，直接遍历关系
递归查询天然支持
适合社交网络分析
路径算法高效

二、Neo4j核心概念

1. 基本元素

复制代码

节点（Node）
├── 属性（Property）：key-value
├── 标签（Label）：类型标记
└── 关系（Relationship）

关系（Relationship）
├── 类型（Type）
├── 方向（Direction）
└── 属性（Property）

2. 数据模型

cypher 复制代码

// 创建用户节点
CREATE (u:User {
    userId: '1001',
    name: '张三',
    age: 28,
    city: '北京'
})

// Create a FOLLOWS relationship between two users.
// MERGE is used instead of CREATE: CREATE with inline node patterns inserts
// brand-new User nodes on every run, duplicating users that already exist.
MERGE (u1:User {userId: '1001'})
MERGE (u2:User {userId: '1002'})
MERGE (u1)-[r:FOLLOWS]->(u2)
ON CREATE SET r.since = '2024-01-01';

// Create a chain of relationships: 1001 follows 1002, 1002 follows 1003,
// and 1003 likes post 2001. Every node and relationship is merged, so the
// statement can be re-run without producing duplicates.
MERGE (u1:User {userId: '1001'})
MERGE (u2:User {userId: '1002'})
MERGE (u3:User {userId: '1003'})
MERGE (p:Post {postId: '2001'})
MERGE (u1)-[:FOLLOWS]->(u2)
MERGE (u2)-[:FOLLOWS]->(u3)
MERGE (u3)-[:LIKES]->(p);

三、Spring Data Neo4j实战

1. 依赖配置

xml 复制代码

<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-neo4j</artifactId>
</dependency>

yaml 复制代码

# Spring Boot 2.4+ / Spring Data Neo4j 6+ (required for the @Node mapping used
# in this article): driver connection settings live under spring.neo4j.*,
# only the target database name stays under spring.data.neo4j.*.
spring:
  neo4j:
    uri: bolt://localhost:7687
    authentication:
      username: neo4j
      password: password   # example value only; inject real credentials from the environment
  data:
    neo4j:
      database: neo4j

2. 实体定义

java 复制代码

/**
 * User node of the social graph.
 *
 * <p>Equality is based on the business key {@code userId}, so that two
 * separately loaded instances of the same user are treated as the same
 * element in the relationship sets (needed for {@code Set.remove} and for
 * avoiding duplicate entries).
 */
@Node(labels = "User")
public class User {

    /** Database-generated identifier. */
    @Id
    @GeneratedValue
    private Long id;

    /** Business key of the user; used for equality. */
    @Property("userId")
    private String userId;

    private String name;
    private Integer age;
    private String city;

    // NOTE(review): @CreatedDate is only populated when auditing is enabled — confirm configuration.
    @CreatedDate
    private LocalDateTime createTime;

    /** Users this user follows (outgoing FOLLOWS). */
    @Relationship(type = "FOLLOWS", direction = Relationship.Direction.OUTGOING)
    private Set<User> following = new HashSet<>();

    /**
     * Users following this user (incoming FOLLOWS).
     * NOTE(review): mapping the same relationship type in both directions makes
     * the loaded object graph cyclic — confirm this is intended.
     */
    @Relationship(type = "FOLLOWS", direction = Relationship.Direction.INCOMING)
    private Set<User> followers = new HashSet<>();

    /** Posts liked by this user (LIKES; no direction is specified on the mapping). */
    @Relationship(type = "LIKES")
    private Set<Post> likedPosts = new HashSet<>();

    public Long getId() {
        return id;
    }

    public String getUserId() {
        return userId;
    }

    public void setUserId(String userId) {
        this.userId = userId;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public LocalDateTime getCreateTime() {
        return createTime;
    }

    public Set<User> getFollowing() {
        return following;
    }

    public Set<User> getFollowers() {
        return followers;
    }

    public Set<Post> getLikedPosts() {
        return likedPosts;
    }

    /** Two users are equal when both carry the same non-null {@code userId}. */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof User)) {
            return false;
        }
        User that = (User) other;
        return userId != null && userId.equals(that.userId);
    }

    /** Hash derived from {@code userId} only; relationship sets are excluded to avoid cyclic traversal. */
    @Override
    public int hashCode() {
        return java.util.Objects.hashCode(userId);
    }
}

/**
 * Post node of the social graph, mapped to the {@code Post} label.
 */
@Node("Post")
public class Post {

    /** Database-generated identifier. */
    @Id @GeneratedValue
    private Long id;

    /** Business identifier of the post. */
    private String postId;

    /** Text content of the post. */
    private String content;

    /** Like counter stored on the node. */
    private Integer likes;

    /** Creation timestamp, marked for auditing. */
    @CreatedDate
    private LocalDateTime createTime;

    /** Author of the post, reached through an incoming AUTHOR relationship. */
    @Relationship(value = "AUTHOR", direction = Relationship.Direction.INCOMING)
    private User author;
}

/**
 * Properties carried by a FOLLOWS relationship.
 *
 * <p>NOTE(review): the {@code User} entity shown in this article maps FOLLOWS
 * as {@code Set<User>}, so this class is not referenced by it; it would have to
 * be used as the collection element type for {@code since} to be persisted.
 */
@RelationshipProperties
public class Follows {

    /** Database-generated identifier of the relationship. */
    @Id @GeneratedValue
    private Long id;

    /** Moment the follow relationship was established. */
    private LocalDateTime since;

    /** Node at the other end of the relationship. */
    @TargetNode
    private User user;
}

3. Repository定义

java 复制代码

/**
 * Repository for {@link User} nodes, combining a derived finder with
 * custom Cypher queries over the FOLLOWS relationship.
 */
public interface UserRepository extends Neo4jRepository<User, Long> {

    /**
     * Looks a user up by its business key.
     *
     * @param userId business identifier of the user
     * @return the matching user, or empty when none exists
     */
    Optional<User> findByUserId(String userId);

    /**
     * Returns the users that the given user follows.
     *
     * @param userId business identifier of the follower
     */
    @Query("MATCH (u:User {userId: $userId})-[:FOLLOWS]->(f:User) RETURN f")
    List<User> findFollowing(
            @Param("userId") String userId);

    /**
     * Returns the users that follow the given user.
     *
     * @param userId business identifier of the followed user
     */
    @Query("MATCH (u:User {userId: $userId})<-[:FOLLOWS]-(f:User) RETURN f")
    List<User> findFollowers(
            @Param("userId") String userId);

    /**
     * Returns the users followed by both given users.
     *
     * @param userId1 business identifier of the first user
     * @param userId2 business identifier of the second user
     */
    @Query("MATCH (u1:User {userId: $userId1})-[:FOLLOWS]->(f:User)<-[:FOLLOWS]-(u2:User {userId: $userId2}) RETURN f")
    List<User> findCommonFollowing(
            @Param("userId1") String userId1,
            @Param("userId2") String userId2);
}

4. 业务服务

java 复制代码

/**
 * Social-graph operations: follow, unfollow and friend recommendation.
 */
@Service
@Slf4j
public class SocialService {

    /**
     * Recommends users followed by the people the given user follows, skipping
     * the user itself and anyone already followed. Candidates reached through
     * more intermediate users rank first; at most 10 are returned. Only the
     * node is returned so the result maps cleanly onto {@link User}.
     */
    private static final String RECOMMEND_FRIENDS_QUERY = """
        MATCH (me:User {userId: $userId})-[:FOLLOWS]->()-[:FOLLOWS]->(fof:User)
        WHERE NOT (me)-[:FOLLOWS]->(fof)
        AND me <> fof
        WITH fof, COUNT(*) AS commonFriends
        RETURN fof
        ORDER BY commonFriends DESC
        LIMIT 10
        """;

    @Autowired
    private UserRepository userRepository;

    @Autowired
    private Neo4jTemplate neo4jTemplate;

    /**
     * Makes {@code followerId} follow {@code followeeId}.
     *
     * <p>Does nothing when either user does not exist or when the follow
     * relationship is already present.
     *
     * @param followerId business identifier of the user who follows
     * @param followeeId business identifier of the user being followed
     */
    @Transactional
    public void follow(String followerId, String followeeId) {
        Optional<User> follower = userRepository.findByUserId(followerId);
        Optional<User> followee = userRepository.findByUserId(followeeId);
        if (follower.isEmpty() || followee.isEmpty()) {
            return;
        }

        User source = follower.get();
        // Compare by business key: the followee instance loaded above is a different
        // object from the one already held in the set, so Set.contains cannot be
        // relied on unless User defines equals/hashCode.
        boolean alreadyFollowing = source.getFollowing().stream()
                .anyMatch(existing -> followeeId.equals(existing.getUserId()));
        if (alreadyFollowing) {
            return;
        }

        source.getFollowing().add(followee.get());
        userRepository.save(source);
    }

    /**
     * Removes the follow relationship from {@code followerId} to {@code followeeId}.
     *
     * <p>Does nothing when the follower does not exist or does not follow the
     * given user.
     *
     * @param followerId business identifier of the user who follows
     * @param followeeId business identifier of the user being unfollowed
     */
    @Transactional
    public void unfollow(String followerId, String followeeId) {
        Optional<User> follower = userRepository.findByUserId(followerId);
        if (follower.isEmpty()) {
            return;
        }

        User source = follower.get();
        // Remove by business key rather than Set.remove(followee): removal by object
        // equality silently fails for separately loaded instances.
        boolean removed = source.getFollowing()
                .removeIf(existing -> followeeId.equals(existing.getUserId()));
        if (removed) {
            userRepository.save(source);
        }
    }

    /**
     * Recommends people the user may know: users followed by the people this
     * user follows.
     *
     * <p>Implemented through {@code Neo4jTemplate} because {@code @Query} is
     * only processed on repository interfaces, and a bodiless method is not
     * valid in a concrete class.
     *
     * @param userId business identifier of the user to recommend for; must not be null
     * @return up to 10 recommended users, most shared connections first
     */
    public List<User> recommendFriends(String userId) {
        return neo4jTemplate.findAll(
                RECOMMEND_FRIENDS_QUERY, java.util.Map.of("userId", userId), User.class);
    }
}

四、社交网络分析

1. 好友推荐

cypher 复制代码

// Recommend by shared connections: candidates followed by the people I follow,
// scored by how many of my followees lead to them.
// `candidate <> me` is required: with a mutual follow (me -> friend -> me) the
// pattern would otherwise recommend the user to themselves.
MATCH (me:User {userId: '1001'})-[:FOLLOWS]->(friend)-[:FOLLOWS]->(candidate)
WHERE candidate <> me
  AND NOT (me)-[:FOLLOWS]->(candidate)
RETURN candidate, COUNT(*) AS score
ORDER BY score DESC
LIMIT 5

// Recommend second-degree connections: users exactly two FOLLOWS hops away
// that I do not follow yet.
// The hop count is fixed at 2 by the pattern, so there is no distance to
// compute or sort by (and LENGTH() applies to paths, not relationship lists).
// DISTINCT collapses candidates reachable through several intermediate users.
MATCH (me:User {userId: '1001'})-[:FOLLOWS*2]->(candidate:User)
WHERE candidate <> me
  AND NOT (me)-[:FOLLOWS]->(candidate)
RETURN DISTINCT candidate
LIMIT 5

2. 影响力分析

cypher 复制代码

// Compute a user's influence as direct followers + second-degree followers.
// Followers are reached over INCOMING FOLLOWS relationships; outgoing ones
// would count the accounts the user follows instead.
// Aggregation with count(DISTINCT ...) yields a single row per user, and avoids
// SIZE(<pattern>), which Neo4j 5 no longer supports.
MATCH (user:User {userId: '1001'})
OPTIONAL MATCH (user)<-[:FOLLOWS]-(f1:User)
OPTIONAL MATCH (f1)<-[:FOLLOWS]-(f2:User)
WHERE f2 <> user
WITH user,
     count(DISTINCT f1) AS followers,
     count(DISTINCT f2) AS second_degree_followers
RETURN user.name AS user,
       followers,
       second_degree_followers,
       followers + second_degree_followers AS influence_score

3. 社区发现

cypher 复制代码

// Community detection with Louvain, using the Graph Data Science (GDS) library.
// The algo.* procedures belong to the retired Graph Algorithms plugin and are
// not available on current Neo4j versions.

// 1) Project User nodes and FOLLOWS relationships (treated as undirected)
//    into a named in-memory graph.
CALL gds.graph.project(
    'social',
    'User',
    {FOLLOWS: {orientation: 'UNDIRECTED'}}
);

// 2) Stream community assignments and list the 10 largest communities.
CALL gds.louvain.stream('social')
YIELD nodeId, communityId
RETURN communityId AS community, COLLECT(gds.util.asNode(nodeId).name) AS members
ORDER BY SIZE(members) DESC
LIMIT 10

4. 最短路径

cypher 复制代码

// Find the shortest path between two users, following FOLLOWS relationships
// in either direction over 1 to 5 hops; returns the path and its hop count
MATCH path = shortestPath(
    (a:User {userId: '1001'})-[:FOLLOWS*1..5]-(b:User {userId: '9999'})
)
RETURN path, LENGTH(path) AS distance
LIMIT 1

// List paths of 1 to 3 FOLLOWS hops (either direction) between the two users,
// shortest first, capped at 10 results
MATCH path = (a:User {userId: '1001'})-[:FOLLOWS*1..3]-(b:User {userId: '9999'})
RETURN path
ORDER BY LENGTH(path)
LIMIT 10

五、性能优化

1. 索引优化

cypher 复制代码

// Create indexes on frequently queried User properties
CREATE INDEX user_userId IF NOT EXISTS FOR (u:User) ON (u.userId);

CREATE INDEX user_name IF NOT EXISTS FOR (u:User) ON (u.name);

// Composite index over city and age
CREATE INDEX user_city_age IF NOT EXISTS FOR (u:User) ON (u.city, u.age);

// Relationship property index on the `since` property of FOLLOWS relationships
CREATE INDEX follows_since IF NOT EXISTS FOR ()-[r:FOLLOWS]-() ON (r.since);

2. 查询优化

cypher 复制代码

// Analyze the query with PROFILE: counts how often each name is reached
// two FOLLOWS hops away from user 1001 (rows are grouped by fof.name)
PROFILE
MATCH (u:User {userId: '1001'})-[:FOLLOWS]->(f:User)-[:FOLLOWS]->(fof:User)
RETURN fof.name, COUNT(*) AS times
ORDER BY times DESC

// Optimized variant: aggregate per node, keep only the top 10 with LIMIT,
// and read the name property afterwards (rows are grouped by the fof node here)
PROFILE
MATCH (u:User {userId: '1001'})-[:FOLLOWS]->(f:User)-[:FOLLOWS]->(fof:User)
WITH fof, COUNT(*) AS times
ORDER BY times DESC
LIMIT 10
RETURN fof.name, times

3. 图分裂策略

cypher 复制代码

// Separate older data into different databases, one per year
// NOTE(review): multiple user databases presumably require Enterprise Edition — confirm
CREATE DATABASE social_2023;
CREATE DATABASE social_2024;

// Switch the session to the 2024 database
// NOTE(review): `:use` looks like a client command (Browser / cypher-shell), not Cypher — confirm
:use social_2024

// In the new database, create a placeholder node and an edge recording that
// the user also exists in the older database
CREATE (u:User {userId: '1001'})-[:ALSO_EXISTS_IN {database: 'social_2023'}]->(:ExternalRef {ref: '1001'})

六、集群部署

1. Docker部署

yaml 复制代码

# Single-node Neo4j 5 (Community Edition) for local development.
version: '3'
services:
  neo4j:
    image: neo4j:5.11-community
    container_name: neo4j
    ports:
      - "7474:7474"  # HTTP
      - "7687:7687"  # Bolt
    volumes:
      - ./data:/data
      - ./logs:/logs
      - ./conf:/conf
    environment:
      # Example credentials only; override them outside of local development.
      - NEO4J_AUTH=neo4j/password
      # Neo4j 5 names the heap settings server.memory.heap.*; the dbms.memory.*
      # names used up to 4.x are deprecated.
      - NEO4J_server_memory_heap_initial__size=2g
      - NEO4J_server_memory_heap_max__size=4g

2. 集群模式（Neo4j 4.x 中称为 Causal Cluster，5.x 起统一称为 Cluster）

yaml 复制代码

# docker-compose.yml
# One primary member of a three-member Neo4j 5 cluster. The services
# neo4j-core-2 and neo4j-core-3 referenced below must be defined the same way
# (without publishing the same host ports).
# Neo4j 5 no longer has dbms.mode or causal_clustering.* settings; the
# equivalent cluster settings are used instead.
version: '3'
services:
  neo4j-core-1:
    image: neo4j:5.11-enterprise
    environment:
      # Enterprise images refuse to start until the licence is accepted.
      - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
      - NEO4J_initial_server_mode__constraint=PRIMARY
      - NEO4J_initial_dbms_default__primaries__count=3
      - NEO4J_dbms_cluster_minimum__initial__system__primaries__count=3
      - NEO4J_dbms_cluster_discovery_endpoints=neo4j-core-1:5000,neo4j-core-2:5000,neo4j-core-3:5000
      - NEO4J_server_memory_heap_initial__size=2g
      - NEO4J_server_memory_heap_max__size=4g
    ports:
      - "7474:7474"
      - "7687:7687"

七、总结

Neo4j是处理社交网络关系的利器：

图模型：节点+关系，直观表达社交网络
Cypher查询：声明式语法，简洁强大
好友推荐：基于共同好友、二度好友
社区发现：Louvain等算法

适用场景：

社交网络
推荐系统
知识图谱
欺诈检测

个人观点，仅供参考