【架构实战】图数据库Neo4j在社交系统中的应用

一、为什么需要图数据库

传统关系型数据库处理图关系时存在瓶颈:

关系型数据库的问题:

  • 多层JOIN查询性能差
  • 递归查询复杂
  • 社交网络查询困难
  • 路径分析效率低

图数据库的优势:

  • 无需JOIN,直接遍历关系
  • 递归查询天然支持
  • 适合社交网络分析
  • 路径算法高效

二、Neo4j核心概念

1. 基本元素

复制代码
节点(Node)
├── 属性(Property):key-value
├── 标签(Label):类型标记
└── 关系(Relationship)

关系(Relationship)
├── 类型(Type)
├── 方向(Direction)
└── 属性(Property)

2. 数据模型

cypher 复制代码
// Create a user node
CREATE (u:User {
    userId: '1001',
    name: '张三',
    age: 28,
    city: '北京'
})

// Create a relationship between two users.
// MERGE is used instead of CREATE so that existing User nodes are reused;
// CREATE with a full node pattern would insert duplicate users on every run.
MERGE (u1:User {userId: '1001'})
MERGE (u2:User {userId: '1002'})
MERGE (u1)-[r:FOLLOWS]->(u2)
ON CREATE SET r.since = '2024-01-01'

// Create several relationships as a chain: 1001 follows 1002, 1002 follows 1003,
// and 1003 likes post 2001. Each node is merged first so nothing is duplicated.
MERGE (u1:User {userId: '1001'})
MERGE (u2:User {userId: '1002'})
MERGE (u3:User {userId: '1003'})
MERGE (p:Post {postId: '2001'})
MERGE (u1)-[:FOLLOWS]->(u2)
MERGE (u2)-[:FOLLOWS]->(u3)
MERGE (u3)-[:LIKES]->(p)

三、Spring Data Neo4j实战

1. 依赖配置

xml 复制代码
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-neo4j</artifactId>
</dependency>
yaml 复制代码
# Spring Boot 2.4+ / Spring Data Neo4j 6 (the version that provides @Node):
# driver connection settings live under spring.neo4j.*,
# only the database name stays under spring.data.neo4j.*.
spring:
  neo4j:
    uri: bolt://localhost:7687
    authentication:
      username: neo4j
      password: password
  data:
    neo4j:
      database: neo4j

2. 实体定义

java 复制代码
/**
 * Graph node for a user, stored with the {@code User} label.
 *
 * <p>Equality is based on the business key {@link #getUserId() userId}, so two
 * separately loaded instances of the same user are treated as the same element
 * by the relationship sets (required for {@code Set.remove}/{@code contains}).
 */
@Node(labels = "User")
public class User {

    /** Database-generated internal identifier. */
    @Id
    @GeneratedValue
    private Long id;

    /** Business identifier; the repository queries look users up by this property. */
    @Property("userId")
    private String userId;

    private String name;
    private Integer age;
    private String city;

    // NOTE(review): @CreatedDate presumably needs auditing to be enabled in the
    // application configuration before it is populated - confirm.
    @CreatedDate
    private LocalDateTime createTime;

    /** Users this user follows (outgoing FOLLOWS relationships). */
    @Relationship(type = "FOLLOWS", direction = Relationship.Direction.OUTGOING)
    private Set<User> following = new HashSet<>();

    /** Users following this user (incoming FOLLOWS relationships). */
    @Relationship(type = "FOLLOWS", direction = Relationship.Direction.INCOMING)
    private Set<User> followers = new HashSet<>();

    /** Posts this user liked (LIKES relationships, annotation's default direction). */
    @Relationship(type = "LIKES")
    private Set<Post> likedPosts = new HashSet<>();

    public Long getId() {
        return id;
    }

    public String getUserId() {
        return userId;
    }

    public String getName() {
        return name;
    }

    public Integer getAge() {
        return age;
    }

    public String getCity() {
        return city;
    }

    public LocalDateTime getCreateTime() {
        return createTime;
    }

    /** @return the live, mutable set of followed users; changes are persisted on save */
    public Set<User> getFollowing() {
        return following;
    }

    public Set<User> getFollowers() {
        return followers;
    }

    public Set<Post> getLikedPosts() {
        return likedPosts;
    }

    /**
     * Two users are equal when both carry the same non-null {@code userId}.
     * Instances without a {@code userId} are only equal to themselves.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof User)) {
            return false;
        }
        User that = (User) other;
        return userId != null && userId.equals(that.userId);
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hashCode(userId);
    }
}

/**
 * Graph node for a post, stored with the {@code Post} label.
 */
@Node(labels = "Post")
public class Post {
    
    /** Database-generated internal identifier. */
    @Id
    @GeneratedValue
    private Long id;
    
    /** Business identifier of the post. */
    private String postId;
    private String content;
    /** Like counter kept as a plain property on the node. */
    private Integer likes;
    
    @CreatedDate
    private LocalDateTime createTime;
    
    /**
     * Author of the post: mapped as an incoming {@code AUTHOR} relationship,
     * i.e. the relationship points from the user node to this post node.
     */
    @Relationship(type = "AUTHOR", direction = Relationship.Direction.INCOMING)
    private User author;
}

/**
 * Relationship entity for a follow relationship that carries properties.
 *
 * <p>NOTE(review): the {@code User} entity in this article maps FOLLOWS as
 * {@code Set<User>}, so this class does not appear to be referenced by any
 * mapping shown here; presumably a {@code Set<Follows>} field would be needed
 * for {@code since} to be persisted - confirm.
 */
@RelationshipProperties
public class Follows {
    
    /** Database-generated identifier of the relationship. */
    @Id
    @GeneratedValue
    private Long id;
    
    /** Time at which the follow relationship started. */
    private LocalDateTime since;
    
    /** The user node on the other end of the relationship. */
    @TargetNode
    private User user;
}

3. Repository定义

java 复制代码
/**
 * Repository for {@link User} nodes, keyed by the generated {@code Long} id.
 */
public interface UserRepository extends Neo4jRepository<User, Long> {
    
    /** Looks up a user by its {@code userId} property. */
    Optional<User> findByUserId(String userId);
    
    /**
     * Returns every user the given user follows
     * (targets of outgoing FOLLOWS relationships).
     */
    @Query("MATCH (u:User {userId: $userId})-[:FOLLOWS]->(f:User) RETURN f")
    List<User> findFollowing(@Param("userId") String userId);
    
    /**
     * Returns the followers of the given user
     * (sources of incoming FOLLOWS relationships).
     */
    @Query("MATCH (u:User {userId: $userId})<-[:FOLLOWS]-(f:User) RETURN f")
    List<User> findFollowers(@Param("userId") String userId);
    
    /** Returns the users that both given users follow. */
    @Query("MATCH (u1:User {userId: $userId1})-[:FOLLOWS]->(f:User)<-[:FOLLOWS]-(u2:User {userId: $userId2}) RETURN f")
    List<User> findCommonFollowing(@Param("userId1") String userId1, @Param("userId2") String userId2);
}

4. 业务服务

java 复制代码
/**
 * Follow/unfollow operations and friend recommendation on top of the user graph.
 */
@Service
@Slf4j
public class SocialService {

    /**
     * Friends-of-friends recommendation: users followed by the people the given user
     * follows, excluding the user and anyone already followed, ranked by the number
     * of connecting paths. Aggregation and LIMIT happen before RETURN so that only
     * the user nodes are handed to the entity mapper.
     */
    private static final String RECOMMEND_FRIENDS_QUERY = """
        MATCH (me:User {userId: $userId})-[:FOLLOWS]->()-[:FOLLOWS]->(fof:User)
        WHERE NOT (me)-[:FOLLOWS]->(fof)
        AND me <> fof
        WITH fof, COUNT(*) AS commonFriends
        ORDER BY commonFriends DESC
        LIMIT 10
        RETURN fof
        """;

    @Autowired
    private UserRepository userRepository;

    @Autowired
    private Neo4jTemplate neo4jTemplate;

    /**
     * Makes {@code followerId} follow {@code followeeId}.
     * Does nothing (apart from a warning) when either user does not exist.
     *
     * @param followerId userId of the user who starts following
     * @param followeeId userId of the user being followed
     */
    @Transactional
    public void follow(String followerId, String followeeId) {
        Optional<User> follower = userRepository.findByUserId(followerId);
        Optional<User> followee = userRepository.findByUserId(followeeId);

        if (follower.isEmpty() || followee.isEmpty()) {
            log.warn("follow skipped, user not found: follower={}, followee={}", followerId, followeeId);
            return;
        }
        follower.get().getFollowing().add(followee.get());
        userRepository.save(follower.get());
    }

    /**
     * Removes the follow relationship from {@code followerId} to {@code followeeId}.
     * Does nothing when the follower does not exist or does not follow that user.
     *
     * @param followerId userId of the user who stops following
     * @param followeeId userId of the user being unfollowed
     */
    @Transactional
    public void unfollow(String followerId, String followeeId) {
        Optional<User> follower = userRepository.findByUserId(followerId);
        if (follower.isEmpty()) {
            log.warn("unfollow skipped, user not found: follower={}", followerId);
            return;
        }
        // Match on the business key rather than Set.remove(entity): separately loaded
        // entity instances are distinct objects, so identity-based removal would
        // silently remove nothing. Relies on User exposing getUserId(), in the same way
        // this class already relies on getFollowing().
        boolean removed = follower.get().getFollowing()
                .removeIf(followed -> java.util.Objects.equals(followeeId, followed.getUserId()));
        if (removed) {
            userRepository.save(follower.get());
        }
    }

    /**
     * Recommends up to 10 users the given user may know.
     *
     * <p>{@code @Query} is only honoured on repository interface methods, and a
     * body-less method in a concrete class does not compile, so the query is
     * executed through {@link Neo4jTemplate} instead.
     *
     * @param userId userId of the user to compute recommendations for
     * @return recommended users, best match first; empty when there are none
     */
    public List<User> recommendFriends(String userId) {
        return neo4jTemplate.findAll(
                RECOMMEND_FRIENDS_QUERY,
                java.util.Collections.singletonMap("userId", userId),
                User.class);
    }
}

四、社交网络分析

1. 好友推荐

cypher 复制代码
// Recommend by shared connections: users followed by the people I follow,
// scored by how many of the people I follow also follow them.
// `candidate <> me` keeps mutual follows (me -> friend -> me) from recommending myself.
MATCH (me:User {userId: '1001'})-[:FOLLOWS]->(friend)-[:FOLLOWS]->(candidate)
WHERE candidate <> me
  AND NOT (me)-[:FOLLOWS]->(candidate)
RETURN candidate, COUNT(*) AS score
ORDER BY score DESC
LIMIT 5

// Recommend second-degree connections (exactly two FOLLOWS hops away).
// The hop count is fixed at 2, so there is no distance to rank by;
// DISTINCT removes candidates reachable through several intermediate users.
MATCH (me:User {userId: '1001'})-[:FOLLOWS*2]->(candidate)
WHERE candidate <> me
  AND NOT (me)-[:FOLLOWS]->(candidate)
RETURN DISTINCT candidate
LIMIT 5

2. 影响力分析

cypher 复制代码
// User influence = direct followers + second-degree followers (followers of followers).
// Followers are the sources of *incoming* FOLLOWS relationships.
// Aggregating with COUNT(DISTINCT ...) yields a single row per user, and avoids
// SIZE(<pattern>), which Neo4j 5 no longer supports.
MATCH (user:User {userId: '1001'})
OPTIONAL MATCH (user)<-[:FOLLOWS]-(f1:User)
OPTIONAL MATCH (f1)<-[:FOLLOWS]-(f2:User)
WHERE f2 <> user
WITH user,
     COUNT(DISTINCT f1) AS followers,
     COUNT(DISTINCT f2) AS second_degree_followers
RETURN user.name AS user,
       followers,
       second_degree_followers,
       followers + second_degree_followers AS influence_score

3. 社区发现

cypher 复制代码
// Community detection with the Louvain algorithm.
// Uses the Graph Data Science (GDS) library; the older algo.* procedures belong to
// the retired Graph Algorithms plugin and are not available on Neo4j 5.

// 1) Project an in-memory graph, treating FOLLOWS as undirected.
CALL gds.graph.project('socialGraph', 'User', {FOLLOWS: {orientation: 'UNDIRECTED'}});

// 2) Stream the community of every node and list the 10 largest communities.
CALL gds.louvain.stream('socialGraph')
YIELD nodeId, communityId
WITH communityId AS community, COLLECT(gds.util.asNode(nodeId).name) AS members
RETURN community, members
ORDER BY SIZE(members) DESC
LIMIT 10

4. 最短路径

cypher 复制代码
// Find one shortest FOLLOWS path between two users.
// The pattern has no arrow, so FOLLOWS is traversed in either direction; at most 5 hops.
MATCH path = shortestPath(
    (a:User {userId: '1001'})-[:FOLLOWS*1..5]-(b:User {userId: '9999'})
)
RETURN path, LENGTH(path) AS distance
LIMIT 1

// List up to 10 FOLLOWS paths (1 to 3 hops, either direction) between the same
// two users, shortest first.
MATCH path = (a:User {userId: '1001'})-[:FOLLOWS*1..3]-(b:User {userId: '9999'})
RETURN path
ORDER BY LENGTH(path)
LIMIT 10

五、性能优化

1. 索引优化

cypher 复制代码
// Create indexes on frequently queried properties
CREATE INDEX user_userId IF NOT EXISTS FOR (u:User) ON (u.userId);

CREATE INDEX user_name IF NOT EXISTS FOR (u:User) ON (u.name);

// Composite index on city and age
CREATE INDEX user_city_age IF NOT EXISTS FOR (u:User) ON (u.city, u.age);

// Relationship property index on the `since` property of FOLLOWS relationships
CREATE INDEX follows_since IF NOT EXISTS FOR ()-[r:FOLLOWS]-() ON (r.since);

2. 查询优化

cypher 复制代码
// Analyze the query with PROFILE
PROFILE
MATCH (u:User {userId: '1001'})-[:FOLLOWS]->(f:User)-[:FOLLOWS]->(fof:User)
RETURN fof.name, COUNT(*) AS times
ORDER BY times DESC

// Optimization: add LIMIT.
// Counts are grouped by the fof node here (the query above groups by fof.name);
// only the top 10 rows are kept before the name is projected.
PROFILE
MATCH (u:User {userId: '1001'})-[:FOLLOWS]->(f:User)-[:FOLLOWS]->(fof:User)
WITH fof, COUNT(*) AS times
ORDER BY times DESC
LIMIT 10
RETURN fof.name, times

3. 图分裂策略

cypher 复制代码
// Move older data into separate databases (one per year).
// NOTE(review): creating additional databases presumably requires Enterprise Edition - confirm.
CREATE DATABASE social_2023;
CREATE DATABASE social_2024;

// NOTE(review): `:use` looks like a client command (Browser / cypher-shell), not Cypher - confirm.
:use social_2024

// In the new database, create an edge that points at the old data.
// The ExternalRef node only stores the id; the database name is a property on the relationship.
CREATE (u:User {userId: '1001'})-[:ALSO_EXISTS_IN {database: 'social_2023'}]->(:ExternalRef {ref: '1001'})

六、集群部署

1. Docker部署

yaml 复制代码
version: '3'
services:
  neo4j:
    image: neo4j:5.11-community
    container_name: neo4j
    ports:
      - "7474:7474"  # HTTP
      - "7687:7687"  # Bolt
    volumes:
      - ./data:/data
      - ./logs:/logs
      - ./conf:/conf
    environment:
      - NEO4J_AUTH=neo4j/password
      # Neo4j 5 renamed dbms.memory.heap.* to server.memory.heap.*
      - NEO4J_server_memory_heap_initial__size=2g
      - NEO4J_server_memory_heap_max__size=4g

2. 集群模式(Causal Cluster)

yaml 复制代码
# docker-compose.yml
# Neo4j 5 clustering: the 4.x settings dbms.mode and causal_clustering.* were removed.
# Only the first member is shown; neo4j-core-2 and neo4j-core-3 use the same settings.
version: '3'
services:
  neo4j-core-1:
    image: neo4j:5.11-enterprise
    environment:
      # The enterprise image does not start unless the licence is accepted.
      - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
      # This server may only host databases in primary mode (replaces dbms.mode=CORE).
      - NEO4J_initial_server_mode__constraint=PRIMARY
      # Number of primaries required before the cluster forms.
      - NEO4J_dbms_cluster_minimum__initial__system__primaries__count=3
      # Replaces causal_clustering.initial_discovery_members.
      - NEO4J_dbms_cluster_discovery_endpoints=neo4j-core-1:5000,neo4j-core-2:5000,neo4j-core-3:5000
      - NEO4J_server_memory_heap_initial__size=2g
      - NEO4J_server_memory_heap_max__size=4g
    ports:
      - "7474:7474"
      - "7687:7687"

七、总结

Neo4j是处理社交网络关系的利器:

  • 图模型:节点+关系,直观表达社交网络
  • Cypher查询:声明式语法,简洁强大
  • 好友推荐:基于共同好友、二度好友
  • 社区发现:Louvain等算法

适用场景:

  • 社交网络
  • 推荐系统
  • 知识图谱
  • 欺诈检测

个人观点,仅供参考

相关推荐
夕除2 小时前
MVN--06
数据库·sql·mybatis
无心水2 小时前
OpenClaw与多Agent架构在HR数字化转型中的深度实践
人工智能·架构·多agent·openclaw·养龙虾·hr数字化转型
鸠摩智首席音效师2 小时前
如何在 MacOS 上安装 PostgreSQL ?
数据库·macos·postgresql
℡終嚸♂6802 小时前
SQL 注入:从原理到实战的完整指南
数据库·sql
东方隐侠安全团队-千里2 小时前
基于SAST+AI代码审计 架构与功能详解
人工智能·microsoft·架构
航Hang*2 小时前
第2章:进阶Linux系统——第8节:配置与管理MariaDB服务器
linux·运维·服务器·数据库·笔记·学习·mariadb
AKA__Zas2 小时前
初识SQL(1.0 PLUS)
数据库·sql·学习方法
卢傢蕊2 小时前
PostgreSQL 日常维护
数据库·postgresql·oracle