duckdb图查询插件duckpgq的安装和使用

插件存储库 https://github.com/cwida/duckpgq-extension

示例查询语句来自 https://duckpgq.org/

示例数据来自 https://github.com/Dtenwolde/duckpgq-docs/

将示例数据的duckdb数据库文件复制到当前目录,比如snb.duckdb。

在duckdb cli中安装duckpgq插件

高版本的duckdb没有这个插件

复制代码
C:\d>duckdb154
DuckDB v1.5.4 (Variegata)
Enter ".help" for usage hints.
memory D INSTALL duckpgq FROM community;
HTTP Error:
Failed to download extension "duckpgq" at URL "http://community-extensions.duckdb.org/v1.5.4/windows_amd64/duckpgq.duckdb_extension.gz" (HTTP 404)

Candidate extensions: "ducklake", "quack", "odbc", "uc_catalog", "autocomplete"
For more info, visit https://duckdb.org/docs/stable/extensions/troubleshooting?version=v1.5.4&platform=windows_amd64&extension=duckpgq
memory D

1.4.1版有

复制代码
C:\d>duckdb141
DuckDB v1.4.1 (Andium) b390a7c376
Enter ".help" for usage hints.
Connected to a transient in-memory database.
Use ".open FILENAME" to reopen on a persistent database.
D INSTALL duckpgq FROM community;
100% ▕██████████████████████████████████████▏ (00:00:16.74 elapsed)

安装后就可以查询

sql 复制代码
D ATTACH './snb.duckdb';
D use snb;
D load duckpgq;
D CREATE or replace PROPERTY GRAPH snb
   VERTEX TABLES (
     Person, Forum
   )
   EDGE TABLES (
     Person_knows_person     SOURCE KEY (Person1Id) REFERENCES Person (id)
                             DESTINATION KEY (Person2Id) REFERENCES Person (id)
                             LABEL knows,
     Forum_hasMember_Person  SOURCE KEY (ForumId) REFERENCES Forum (id)
                             DESTINATION KEY (PersonId) REFERENCES Person (id)
                             LABEL hasMember
   );
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ 0 rows  │
└─────────┘
D -- find the shortest path from one person to all other persons
D FROM GRAPH_TABLE (snb
     MATCH p = ANY SHORTEST (p1:person WHERE p1.id = 14)-[k:knows]->*(p2:person)
     COLUMNS (p1.id, p2.id as other_person_id, element_id(p), path_length(p))
   );
┌───────┬─────────────────┬────────────────────────────┬────────────────┐
│  id   │ other_person_id │       element_id(p)        │ path_length(p) │
│ int64 │      int64      │          int64[]           │     int64      │
├───────┼─────────────────┼────────────────────────────┼────────────────┤
│    14 │              14 │ [0]                        │              0 │
│    14 │  10995116277782 │ [0, 0, 13]                 │              1 │
│    14 │  24189255811081 │ [0, 1, 26]                 │              1 │
│    14 │  24189255811109 │ [0, 1, 26, 61, 27]         │              2 │
│    14 │  26388279066641 │ [0, 0, 13, 42, 29]         │              2 │
│    14 │  26388279066658 │ [0, 0, 13, 43, 31]         │              2 │
│    14 │  26388279066668 │ [0, 2, 32]                 │              1 │
│    14 │  28587302322180 │ [0, 0, 13, 44, 33]         │              2 │
│    14 │  28587302322196 │ [0, 1, 26, 65, 35]         │              2 │
│    14 │  28587302322204 │ [0, 0, 13, 45, 36]         │              2 │
│    14 │  28587302322223 │ [0, 0, 13, 44, 33, 78, 38] │              3 │
│    14 │  30786325577731 │ [0, 0, 13, 44, 33, 79, 39] │              3 │
│    14 │  30786325577740 │ [0, 0, 13, 43, 31, 72, 40] │              3 │
│    14 │  32985348833329 │ [0, 0, 13, 44, 33, 80, 43] │              3 │
│    14 │  35184372088834 │ [0, 1, 26, 66, 44]         │              2 │
│    14 │  35184372088850 │ [0, 0, 13, 43, 31, 73, 45] │              3 │
│    14 │  35184372088856 │ [0, 0, 13, 46, 46]         │              2 │
├───────┴─────────────────┴────────────────────────────┴────────────────┤
│ 17 rows                                                     4 columns │
└───────────────────────────────────────────────────────────────────────┘
D -- Find mutual friends between two users
D FROM GRAPH_TABLE (snb
     MATCH (p1:Person WHERE p1.id = 16)-[k:knows]->(p2:Person)<-[k2:knows]-(p3:Person WHERE p3.id = 32)
     COLUMNS (p2.firstName)
   );
┌───────────┐
│ firstName │
│  varchar  │
├───────────┤
│ Ali       │
└───────────┘
D -- Find the 3 most popular people
D FROM GRAPH_TABLE (snb
     MATCH (follower:Person)-[follows:knows]->(person:Person)
     COLUMNS (person.id AS personID, person.firstname, person.lastname, follower.id AS followerID)
   )
   SELECT personID, firstname, lastname, COUNT(followerID) AS numFollowers
   GROUP BY ALL
   ORDER BY numFollowers DESC
   LIMIT 3;
┌────────────────┬───────────┬──────────┬──────────────┐
│    personID    │ firstName │ lastName │ numFollowers │
│     int64      │  varchar  │ varchar  │    int64     │
├────────────────┼───────────┼──────────┼──────────────┤
│ 24189255811081 │ Alim      │ Guliyev  │           10 │
│ 28587302322180 │ Bryn      │ Davies   │            9 │
│ 26388279066658 │ Roberto   │ Diaz     │            9 │
└────────────────┴───────────┴──────────┴──────────────┘
D -- Number of forums posted on by the most followed person
D WITH
   mfp AS (
     FROM GRAPH_TABLE (snb
       MATCH (follower:Person)-[follows:knows]->(person:Person)
       COLUMNS (person.id AS personID, person.firstname, follower.id AS followerID)
     )
   SELECT personID, firstname, COUNT(followerID) AS numFollowers
   GROUP BY ALL ORDER BY numFollowers DESC LIMIT 1
   )
   FROM
     mfp,
     GRAPH_TABLE (snb
       MATCH (person:Person)<-[fhm:hasMember]-(f:Forum)
       COLUMNS (person.id AS personID, f.id as forumId)
   ) mem
   SELECT mfp.personID, mfp.firstname, mfp.numFollowers, count(mem.forumId) forumCount
   WHERE mfp.personID = mem.personID
   GROUP BY ALL;

最后一个查询直接退出了,不知错在哪。改用DuckDB 1.5版,可以了。

sql 复制代码
C:\d>duckdb150
DuckDB v1.5.0 (Variegata)
Enter ".help" for usage hints.
memory D INSTALL duckpgq FROM community;
memory D ATTACH './snb.duckdb';
memory D use snb;
snb D load duckpgq;
snb D CREATE or replace PROPERTY GRAPH snb
      VERTEX TABLES (
        Person, Forum
      )
      EDGE TABLES (
        Person_knows_person     SOURCE KEY (Person1Id) REFERENCES Person (id)
                                DESTINATION KEY (Person2Id) REFERENCES Person (id)
                                LABEL knows,
        Forum_hasMember_Person  SOURCE KEY (ForumId) REFERENCES Forum (id)
                                DESTINATION KEY (PersonId) REFERENCES Person (id)
                                LABEL hasMember
      );
┌─────────┐
│ Success │
│ boolean │
└─────────┘
  0 rows
snb D -- Number of forums posted on by the most followed person
snb D WITH
      mfp AS (
        FROM GRAPH_TABLE (snb
          MATCH (follower:Person)-[follows:knows]->(person:Person)
          COLUMNS (person.id AS personID, person.firstname, follower.id AS followerID)
        )
      SELECT personID, firstname, COUNT(followerID) AS numFollowers
      GROUP BY ALL ORDER BY numFollowers DESC LIMIT 1
      )
      FROM
        mfp,
        GRAPH_TABLE (snb
          MATCH (person:Person)<-[fhm:hasMember]-(f:Forum)
          COLUMNS (person.id AS personID, f.id as forumId)
      ) mem
      SELECT mfp.personID, mfp.firstname, mfp.numFollowers, count(mem.forumId) forumCount
      WHERE mfp.personID = mem.personID
      GROUP BY ALL;
┌────────────────┬───────────┬──────────────┬────────────┐
│    personID    │ firstName │ numFollowers │ forumCount │
│     int64      │  varchar  │    int64     │   int64    │
├────────────────┼───────────┼──────────────┼────────────┤
│ 24189255811081 │ Alim      │           10 │        138 │
└────────────────┴───────────┴──────────────┴────────────┘
snb D

其他几个例子

sql 复制代码
D ATTACH './airline-data-small.duckdb' as airline;
D use airline;
D CREATE PROPERTY GRAPH flight_graph
   VERTEX TABLES (
     aircrafts_data, airports_data,
     bookings, flights,
     tickets, seats
   )
   EDGE TABLES (
     route
       SOURCE KEY (departure_airport) REFERENCES airports_data(airport_code)
       DESTINATION KEY (arrival_airport) REFERENCES airports_data(airport_code),
     ticket_flights
       SOURCE KEY (ticket_no) REFERENCES tickets(ticket_no)
       DESTINATION KEY (flight_id) REFERENCES flights(flight_id),
     bookings_tickets
       SOURCE KEY (book_ref) REFERENCES bookings(book_ref)
       DESTINATION KEY (ticket_no) REFERENCES tickets(ticket_no),
     boarding_passes
       SOURCE KEY (ticket_no) REFERENCES tickets(ticket_no)
       DESTINATION KEY (seat_no) REFERENCES seats(seat_no)
 );
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ 0 rows  │
└─────────┘
D --Shortest Route Between Airports
D FROM (
   SELECT unnest(flights) AS flights
     FROM GRAPH_TABLE (
     flight_graph
     MATCH o = ANY SHORTEST (a:airports_data WHERE a.airport_code = 'UKX')
       -[fr:route]->*
       (a2:airports_data WHERE a2.airport_code = 'CNN')
     COLUMNS (edges(o) AS flights)
   )
 )
 JOIN route f
   ON f.rowid = flights;
┌─────────┬───────────────────┬─────────────────┐
│ flights │ departure_airport │ arrival_airport │
│  int64  │      varchar      │     varchar     │
├─────────┼───────────────────┼─────────────────┤
│       3 │ MJZ               │ CNN             │
│      58 │ UKX               │ KJA             │
│     313 │ SVO               │ MJZ             │
│     493 │ KJA               │ SVO             │
└─────────┴───────────────────┴─────────────────┘
D --Most Expensive Seats on Average
D FROM GRAPH_TABLE (
   flight_graph
   MATCH (b:bookings)-[bt:bookings_tickets]->(t:tickets)-[bp:boarding_passes]->(s:seats)
 )
 SELECT round(avg(total_amount), 2) avg_amount, seat_no
 GROUP BY seat_no
 ORDER BY avg_amount DESC;
┌────────────┬─────────┐
│ avg_amount │ seat_no │
│   double   │ varchar │
├────────────┼─────────┤
│  165002.34 │ 5H      │
│  162638.52 │ 3H      │
│  161094.62 │ 1H      │
│       ·    │ ·       │
│       ·    │ ·       │
│       ·    │ ·       │
│    83132.0 │ 26J     │
├────────────┴─────────┤
│ 461 rows   2 columns │
│ (40 shown)           │
└──────────────────────┘
D ATTACH './finbench.duckdb' AS finbench;
D use finbench;
D CREATE OR REPLACE PROPERTY GRAPH finbench
 VERTEX TABLES (
   Account, Company, Loan,
   Medium, Person
 )
 EDGE TABLES (
   AccountRepayLoan        SOURCE KEY (accountId) REFERENCES Account (accountId)
                           DESTINATION KEY (loanId) REFERENCES Loan (loanId)
                           LABEL repay,
   AccountTransferAccount  SOURCE KEY (fromId) REFERENCES Account (accountId)
                           DESTINATION KEY (toId) REFERENCES Account (AccountId)
                           LABEL transfer,
   AccountWithdrawAccount  SOURCE KEY (fromId) REFERENCES Account (accountId)
                           DESTINATION KEY (toId) REFERENCES Account (AccountId)
                           LABEL withdraw,
   CompanyApplyLoan        SOURCE KEY (companyId) REFERENCES Company (companyId)
                           DESTINATION KEY (loanId) REFERENCES Loan (loanId)
                           LABEL companyApply,
   CompanyGuaranteeCompany SOURCE KEY (fromId) REFERENCES Company (companyId)
                           DESTINATION KEY (toId) REFERENCES Company (companyId)
                           LABEL companyGuarantee,
   CompanyInvestCompany    SOURCE KEY (investorId) REFERENCES Company (companyId)
                           DESTINATION KEY (companyId) REFERENCES Company (companyId)
                           LABEL companyInvest,
   CompanyOwnAccount       SOURCE KEY (companyId) REFERENCES Company (companyId)
                           DESTINATION KEY (accountId) REFERENCES Account (accountId)
                           LABEL companyOwn,
   LoanDepositAccount      SOURCE KEY (loanId) REFERENCES Loan (loanId)
                           DESTINATION KEY (accountId) REFERENCES Account (accountId)
                           LABEL deposit,
   MediumSignInAccount     SOURCE KEY (mediumId) REFERENCES Medium (mediumId)
                           DESTINATION KEY (accountId) REFERENCES Account (accountId)
                           LABEL signIn,
   PersonApplyLoan         SOURCE KEY (personId) REFERENCES Person (personId)
                           DESTINATION KEY (loanId) REFERENCES Loan (loanId)
                           LABEL personApply,
   PersonGuaranteePerson   SOURCE KEY (fromId) REFERENCES Person (personId)
                           DESTINATION KEY (toId) REFERENCES Person (personId)
                           LABEL personGuarantee,
   PersonInvestCompany     SOURCE KEY (investorId) REFERENCES Person (personId)
                           DESTINATION KEY (companyId) REFERENCES Company (companyId)
                           LABEL personInvest,
   PersonOwnAccount        SOURCE KEY (personId) REFERENCES Person (personId)
                           DESTINATION KEY (accountId) REFERENCES Account (accountId)
                           LABEL personOwn
 );
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ 0 rows  │
└─────────┘
D --Find blocked accounts via transfers
D FROM GRAPH_TABLE (
   finbench
   MATCH (src:Account where src.accountId = 16607023625929101)
     <-[e1:transfer]-(mid:Account)
     -[e2:transfer]->(dst:Account where dst.isBlocked = true)
   COLUMNS (src.accountId as src_id, dst.accountId as dst_id)
 )
 SELECT src_id, dst_id
 WHERE src_Id <> dst_id;
┌───────────────────┬────────────────────┐
│      src_id       │       dst_id       │
│       int64       │       int64        │
├───────────────────┼────────────────────┤
│ 16607023625929101 │ 112027040730841418 │
│ 16607023625929101 │ 164662861375734142 │

│ 16607023625929101 │ 260645828434067974 │
├───────────────────┴────────────────────┤
│ 25 rows                      2 columns │
└────────────────────────────────────────┘
D --Filter high-value transfers by time
D FROM GRAPH_TABLE (
   finbench
   MATCH (src:Account)-[e:Transfer]->(dst:Account)
   WHERE '2022-07-13 09:18:33.137' < e.createtime
     AND e.createtime < '2022-09-03 02:31:47.812'
     AND e.amount > 4829783
   );
┌─────────────────────┬────────────────┬────────────────────┬───┬─────────────────┬────────────────────┬────────────┐
│      accountId      │  accountLevel  │     accoutType     │ ... │ lastLoginTime_1 │     nickname_1     │ phonenum_1 │
│        int64        │    varchar     │      varchar       │   │      int64      │      varchar       │  varchar   │
├─────────────────────┼────────────────┼────────────────────┼───┼─────────────────┼────────────────────┼────────────┤
│ 4837710424726045987 │ Platinum level │ retirement account │ ... │   1624940728902 │ Arlena Penha       │ 487-8303   │
│ 4679521487814657374 │ Silver level   │ brokerage account  │ ... │   1885911947578 │ Kurt Fernandes     │ 100-2070   │
│ 4794081803335893276 │ Silver level   │ credit card        │ ... │   1607559769318 │ Von Kwiecinski     │ 093-2605   │
│           ·         │      ·         │      ·             │ · │         ·       │      ·             │    ·       │
│           ·         │      ·         │      ·             │ · │         ·       │      ·             │    ·       │
│           ·         │      ·         │      ·             │ · │         ·       │      ·             │    ·       │
│ 4857132198119080575 │ Basic level    │ credit card        │ ... │   1659899427801 │ Fredric Nothstine  │ 323-0349   │
├─────────────────────┴────────────────┴────────────────────┴───┴─────────────────┴────────────────────┴────────────┤
│ 638 rows (40 shown)                                                                          28 columns (6 shown) │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
D