1. 什么是 Py2neo? #
1.1 Py2neo 简介 #
Py2neo 是一个高级、Pythonic 的客户端库,用于连接和操作 Neo4j 图数据库。
为什么选择 Py2neo?
想象一下,你要操作 Neo4j 图数据库,有两种方式:
- 官方 neo4j 驱动:需要写 Cypher 查询语句,类似 SQL
- Py2neo:用 Python 对象的方式操作,更符合 Python 开发者的习惯
Py2neo 就像是一个"翻译官",把 Python 代码翻译成 Cypher 查询,让你用 Python 的方式操作图数据库。
Py2neo 的特点:
- Pythonic API:符合 Python 开发者习惯,代码更简洁
- 对象导向:节点、关系都是 Python 对象,操作更直观
- 快速开发:减少了学习 Cypher 的成本
- 良好抽象:对图数据库操作进行了高度封装
1.2 前置知识 #
在学习本教程之前,你需要掌握:
- Python 基础:变量、函数、类、异常处理
- Neo4j 基础:了解什么是节点、关系、属性
- 基本概念:图数据库的基本概念
1.3 Py2neo vs 官方驱动 #
| 特性 | 官方 neo4j 驱动 | Py2neo |
|---|---|---|
| API 风格 | 原生、底层、基于查询语句 | 高级、Pythonic、对象导向 |
| 学习曲线 | 较陡峭,需熟悉 Cypher | 平缓,Python 开发者友好 |
| 主要用途 | 高性能、精细化控制 | 快速开发、数据建模 |
| 代码示例 | driver.execute_query("MATCH...") | graph.create(node) |
选择建议:
- 如果你是 Python 开发者,想快速上手,选择 Py2neo
- 如果你需要高性能和精细控制,选择 官方驱动
2. 安装和连接 #
2.1 安装 Py2neo #
# 使用 pip 安装 Py2neo
pip install py2neo
验证安装:
import py2neo
print(f"Py2neo 版本:{py2neo.__version__}")
2.2 连接数据库 #
from py2neo import Graph, Node, Relationship
# 创建数据库连接
graph = Graph(
"bolt://localhost:7687",
auth=("neo4j", "12345678"),
name="neo4j"
)
# 验证连接
try:
# 使用 graph.run 只用于简单的连接测试
result = graph.run("RETURN 1").data()
print("连接成功!")
except Exception as e:
print(f"连接失败:{e}")
3. 核心数据模型 #
3.1 节点 (Node) #
节点是图数据库中的基本元素,用来表示实体。
from py2neo import Node
# 创建节点
# Node(标签, 属性1=值1, 属性2=值2, ...)
person = Node(
"Person",
name="周星驰",
born=1962,
job="演员"
)
print(person)
# 输出:(:Person {born: 1962, job: '演员', name: '周星驰'})
# 访问节点属性
print(f"姓名:{person['name']}")
print(f"出生年份:{person['born']}")
# 修改节点属性
person['job'] = "导演"
print(f"职业:{person['job']}")
# 获取节点的所有属性
print(f"所有属性:{dict(person)}")
# 获取节点的标签
print(f"标签:{list(person.labels)}")
创建多标签节点:
from py2neo import Node
# 节点可以有多个标签
actor_director = Node(
"Person", "Actor", "Director",
name="周星驰",
born=1962
)
print(f"标签:{list(actor_director.labels)}")
# 输出:标签:['Person', 'Actor', 'Director'](标签是集合,顺序可能不同)
批量创建节点:
from py2neo import Node
# 准备数据
actors_data = [
{"name": "周星驰", "born": 1962},
{"name": "吴孟达", "born": 1952},
{"name": "朱茵", "born": 1971}
]
# 批量创建节点
actors = [Node("Actor", **data) for data in actors_data]
for actor in actors:
print(actor)
3.2 关系 (Relationship) #
关系用来连接两个节点,表示它们之间的关联。
from py2neo import Node, Relationship
# 创建节点
zxc = Node("Actor", name="周星驰", born=1962)
kungfu = Node("Movie", title="功夫", released=2004)
# 创建关系
# Relationship(起始节点, 关系类型, 结束节点, 属性...)
acted_in = Relationship(zxc, "ACTED_IN", kungfu, roles=["阿星"])
print(acted_in)
# 输出:(周星驰)-[:ACTED_IN {roles: ['阿星']}]->(功夫)
# 访问关系属性
print(f"角色:{acted_in['roles']}")
# 访问关系的起始节点和结束节点
print(f"起始节点:{acted_in.start_node['name']}")
print(f"结束节点:{acted_in.end_node['title']}")
# 获取关系类型
print(f"关系类型:{type(acted_in).__name__}")
创建带多个属性的关系:
from py2neo import Node, Relationship
zxc = Node("Actor", name="周星驰")
shaolin = Node("Movie", title="少林足球", released=2001)
# 关系可以有多个属性
directed = Relationship(
zxc, "DIRECTED", shaolin,
year=2001,
award="最佳导演提名"
)
print(directed)
3.3 子图 (Subgraph) #
子图是节点和关系的集合,可以一次性保存到数据库。
from py2neo import Node, Relationship
# 创建节点
zxc = Node("Actor", name="周星驰", born=1962)
wmd = Node("Actor", name="吴孟达", born=1952)
kungfu = Node("Movie", title="功夫", released=2004)
# 创建关系
r1 = Relationship(zxc, "ACTED_IN", kungfu, roles=["阿星"])
r2 = Relationship(wmd, "ACTED_IN", kungfu, roles=["包租公"])
# 使用 | 运算符组合成子图
subgraph = zxc | wmd | kungfu | r1 | r2
print(f"子图包含 {len(subgraph.nodes)} 个节点")
print(f"子图包含 {len(subgraph.relationships)} 个关系")
4. 数据操作 #
4.1 保存数据到数据库 #
使用 graph.create() 将节点和关系保存到数据库。
from py2neo import Graph, Node, Relationship
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 创建节点
zxc = Node("Actor", name="周星驰", born=1962)
kungfu = Node("Movie", title="功夫", released=2004)
# 创建关系
acted_in = Relationship(zxc, "ACTED_IN", kungfu, roles=["阿星"])
# 方法1:分别保存
graph.create(zxc)
graph.create(kungfu)
graph.create(acted_in)
# 方法2:使用子图一次性保存(推荐,更高效)
subgraph = zxc | kungfu | acted_in
graph.create(subgraph)
print("数据保存成功!")
4.2 使用 merge() 避免重复 #
graph.merge() 会先检查节点是否存在,如果存在则匹配,如果不存在则创建。
from py2neo import Graph, Node, Relationship
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 创建节点
zxc = Node("Actor", name="周星驰", born=1962)
# 使用 merge 保存节点
# 第二个参数指定用于匹配的标签
# 第三个参数指定用于匹配的属性(唯一键)
graph.merge(zxc, "Actor", "name")
print("节点已合并(如果不存在则创建)")
# 合并关系
kungfu = Node("Movie", title="功夫", released=2004)
graph.merge(kungfu, "Movie", "title")
acted_in = Relationship(zxc, "ACTED_IN", kungfu, roles=["阿星"])
graph.merge(acted_in, "ACTED_IN", "roles")
print("关系已合并")
批量合并示例:
from py2neo import Graph, Node, Relationship
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 准备数据
movies_data = [
{"title": "功夫", "released": 2004},
{"title": "少林足球", "released": 2001},
{"title": "大话西游", "released": 1995}
]
# 批量合并节点
for data in movies_data:
movie = Node("Movie", **data)
graph.merge(movie, "Movie", "title")
print("批量合并完成!")
4.3 查询数据 #
使用 NodeMatcher 查询节点 #
NodeMatcher 是 Py2neo 提供的 Pythonic 查询接口。
from py2neo import Graph, NodeMatcher
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 创建节点匹配器
matcher = NodeMatcher(graph)
# 查询所有 Actor 节点
all_actors = list(matcher.match("Actor"))
print(f"共有 {len(all_actors)} 个演员")
for actor in all_actors:
print(f" {actor['name']}")
# 按属性查询单个节点
zxc = matcher.match("Actor", name="周星驰").first()
if zxc:
print(f"找到:{zxc['name']},出生于 {zxc['born']} 年")
# 条件查询
# where() 方法添加条件,_ 表示当前节点
old_actors = matcher.match("Actor").where("_.born < 1960")
for actor in old_actors:
print(f" {actor['name']} ({actor['born']})")
# 限制返回数量
top3 = matcher.match("Actor").limit(3)
for actor in top3:
print(f" {actor['name']}")
# 排序查询
sorted_actors = matcher.match("Actor").order_by("_.born")
for actor in sorted_actors:
print(f" {actor['name']} ({actor['born']})")
使用 RelationshipMatcher 查询关系 #
from py2neo import Graph, NodeMatcher, RelationshipMatcher
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 创建匹配器
node_matcher = NodeMatcher(graph)
rel_matcher = RelationshipMatcher(graph)
# 查询所有 ACTED_IN 关系
all_acted_in = list(rel_matcher.match(r_type="ACTED_IN"))
print(f"共有 {len(all_acted_in)} 条出演关系")
# 查询特定节点的关系
zxc = node_matcher.match("Actor", name="周星驰").first()
if zxc:
# 查询周星驰的所有出演关系
zxc_movies = rel_matcher.match(nodes=[zxc], r_type="ACTED_IN")
for rel in zxc_movies:
movie = rel.end_node
print(f" {zxc['name']} 出演 《{movie['title']}》饰演 {rel['roles']}")
4.4 更新数据 #
使用 graph.push() 将修改后的节点同步到数据库。
from py2neo import Graph, NodeMatcher
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
matcher = NodeMatcher(graph)
# 查找要更新的节点
zxc = matcher.match("Actor", name="周星驰").first()
if zxc:
# 修改属性
zxc['job'] = "导演"
zxc['awards'] = ["金像奖", "金马奖"]
# 推送更改到数据库
graph.push(zxc)
print("节点更新成功!")
else:
print("节点不存在")
批量更新示例:
from py2neo import Graph, NodeMatcher
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
matcher = NodeMatcher(graph)
# 查找所有演员
actors = matcher.match("Actor")
# 批量更新
for actor in actors:
actor['updated'] = True
graph.push(actor)
print("批量更新完成!")
4.5 删除数据 #
使用 graph.delete() 删除节点和关系。
from py2neo import Graph, NodeMatcher, RelationshipMatcher
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
node_matcher = NodeMatcher(graph)
rel_matcher = RelationshipMatcher(graph)
# 查找要删除的节点
test_node = node_matcher.match("TestNode").first()
if test_node:
# 删除节点(会同时删除相关的关系)
graph.delete(test_node)
print("节点删除成功!")
# 只删除关系,不删除节点
zxc = node_matcher.match("Actor", name="周星驰").first()
if zxc:
# 查找该节点的特定关系
rels = rel_matcher.match(nodes=[zxc], r_type="TEST_REL")
for rel in rels:
graph.separate(rel) # separate 只删除关系,不删除节点
print("关系删除成功!")
清空数据库(谨慎使用):
from py2neo import Graph
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 删除所有节点和关系
graph.delete_all()
print("数据库已清空!")
5. 路径查询 #
路径查询需要使用 Cypher,因为 Py2neo 没有提供原生的路径查询 API。
5.1 查找朋友和朋友的朋友(两跳以内) #
from py2neo import Graph
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 路径查询需要使用 graph.run()
result = graph.run("""
MATCH (me:Person {name: $name})-[:FRIEND*1..2]-(friend:Person)
WHERE me <> friend
RETURN DISTINCT friend.name AS name
""", name="张三")
print("朋友的朋友:")
for record in result:
print(f" {record['name']}")
5.2 查找最短路径 #
from py2neo import Graph
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
result = graph.run("""
MATCH path = shortestPath(
(a:Actor {name: $name1})-[*]-(b:Actor {name: $name2})
)
RETURN [node IN nodes(path) | node.name] AS path,
length(path) AS depth
""", name1="周星驰", name2="朱茵")
for record in result:
print(f"路径:{' -> '.join(record['path'])}")
print(f"深度:{record['depth']}")
6. 实际应用示例 #
6.1 电影知识图谱 #
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
class MovieKnowledgeGraph:
    """Small movie knowledge graph stored in Neo4j through py2neo.

    Actors are ``Actor`` nodes merged on ``name``; movies are ``Movie`` nodes
    merged on ``title``. ``ACTED_IN`` and ``DIRECTED`` relationships are
    created from the actor node to the movie node.
    """

    def __init__(self, uri, auth):
        """Open a connection to *uri* with *auth* and build the matchers."""
        self.graph = Graph(uri, auth=auth)
        self.node_matcher = NodeMatcher(self.graph)
        self.rel_matcher = RelationshipMatcher(self.graph)

    def add_actor(self, name, born):
        """Merge an ``Actor`` node keyed by ``name`` and return it."""
        actor = Node("Actor", name=name, born=born)
        self.graph.merge(actor, "Actor", "name")
        return actor

    def add_movie(self, title, released):
        """Merge a ``Movie`` node keyed by ``title`` and return it."""
        movie = Node("Movie", title=title, released=released)
        self.graph.merge(movie, "Movie", "title")
        return movie

    def add_acted_in(self, actor_name, movie_title, roles):
        """Merge an ``ACTED_IN`` relationship from the actor to the movie.

        Returns the relationship, or ``None`` when either node is missing.
        """
        actor = self.node_matcher.match("Actor", name=actor_name).first()
        movie = self.node_matcher.match("Movie", title=movie_title).first()
        # Compare against None explicitly: first() returns None when nothing
        # matches, and relying on the truthiness of a node object is fragile.
        if actor is None or movie is None:
            return None
        rel = Relationship(actor, "ACTED_IN", movie, roles=roles)
        # NOTE(review): per the py2neo docs, relationships are merged on
        # start node + type + end node; the label/key arguments appear to
        # apply only to nodes that are not yet bound — confirm.
        self.graph.merge(rel, "ACTED_IN", "roles")
        return rel

    def add_directed(self, director_name, movie_title):
        """Merge a ``DIRECTED`` relationship from the director to the movie.

        The director is looked up among ``Actor`` nodes. Returns the
        relationship, or ``None`` when either node is missing.
        """
        director = self.node_matcher.match("Actor", name=director_name).first()
        movie = self.node_matcher.match("Movie", title=movie_title).first()
        if director is None or movie is None:
            return None
        rel = Relationship(director, "DIRECTED", movie)
        self.graph.merge(rel, "DIRECTED")
        return rel

    def get_actor(self, name):
        """Return the first ``Actor`` node with this ``name``, or ``None``."""
        return self.node_matcher.match("Actor", name=name).first()

    def get_movie(self, title):
        """Return the first ``Movie`` node with this ``title``, or ``None``."""
        return self.node_matcher.match("Movie", title=title).first()

    def get_movies_by_actor(self, actor_name):
        """Return a list of ``{title, released, roles}`` dicts for an actor.

        Returns an empty list when the actor does not exist.
        """
        actor = self.get_actor(actor_name)
        if actor is None:
            return []
        # A one-element sequence fixes the START node, which is the actor
        # side of ACTED_IN.
        rels = self.rel_matcher.match(nodes=[actor], r_type="ACTED_IN")
        return [
            {
                "title": rel.end_node["title"],
                "released": rel.end_node["released"],
                "roles": rel["roles"],
            }
            for rel in rels
        ]

    def get_actors_in_movie(self, movie_title):
        """Return a list of ``{name, born, roles}`` dicts for a movie's cast.

        Returns an empty list when the movie does not exist.
        """
        movie = self.get_movie(movie_title)
        if movie is None:
            return []
        # ACTED_IN points actor -> movie, so the movie has to be passed as
        # the END node. ``nodes=[movie]`` would be read as the start node and
        # never match anything.
        rels = self.rel_matcher.match(nodes=(None, movie), r_type="ACTED_IN")
        return [
            {
                "name": rel.start_node["name"],
                "born": rel.start_node["born"],
                "roles": rel["roles"],
            }
            for rel in rels
        ]
# 使用示例
if __name__ == "__main__":
kg = MovieKnowledgeGraph(
uri="bolt://localhost:7687",
auth=("neo4j", "12345678")
)
# 添加演员
kg.add_actor("周星驰", 1962)
kg.add_actor("吴孟达", 1952)
kg.add_actor("朱茵", 1971)
kg.add_actor("赵薇", 1976)
# 添加电影
kg.add_movie("功夫", 2004)
kg.add_movie("少林足球", 2001)
kg.add_movie("大话西游", 1995)
# 添加出演关系
kg.add_acted_in("周星驰", "功夫", ["阿星"])
kg.add_acted_in("周星驰", "少林足球", ["五师兄"])
kg.add_acted_in("周星驰", "大话西游", ["至尊宝"])
kg.add_acted_in("吴孟达", "少林足球", ["黄金右脚"])
kg.add_acted_in("朱茵", "大话西游", ["紫霞仙子"])
kg.add_acted_in("赵薇", "少林足球", ["阿梅"])
# 添加导演关系
kg.add_directed("周星驰", "功夫")
kg.add_directed("周星驰", "少林足球")
# 查询周星驰的电影
print("周星驰出演的电影:")
movies = kg.get_movies_by_actor("周星驰")
for movie in movies:
print(f" 《{movie['title']}》({movie['released']}) - 饰演 {movie['roles']}")
# 查询少林足球的演员
print("\n《少林足球》的演员:")
actors = kg.get_actors_in_movie("少林足球")
for actor in actors:
print(f" {actor['name']} - 饰演 {actor['roles']}")
6.2 社交网络应用 #
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
from datetime import datetime
class SocialNetwork:
    """Minimal social network stored in Neo4j through py2neo.

    Users are ``User`` nodes merged on ``id``. A friendship is stored as a
    single ``FRIEND`` relationship and read back ignoring its direction.
    """

    def __init__(self, uri, auth):
        """Open a connection to *uri* with *auth* and build the matchers."""
        self.graph = Graph(uri, auth=auth)
        self.node_matcher = NodeMatcher(self.graph)
        self.rel_matcher = RelationshipMatcher(self.graph)

    def add_user(self, user_id, name, age=None, city=None):
        """Merge a ``User`` node keyed by ``id`` and return it."""
        user = Node("User", id=user_id, name=name, age=age, city=city)
        self.graph.merge(user, "User", "id")
        return user

    def add_friendship(self, user1_id, user2_id):
        """Merge a ``FRIEND`` relationship from user1 to user2.

        Only one directed relationship is written; queries treat it as
        undirected. ``since`` is set to the current year. Returns the
        relationship, or ``None`` when either user is missing.
        """
        user1 = self.node_matcher.match("User", id=user1_id).first()
        user2 = self.node_matcher.match("User", id=user2_id).first()
        # first() returns None when nothing matches; test for that directly
        # rather than relying on the truthiness of a node object.
        if user1 is None or user2 is None:
            return None
        rel = Relationship(user1, "FRIEND", user2, since=datetime.now().year)
        self.graph.merge(rel, "FRIEND")
        return rel

    def get_user(self, user_id):
        """Return the first ``User`` node with this ``id``, or ``None``."""
        return self.node_matcher.match("User", id=user_id).first()

    def get_friends(self, user_id):
        """Return a list of ``{id, name, since}`` dicts for a user's friends.

        Returns an empty list when the user does not exist.
        """
        user = self.get_user(user_id)
        if user is None:
            return []
        # Pass a set so the match ignores direction. A list would pin the
        # user as the START node and miss friendships that were created from
        # the other side.
        rels = self.rel_matcher.match(nodes={user}, r_type="FRIEND")
        friends = []
        for rel in rels:
            # Pick whichever end of the relationship is not the user.
            friend = rel.end_node if rel.start_node == user else rel.start_node
            friends.append({
                "id": friend["id"],
                "name": friend["name"],
                "since": rel["since"],
            })
        return friends

    def recommend_friends(self, user_id, limit=5):
        """Recommend friends-of-friends, ranked by number of common friends.

        Runs a Cypher query and returns one dict per record, with the keys
        ``id``, ``name`` and ``common_friends``; at most *limit* records.
        """
        result = self.graph.run("""
            MATCH (user:User {id: $user_id})-[:FRIEND]-(friend)-[:FRIEND]-(potential:User)
            WHERE user <> potential
            AND NOT (user)-[:FRIEND]-(potential)
            WITH potential, COUNT(DISTINCT friend) AS common_friends
            RETURN potential.id AS id, potential.name AS name, common_friends
            ORDER BY common_friends DESC
            LIMIT $limit
        """, user_id=user_id, limit=limit)
        return [dict(record) for record in result]
# 使用示例
if __name__ == "__main__":
social = SocialNetwork(
uri="bolt://localhost:7687",
auth=("neo4j", "12345678")
)
# 添加用户
social.add_user("u1", "张三", 25, "北京")
social.add_user("u2", "李四", 30, "上海")
social.add_user("u3", "王五", 28, "广州")
social.add_user("u4", "赵六", 32, "深圳")
social.add_user("u5", "钱七", 26, "杭州")
# 添加朋友关系
social.add_friendship("u1", "u2") # 张三 <-> 李四
social.add_friendship("u1", "u3") # 张三 <-> 王五
social.add_friendship("u2", "u4") # 李四 <-> 赵六
social.add_friendship("u3", "u4") # 王五 <-> 赵六
social.add_friendship("u4", "u5") # 赵六 <-> 钱七
# 获取张三的朋友
print("张三的朋友:")
friends = social.get_friends("u1")
for friend in friends:
print(f" {friend['name']} (从 {friend['since']} 年开始)")
# 为张三推荐朋友
print("\n为张三推荐朋友:")
recommendations = social.recommend_friends("u1")
for rec in recommendations:
print(f" {rec['name']} (共同朋友数:{rec['common_friends']})")
7. 事务处理 #
from py2neo import Graph, Node
graph = Graph("bolt://localhost:7687", auth=("neo4j", "12345678"))
# 开始事务
tx = graph.begin()
try:
# 在事务中批量创建节点
for i in range(100):
node = Node("TestUser", id=i, name=f"User_{i}")
tx.create(node)
# 提交事务
graph.commit(tx)
print("批量创建成功!")
except Exception as e:
# 回滚事务
graph.rollback(tx)
print(f"创建失败:{e}")
8. 最佳实践 #
8.1 何时使用 graph.run() #
虽然我们推荐使用 Py2neo 的原生 API,但以下情况可以使用 graph.run():
- 路径查询:Py2neo 没有原生的路径查询 API
- 复杂聚合查询:如 COUNT、SUM、AVG 等
- 批量操作优化:使用 UNWIND 批量处理大量数据
- 高级 Cypher 功能:如 APOC 插件、图算法等
8.2 API 选择指南 #
| 操作 | 推荐方式 | 备注 |
|---|---|---|
| 创建单个节点 | graph.create(node) | Pythonic |
| 创建多个节点 | graph.create(subgraph) | 子图一次性创建 |
| 避免重复创建 | graph.merge(node, label, key) | 推荐使用 |
| 查询节点 | NodeMatcher | Pythonic 查询 |
| 查询关系 | RelationshipMatcher | Pythonic 查询 |
| 更新节点 | 修改属性 + graph.push(node) | 先改后推 |
| 删除节点 | graph.delete(node) | 同时删除关系 |
| 路径查询 | graph.run() | 需要 Cypher |
| 批量操作 | graph.run() + UNWIND | 性能更好 |
9. 小结 #
9.1 核心概念 #
| 概念 | 说明 | 用法 |
|---|---|---|
| Graph | 数据库连接 | Graph(uri, auth=...) |
| Node | 节点对象 | Node("Label", prop=value) |
| Relationship | 关系对象 | Relationship(a, "TYPE", b) |
| NodeMatcher | 节点查询器 | matcher.match("Label") |
| RelationshipMatcher | 关系查询器 | matcher.match(r_type="TYPE") |
9.2 关键要点 #
- 优先使用 Py2neo 原生 API:Node、Relationship、NodeMatcher 等
- 使用 merge() 避免重复:比 create() 更安全
- 使用 push() 同步更改:修改节点后要推送到数据库
- 路径查询用 Cypher:Py2neo 没有原生支持
- 批量操作用事务:提高性能和数据一致性