Python面试题集 - 数据结构与算法
目录
内置数据结构
Q1: Python列表(List)的底层实现与性能分析
详细解答:
底层实现:
- 动态数组(Dynamic Array)
- 连续内存空间
- 支持随机访问,O(1)时间复杂度
- 动态扩容机制(通常1.125倍)
性能分析:
python
# 时间复杂度表
"""
操作 平均时间 最坏时间
访问 list[i] O(1) O(1)
搜索 x in list O(n) O(n)
追加 append O(1) O(n) (扩容时)
插入 insert O(n) O(n)
删除 del/remove O(n) O(n)
切片 list[a:b] O(k) O(k) (k=b-a)
"""
# 1. 追加操作
import time
def test_append():
    """Time one million ``list.append`` calls and print the elapsed seconds.

    ``append`` is amortized O(1), so the loop as a whole is linear.
    """
    lst = []
    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted.
    start = time.perf_counter()
    for i in range(1000000):
        lst.append(i)
    print(f"追加100万元素: {time.perf_counter() - start:.4f}秒")
test_append() # 非常快,均摊O(1)
# 2. 插入操作
def test_insert():
    """Time 1000 insertions at the front of a 10,000-element list.

    Each ``insert(0, x)`` shifts every existing element, so it is O(n).
    """
    lst = list(range(10000))
    # perf_counter() is the clock intended for measuring short durations.
    start = time.perf_counter()
    for i in range(1000):
        lst.insert(0, i)  # inserting at the head is the slowest position
    print(f"开头插入1000次: {time.perf_counter() - start:.4f}秒")
test_insert() # 较慢,O(n)
# 3. 预分配空间
# 方法1:使用乘法
lst1 = [0] * 1000000 # 快速创建
# 方法2:列表推导式
lst2 = [0 for _ in range(1000000)]
# 方法3:使用*运算符(注意引用问题)
lst3 = [[]] * 3
lst3[0].append(1)
print(lst3) # [[1], [1], [1]],共享引用
# 正确方式
lst4 = [[] for _ in range(3)]
lst4[0].append(1)
print(lst4) # [[1], [], []]
列表操作最佳实践:
python
# 1. 频繁追加 - 使用append而非+
# 不好
result = []
for i in range(10000):
result = result + [i] # 每次创建新列表,O(n²)
# 好
result = []
for i in range(10000):
result.append(i) # 均摊O(1)
# 2. 批量操作 - 使用extend
# 不好
for item in items:
result.append(item)
# 好
result.extend(items) # 更高效
# 3. 删除元素 - 根据场景选择
lst = list(range(1000))
# 删除指定索引
del lst[500] # O(n)
# 删除指定值(第一次出现)
lst.remove(500) # O(n)
# 删除并返回
value = lst.pop() # O(1),末尾删除
value = lst.pop(0) # O(n),开头删除
# 4. 列表复制
original = [1, 2, [3, 4]]
# 浅拷贝
copy1 = original[:]
copy2 = original.copy()
copy3 = list(original)
# 深拷贝
import copy
deep = copy.deepcopy(original)
# 5. 列表去重
# 方法1:保持顺序
def dedupe(items):
    """Return a list of the unique elements of ``items`` in first-seen order.

    Elements must be hashable because a set tracks what has been seen.
    """
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
# 方法2:不保持顺序
lst = [1, 2, 2, 3, 3, 4]
unique = list(set(lst))
# 方法3:保持顺序(Python 3.7+字典有序)
unique = list(dict.fromkeys(lst))
Q2: 字典(Dict)的实现原理与性能优化
详细解答:
底层实现:
- 哈希表(Hash Table)
- CPython 3.6起保持插入顺序(属于实现细节),Python 3.7起成为语言规范的正式保证
- 开放寻址法处理冲突
- 动态扩容(容量达到2/3时)
哈希冲突处理:
python
# Python字典使用开放寻址法
# 当发生冲突时,探测下一个位置
class SimpleDict:
    """Simplified hash map that illustrates how open addressing works.

    Collisions are resolved with linear probing and the table doubles when it
    gets too full. ``None`` marks an empty slot, so ``None`` cannot be used as
    a key. Deletion is not implemented.
    """

    def __init__(self, size=8):
        """Create an empty table with ``size`` slots.

        Raises:
            ValueError: if ``size`` is smaller than 1.
        """
        if size < 1:
            raise ValueError("size must be at least 1")
        self.size = size
        self.keys = [None] * size
        self.values = [None] * size
        self.used = 0

    def _hash(self, key):
        """Map ``key`` to its home slot index."""
        return hash(key) % self.size

    def _probe(self, index):
        """Return the next slot to try (linear probing, wrapping around)."""
        return (index + 1) % self.size

    def _needs_resize(self):
        """Return True when one more insertion requires growing the table first.

        Besides the 0.66 load-factor rule, the table must always keep at
        least one empty slot: the probe loops in ``put``/``get`` stop only on
        an empty slot, so a completely full table would loop forever on a
        missing key (this happened with ``size`` 1 or 2).
        """
        return self.used >= self.size * 0.66 or self.used + 1 >= self.size

    def put(self, key, value):
        """Insert ``key`` or overwrite its value if it is already present."""
        if self._needs_resize():
            self._resize()
        index = self._hash(key)
        while self.keys[index] is not None:
            if self.keys[index] == key:
                self.values[index] = value
                return
            index = self._probe(index)
        self.keys[index] = key
        self.values[index] = value
        self.used += 1

    def get(self, key):
        """Return the value stored for ``key``.

        Raises:
            KeyError: if ``key`` is not in the table.
        """
        index = self._hash(key)
        while self.keys[index] is not None:
            if self.keys[index] == key:
                return self.values[index]
            index = self._probe(index)
        raise KeyError(key)

    def _resize(self):
        """Double the table size and re-insert every stored pair."""
        old_keys = self.keys
        old_values = self.values
        self.size *= 2
        self.keys = [None] * self.size
        self.values = [None] * self.size
        self.used = 0
        for key, value in zip(old_keys, old_values):
            if key is not None:
                self.put(key, value)
性能特性:
python
# 时间复杂度
"""
操作 平均时间 最坏时间
访问 dict[key] O(1) O(n)
插入 dict[key]=v O(1) O(n)
删除 del dict[key] O(1) O(n)
查找 key in dict O(1) O(n)
"""
# 性能测试
import time
# 1. 字典查找 vs 列表查找
data_dict = {i: i for i in range(10000)}
data_list = list(range(10000))
# 字典查找
start = time.time()
for _ in range(10000):
_ = 5000 in data_dict
print(f"字典查找: {time.time() - start:.6f}秒")
# 列表查找
start = time.time()
for _ in range(10000):
_ = 5000 in data_list
print(f"列表查找: {time.time() - start:.6f}秒")
# 2. 字典键的选择
# 可哈希类型:int, str, tuple(元素可哈希)
# 不可哈希类型:list, dict, set
valid_keys = {
42: "int key",
"name": "string key",
(1, 2, 3): "tuple key"
}
# 错误示例
try:
invalid = {[1, 2]: "list key"}
except TypeError as e:
print(f"错误: {e}")
字典操作最佳实践:
python
# 1. 默认值处理
from collections import defaultdict
# 方法1:get方法
count = {}
for item in items:
count[item] = count.get(item, 0) + 1
# 方法2:setdefault
count = {}
for item in items:
count.setdefault(item, 0)
count[item] += 1
# 方法3:defaultdict(推荐)
count = defaultdict(int)
for item in items:
count[item] += 1
# 2. 字典合并
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
# Python 3.9+
merged = dict1 | dict2 # {'a': 1, 'b': 3, 'c': 4}
# Python 3.5+
merged = {**dict1, **dict2}
# 通用方法
merged = dict1.copy()
merged.update(dict2)
# 3. 字典推导式
# 反转字典
original = {'a': 1, 'b': 2, 'c': 3}
reversed_dict = {v: k for k, v in original.items()}
# 过滤字典
large_values = {k: v for k, v in original.items() if v > 1}
# 转换值
squared = {k: v**2 for k, v in original.items()}
# 4. 有序字典(Python 3.7+默认有序)
from collections import OrderedDict
# 3.6之前需要使用OrderedDict
ordered = OrderedDict()
ordered['a'] = 1
ordered['b'] = 2
# 3.7+普通dict即可
normal_dict = {'a': 1, 'b': 2} # 保持插入顺序
# 5. 字典视图
d = {'a': 1, 'b': 2, 'c': 3}
# 键视图
keys = d.keys() # dict_keys(['a', 'b', 'c'])
# 值视图
values = d.values() # dict_values([1, 2, 3])
# 项视图
items = d.items() # dict_items([('a', 1), ('b', 2), ('c', 3)])
# 视图是动态的
d['d'] = 4
print(list(keys)) # ['a', 'b', 'c', 'd']
# 6. 性能优化技巧
# 预先估计大小
large_dict = dict.fromkeys(range(10000), 0)
# 避免频繁的键检查
# 不好
if key in my_dict:
value = my_dict[key]
else:
value = default
# 好
value = my_dict.get(key, default)
# 批量更新
updates = {'a': 1, 'b': 2}
my_dict.update(updates) # 比逐个赋值快
Q3: 集合(Set)的应用与性能
详细解答:
底层实现:
- 基于哈希表实现
- 元素必须可哈希
- 无序(迭代顺序不受保证;与dict不同,set在Python 3.7+中也不保留插入顺序)
- 自动去重
基础操作:
python
# 创建集合
set1 = {1, 2, 3, 4, 5}
set2 = set([1, 2, 3, 3, 4]) # {1, 2, 3, 4}
# 空集合
empty_set = set() # 不能用{},那是空字典
# 集合操作
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
# 并集
union = a | b # {1, 2, 3, 4, 5, 6}
union = a.union(b)
# 交集
intersection = a & b # {3, 4}
intersection = a.intersection(b)
# 差集
difference = a - b # {1, 2}
difference = a.difference(b)
# 对称差集
symmetric_diff = a ^ b # {1, 2, 5, 6}
symmetric_diff = a.symmetric_difference(b)
# 子集判断
is_subset = {1, 2} <= a # True
is_subset = {1, 2}.issubset(a)
# 超集判断
is_superset = a >= {1, 2} # True
is_superset = a.issuperset({1, 2})
实际应用场景:
python
# 1. 去重
numbers = [1, 2, 2, 3, 3, 4, 5, 5]
unique = list(set(numbers))
# 保持顺序的去重
def ordered_unique(sequence):
    """Return the unique elements of ``sequence`` in first-seen order.

    Elements must be hashable. Written as an explicit loop instead of the
    ``x in seen or seen.add(x)`` trick, which relies on ``set.add`` returning
    ``None`` and hides a side effect inside a condition.
    """
    seen = set()
    unique = []
    for x in sequence:
        if x not in seen:
            seen.add(x)
            unique.append(x)
    return unique
# 2. 成员测试(性能优越)
# 检查是否存在
large_list = list(range(10000))
large_set = set(range(10000))
import time
# 列表查找:O(n)
start = time.time()
for _ in range(10000):
_ = 9999 in large_list
print(f"列表: {time.time() - start:.6f}秒")
# 集合查找:O(1)
start = time.time()
for _ in range(10000):
_ = 9999 in large_set
print(f"集合: {time.time() - start:.6f}秒")
# 3. 查找共同元素
list1 = [1, 2, 3, 4, 5]
list2 = [4, 5, 6, 7, 8]
common = list(set(list1) & set(list2)) # [4, 5]
# 4. 查找差异
only_in_list1 = list(set(list1) - set(list2)) # [1, 2, 3]
# 5. 数据验证
valid_ids = {1, 2, 3, 4, 5}
input_ids = [1, 3, 6]
invalid = [id for id in input_ids if id not in valid_ids]
print(f"无效ID: {invalid}") # [6]
# 6. 图算法中的访问标记
def dfs(graph, start):
    """Return the set of nodes reachable from ``start`` (iterative DFS).

    Args:
        graph: mapping from node to an iterable of neighbour nodes. Sets,
            lists and tuples all work; the previous ``graph[node] - visited``
            form required every neighbour collection to be a set.
        start: node to begin the traversal from.

    Raises:
        KeyError: if a visited node has no entry in ``graph``.
    """
    visited = set()
    stack = [start]
    while stack:
        node = stack.pop()
        if node in visited:
            continue
        visited.add(node)
        stack.extend(n for n in graph[node] if n not in visited)
    return visited
# 7. 统计唯一元素
def count_unique(data):
    """Return the number of distinct (hashable) elements in ``data``."""
    return len(set(data))
# 8. 快速过滤
# 从大列表中过滤出指定集合的元素
filter_set = {2, 4, 6}
data = [1, 2, 3, 4, 5, 6, 7]
filtered = [x for x in data if x in filter_set] # O(n)
frozenset - 不可变集合:
python
# frozenset可以作为字典键或集合元素
frozen = frozenset([1, 2, 3])
# 作为字典键
dict_with_set_key = {
frozenset([1, 2]): "value1",
frozenset([3, 4]): "value2"
}
# 作为集合元素
set_of_sets = {
frozenset([1, 2]),
frozenset([3, 4])
}
# 不可变
try:
frozen.add(4)
except AttributeError as e:
print(f"错误: {e}")
常用算法实现
Q4: 排序算法的Python实现与比较
详细解答:
1. 快速排序(Quick Sort)
python
def quick_sort(arr):
    """Return the elements of ``arr`` in ascending order using quicksort.

    Average O(n log n), worst case O(n^2). Three-way partitioning around the
    middle element groups values equal to the pivot. A list of length 0 or 1
    is returned as-is (the same object); longer inputs yield a new list.
    """
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quick_sort(left) + middle + quick_sort(right)
# 原地快排(更高效)
def quick_sort_inplace(arr, low=0, high=None):
    """Sort ``arr[low:high + 1]`` in place with quicksort and return ``arr``.

    Args:
        arr: mutable sequence to sort.
        low: first index of the range to sort.
        high: last index of the range (inclusive); defaults to the last index.

    Only the smaller partition is handled recursively; the larger one is
    processed by the loop. That bounds the recursion depth at O(log n), so
    sorted input of more than ~1000 elements no longer raises RecursionError.
    The worst-case running time is still O(n^2).
    """
    if high is None:
        high = len(arr) - 1
    while low < high:
        pivot_index = partition(arr, low, high)
        if pivot_index - low < high - pivot_index:
            quick_sort_inplace(arr, low, pivot_index - 1)
            low = pivot_index + 1
        else:
            quick_sort_inplace(arr, pivot_index + 1, high)
            high = pivot_index - 1
    return arr


def partition(arr, low, high):
    """Lomuto partition of ``arr[low:high + 1]`` around ``arr[high]``.

    Moves every element <= pivot in front of the pivot and returns the
    pivot's final index.
    """
    pivot = arr[high]
    i = low - 1  # last index of the "<= pivot" region
    for j in range(low, high):
        if arr[j] <= pivot:
            i += 1
            arr[i], arr[j] = arr[j], arr[i]
    arr[i + 1], arr[high] = arr[high], arr[i + 1]
    return i + 1
# 测试
print(quick_sort([3, 6, 8, 10, 1, 2, 1]))
2. 归并排序(Merge Sort)
python
def merge_sort(arr):
    """Return the elements of ``arr`` in ascending order (stable merge sort).

    Runs in O(n log n). A list of length 0 or 1 is returned as-is.
    """
    if len(arr) <= 1:
        return arr
    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])
    return merge(left, right)


def merge(left, right):
    """Merge two sorted lists into one new sorted list.

    Ties take the element from ``left`` first, which keeps the sort stable.
    """
    result = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            result.append(left[i])
            i += 1
        else:
            result.append(right[j])
            j += 1
    # At most one of these tails is non-empty.
    result.extend(left[i:])
    result.extend(right[j:])
    return result
print(merge_sort([38, 27, 43, 3, 9, 82, 10]))
3. 堆排序(Heap Sort)
python
def heap_sort(arr):
    """Sort ``arr`` in place in ascending order with heapsort and return it.

    Runs in O(n log n); not stable.
    """
    n = len(arr)
    # Build a max-heap bottom-up, starting from the last parent node.
    for i in range(n // 2 - 1, -1, -1):
        heapify(arr, n, i)
    # Move the current maximum to the end, then restore the shrunken heap.
    for i in range(n - 1, 0, -1):
        arr[0], arr[i] = arr[i], arr[0]
        heapify(arr, i, 0)
    return arr


def heapify(arr, n, i):
    """Sift ``arr[i]`` down so the subtree rooted at ``i`` is a max-heap.

    Only the first ``n`` elements of ``arr`` are treated as part of the heap.
    """
    largest = i
    left = 2 * i + 1
    right = 2 * i + 2
    if left < n and arr[left] > arr[largest]:
        largest = left
    if right < n and arr[right] > arr[largest]:
        largest = right
    if largest != i:
        arr[i], arr[largest] = arr[largest], arr[i]
        heapify(arr, n, largest)
print(heap_sort([12, 11, 13, 5, 6, 7]))
4. 计数排序(Counting Sort)
python
def counting_sort(arr):
    """Return a new sorted list of the integers in ``arr`` (counting sort).

    Runs in O(n + k) time and space, where k is ``max(arr) - min(arr) + 1``,
    so it only suits integers with a small value range. The sort is stable.
    The input is never modified and a new list is always returned; an empty
    input previously returned the caller's own list object.
    """
    if not arr:
        return []
    max_val = max(arr)
    min_val = min(arr)
    range_size = max_val - min_val + 1
    # Count occurrences, offset by min_val so negative values work.
    count = [0] * range_size
    for num in arr:
        count[num - min_val] += 1
    # Prefix sums: count[v] becomes the number of elements <= v.
    for i in range(1, len(count)):
        count[i] += count[i - 1]
    # Walk the input backwards so equal elements keep their relative order.
    output = [0] * len(arr)
    for num in reversed(arr):
        index = count[num - min_val] - 1
        output[index] = num
        count[num - min_val] -= 1
    return output
print(counting_sort([4, 2, 2, 8, 3, 3, 1]))
5. Python内置排序
python
# sorted() - 返回新列表
arr = [3, 1, 4, 1, 5, 9, 2]
sorted_arr = sorted(arr) # [1, 1, 2, 3, 4, 5, 9]
print(arr) # [3, 1, 4, 1, 5, 9, 2],原列表不变
# list.sort() - 原地排序
arr.sort()
print(arr) # [1, 1, 2, 3, 4, 5, 9]
# 自定义排序键
people = [
('Alice', 25),
('Bob', 30),
('Charlie', 20)
]
# 按年龄排序
sorted_by_age = sorted(people, key=lambda x: x[1])
# [('Charlie', 20), ('Alice', 25), ('Bob', 30)]
# 多级排序
# 先按年龄,再按名字
sorted_multi = sorted(people, key=lambda x: (x[1], x[0]))
# 降序排序
sorted_desc = sorted(arr, reverse=True)
# 复杂对象排序
from operator import attrgetter, itemgetter
class Person:
    """Minimal record with ``name`` and ``age``, used to demo key-based sorting."""

    def __init__(self, name, age):
        self.name = name
        self.age = age
people = [Person('Alice', 25), Person('Bob', 30)]
sorted_people = sorted(people, key=attrgetter('age'))
# 字典列表排序
dict_list = [
{'name': 'Alice', 'age': 25},
{'name': 'Bob', 'age': 30}
]
sorted_dicts = sorted(dict_list, key=itemgetter('age'))
性能比较:
python
import time
import random
def benchmark_sort(sort_func, arr):
    """Return the seconds ``sort_func`` takes to sort a copy of ``arr``.

    The copy is made before the timer starts, so in-place sorts do not
    disturb later runs and the copy cost is not measured.
    """
    data = arr.copy()
    # perf_counter() is the monotonic, high-resolution clock meant for timing.
    start = time.perf_counter()
    sort_func(data)
    return time.perf_counter() - start
# 生成测试数据
sizes = [100, 1000, 10000]
for size in sizes:
arr = [random.randint(0, 1000) for _ in range(size)]
print(f"\n数组大小: {size}")
print(f"Python内置: {benchmark_sort(sorted, arr):.6f}秒")
print(f"快速排序: {benchmark_sort(quick_sort, arr):.6f}秒")
print(f"归并排序: {benchmark_sort(merge_sort, arr):.6f}秒")
print(f"堆排序: {benchmark_sort(heap_sort, arr):.6f}秒")
# 排序算法选择建议:
"""
1. 通用场景:使用Python内置sorted()或list.sort()
- 基于Timsort,性能优秀
- O(n log n)时间复杂度
- 稳定排序
2. 整数且范围小:计数排序
- O(n + k)时间复杂度
- 空间换时间
3. 需要在线排序:堆排序
- 可以逐个处理元素
4. 链表排序:归并排序
- 不需要随机访问
"""
Q5: 搜索算法实现
详细解答:
1. 二分查找(Binary Search)
python
def binary_search(arr, target):
    """Return an index of ``target`` in the sorted ``arr``, or -1 if absent.

    Runs in O(log n). With duplicates, any matching index may be returned.
    """
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1
# 递归版本
def binary_search_recursive(arr, target, left=0, right=None):
    """Recursive binary search over ``arr[left:right + 1]``.

    Args:
        arr: sequence sorted in ascending order.
        target: value to look for.
        left: first index of the search range.
        right: last index of the range (inclusive); defaults to the last index.

    Returns:
        An index of ``target``, or -1 if it is not in the range.
    """
    if right is None:
        right = len(arr) - 1
    if left > right:
        return -1
    mid = (left + right) // 2
    if arr[mid] == target:
        return mid
    elif arr[mid] < target:
        return binary_search_recursive(arr, target, mid + 1, right)
    else:
        return binary_search_recursive(arr, target, left, mid - 1)
# 查找插入位置
def binary_search_insert(arr, target):
    """Return the leftmost index at which ``target`` can be inserted.

    ``arr`` must be sorted ascending. The result is the first index whose
    element is >= ``target`` (or ``len(arr)``), so inserting there keeps
    ``arr`` sorted.
    """
    left, right = 0, len(arr)
    while left < right:
        mid = (left + right) // 2
        if arr[mid] < target:
            left = mid + 1
        else:
            right = mid
    return left
# 测试
arr = [1, 3, 5, 7, 9, 11, 13]
print(binary_search(arr, 7)) # 3
print(binary_search_insert(arr, 6)) # 3
2. 广度优先搜索(BFS)
python
from collections import deque
def bfs(graph, start):
    """Return the vertices reachable from ``start`` in breadth-first order.

    ``graph`` maps each vertex to an iterable of its neighbours.
    """
    # Vertices are marked when enqueued so each enters the queue only once.
    visited = {start}
    queue = deque([start])
    result = []
    while queue:
        vertex = queue.popleft()
        result.append(vertex)
        for neighbor in graph[vertex]:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)
    return result
# 最短路径
def bfs_shortest_path(graph, start, end):
    """Return a shortest path (fewest edges) from ``start`` to ``end``.

    Returns:
        The path as a list of vertices including both endpoints, or ``None``
        when ``end`` cannot be reached.
    """
    if start == end:
        return [start]
    visited = {start}
    # Each queue entry carries the path that led to its vertex.
    queue = deque([(start, [start])])
    while queue:
        vertex, path = queue.popleft()
        for neighbor in graph[vertex]:
            if neighbor == end:
                return path + [neighbor]
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append((neighbor, path + [neighbor]))
    return None
# 测试
graph = {
'A': ['B', 'C'],
'B': ['A', 'D', 'E'],
'C': ['A', 'F'],
'D': ['B'],
'E': ['B', 'F'],
'F': ['C', 'E']
}
print(bfs(graph, 'A')) # ['A', 'B', 'C', 'D', 'E', 'F']
print(bfs_shortest_path(graph, 'A', 'F')) # ['A', 'C', 'F']
3. 深度优先搜索(DFS)
python
def dfs(graph, start, visited=None):
    """Return the vertices reachable from ``start`` in DFS preorder (recursive).

    Args:
        graph: mapping from vertex to an iterable of neighbours.
        start: vertex to start from.
        visited: set shared across the recursion; callers normally omit it.
    """
    if visited is None:
        visited = set()
    visited.add(start)
    result = [start]
    for neighbor in graph[start]:
        if neighbor not in visited:
            result.extend(dfs(graph, neighbor, visited))
    return result
# 迭代版本
def dfs_iterative(graph, start):
    """Return the vertices reachable from ``start`` in DFS preorder (iterative).

    Neighbour collections must support ``reversed``.
    """
    visited = set()
    stack = [start]
    result = []
    while stack:
        vertex = stack.pop()
        if vertex not in visited:
            visited.add(vertex)
            result.append(vertex)
            # Push in reverse so the first neighbour is popped first.
            stack.extend(reversed(graph[vertex]))
    return result
# 路径查找
def dfs_find_path(graph, start, end, path=None):
    """Return the first simple path from ``start`` to ``end`` found by DFS.

    Args:
        graph: mapping from vertex to an iterable of neighbours.
        start: current vertex.
        end: destination vertex.
        path: vertices visited so far; callers normally omit it.

    Returns:
        The path as a list of vertices, or ``None`` if none exists. The path
        found is not necessarily the shortest one.
    """
    if path is None:
        path = []
    # Build a new list so sibling branches do not see this branch's vertices.
    path = path + [start]
    if start == end:
        return path
    for neighbor in graph[start]:
        if neighbor not in path:
            new_path = dfs_find_path(graph, neighbor, end, path)
            if new_path:
                return new_path
    return None
# 所有路径
def dfs_all_paths(graph, start, end, path=None):
    """Return every simple path from ``start`` to ``end``.

    Args:
        graph: mapping from vertex to an iterable of neighbours.
        start: current vertex.
        end: destination vertex.
        path: vertices visited so far; callers normally omit it.

    Returns:
        A list of paths, each a list of vertices; empty if ``end`` is
        unreachable.
    """
    if path is None:
        path = []
    path = path + [start]
    if start == end:
        return [path]
    paths = []
    for neighbor in graph[start]:
        if neighbor not in path:
            paths.extend(dfs_all_paths(graph, neighbor, end, path))
    return paths
# 测试
print(dfs(graph, 'A')) # ['A', 'B', 'D', 'E', 'F', 'C']
print(dfs_find_path(graph, 'A', 'F')) # ['A', 'B', 'E', 'F']
print(dfs_all_paths(graph, 'A', 'F'))
时间复杂度与空间复杂度
Q6: 时间复杂度分析实例
详细解答:
常见时间复杂度:
python
# O(1) - 常数时间
def get_first(lst):
    """Return the first element of ``lst``, or ``None`` if it is empty (O(1))."""
    return lst[0] if lst else None
# O(log n) - 对数时间
def binary_search(arr, target):
    """Return an index of ``target`` in the sorted ``arr``, or -1 (O(log n)).

    The search range is halved on every iteration.
    """
    left, right = 0, len(arr) - 1
    while left <= right:
        mid = (left + right) // 2
        if arr[mid] == target:
            return mid
        elif arr[mid] < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1
# O(n) - 线性时间
def find_max(lst):
    """Return the largest element of ``lst``, or ``None`` if it is empty (O(n))."""
    if not lst:
        return None
    max_val = lst[0]
    for num in lst:
        if num > max_val:
            max_val = num
    return max_val
# O(n log n) - 线性对数时间
def merge_sort(arr):
    """Return the elements of ``arr`` sorted ascending (O(n log n)).

    Splits the input in half, sorts each half recursively and combines them
    with ``merge``, which is expected to be defined elsewhere in this file.
    """
    if len(arr) <= 1:
        return arr
    mid = len(arr) // 2
    left = merge_sort(arr[:mid])
    right = merge_sort(arr[mid:])
    return merge(left, right)
# O(n²) - 平方时间
def bubble_sort(arr):
    """Sort ``arr`` in place in ascending order with bubble sort and return it.

    O(n^2) in the average and worst case. The loop stops as soon as a full
    pass makes no swap, so already-sorted input costs a single O(n) pass.
    """
    n = len(arr)
    for i in range(n):
        swapped = False
        # After i passes the last i elements are already in final position.
        for j in range(0, n - i - 1):
            if arr[j] > arr[j + 1]:
                arr[j], arr[j + 1] = arr[j + 1], arr[j]
                swapped = True
        if not swapped:
            break
    return arr
# O(2ⁿ) - 指数时间
def fibonacci_recursive(n):
    """Return the n-th Fibonacci number by naive recursion (O(2^n)).

    Any ``n <= 1`` is returned unchanged.
    """
    if n <= 1:
        return n
    return fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2)
# O(n!) - 阶乘时间
def permutations(arr):
    """Return all permutations of ``arr`` as a list of lists (O(n!)).

    Works on any sliceable sequence (list, tuple, str). The base case returns
    a fresh list, so results never alias the caller's input and non-list
    sequences no longer fail when ``[arr[i]] + p`` is built.
    """
    if len(arr) <= 1:
        return [list(arr)]
    result = []
    for i in range(len(arr)):
        rest = arr[:i] + arr[i + 1:]
        for p in permutations(rest):
            result.append([arr[i]] + p)
    return result
复杂度分析技巧:
python
# 1. 嵌套循环
def example1(n):
    """Print every pair (i, j) with 0 <= i, j < n.

    Two nested O(n) loops give O(n^2) overall.
    """
    for i in range(n):  # O(n)
        for j in range(n):  # O(n)
            print(i, j)
# 2. 减半循环
def example2(n):
    """Print n, n // 2, n // 4, ... while the value is greater than 1.

    The value halves on each iteration, so the loop runs O(log n) times.
    """
    i = n
    while i > 1:
        print(i)
        i = i // 2
# 3. 两个独立循环
def example3(n):
    """Print 0..n-1 twice using two consecutive loops.

    Sequential loops add up: O(n) + O(n) = O(n).
    """
    for i in range(n):  # O(n)
        print(i)
    for j in range(n):  # O(n)
        print(j)
# 4. 循环中的递归
def example4(n):
    """Sum ``example4(n // 2)`` over n loop iterations (recursion in a loop).

    The accumulated sum is now returned. Previously the function fell off the
    end and returned ``None``, so any call with n >= 4 raised TypeError when
    adding ``None`` to ``sum_val``.

    Complexity: the recurrence is T(n) = n * T(n / 2), which expands to
    n * (n/2) * (n/4) * ... * 1 = n^((log2(n) + 1) / 2). That is
    quasi-polynomial, far worse than O(n log n): each of the O(log n)
    recursion levels multiplies the amount of work instead of adding to it.
    """
    if n <= 1:
        return 1
    sum_val = 0
    for i in range(n):  # n iterations, each recursing on n // 2
        sum_val += example4(n // 2)
    return sum_val
# 5. 最好/平均/最坏情况
def linear_search(arr, target):
    """Return the index of the first element equal to ``target``, or -1.

    Best case O(1) (match at the first element), average O(n / 2) = O(n),
    worst case O(n) (match at the last element, or no match).
    """
    for i, val in enumerate(arr):
        if val == target:
            return i
    return -1
优化示例:
python
# 问题:检查数组是否有重复元素
# 方法1:暴力法 - O(n²)
def has_duplicates_brute(arr):
    """Return True if any two elements of ``arr`` are equal (O(n^2)).

    Compares every pair; needs only ``==``, so unhashable elements work.
    """
    for i in range(len(arr)):
        for j in range(i + 1, len(arr)):
            if arr[i] == arr[j]:
                return True
    return False
# 方法2:排序法 - O(n log n)
def has_duplicates_sort(arr):
    """Return True if ``arr`` contains a repeated element (O(n log n)).

    Sorts a copy so equal elements become adjacent; the input is not modified.
    """
    arr = sorted(arr)
    for i in range(len(arr) - 1):
        if arr[i] == arr[i + 1]:
            return True
    return False
# 方法3:哈希表 - O(n)
def has_duplicates_hash(arr):
    """Return True if ``arr`` contains a repeated element (O(n) on average).

    Uses a set of seen elements, so elements must be hashable. Stops at the
    first repeat.
    """
    seen = set()
    for num in arr:
        if num in seen:
            return True
        seen.add(num)
    return False
# 性能测试
import time
import random
arr = [random.randint(0, 1000) for _ in range(1000)]
start = time.time()
has_duplicates_brute(arr)
print(f"暴力法: {time.time() - start:.6f}秒")
start = time.time()
has_duplicates_sort(arr)
print(f"排序法: {time.time() - start:.6f}秒")
start = time.time()
has_duplicates_hash(arr)
print(f"哈希法: {time.time() - start:.6f}秒") # 最快
高级数据结构
Q7: 高级数据结构实现
详细解答:
1. 堆(Heap)
python
import heapq
# Python的heapq实现最小堆
heap = []
# 插入元素
heapq.heappush(heap, 5)
heapq.heappush(heap, 3)
heapq.heappush(heap, 7)
heapq.heappush(heap, 1)
# 弹出最小元素
min_val = heapq.heappop(heap) # 1
# 从列表创建堆
nums = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(nums) # 原地转换为堆
# 获取最大/最小的k个元素
largest_3 = heapq.nlargest(3, nums) # [9, 6, 5]
smallest_3 = heapq.nsmallest(3, nums) # [1, 1, 2]
# 实现最大堆(取反)
max_heap = []
for num in [3, 1, 4]:
heapq.heappush(max_heap, -num)
max_val = -heapq.heappop(max_heap) # 4
# 自定义堆元素
class Task:
    """Work item ordered by ``priority`` so it can be stored in a ``heapq`` heap.

    ``heapq`` is a min-heap, so the task with the smallest ``priority`` value
    is popped first.
    """

    def __init__(self, priority, description):
        self.priority = priority
        self.description = description

    def __lt__(self, other):
        """Order tasks by priority only; ``heapq`` needs just ``<``."""
        return self.priority < other.priority
task_queue = []
heapq.heappush(task_queue, Task(3, "Low priority"))
heapq.heappush(task_queue, Task(1, "High priority"))
heapq.heappush(task_queue, Task(2, "Medium priority"))
next_task = heapq.heappop(task_queue)
print(next_task.description) # "High priority"
2. 字典树(Trie)
python
class TrieNode:
    """Single trie node: child links keyed by character plus an end-of-word flag."""

    def __init__(self):
        self.children = {}
        self.is_end = False


class Trie:
    """Prefix tree over strings supporting insert, exact and prefix lookup."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert ``word``; O(m) where m is the length of the word."""
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end = True

    def search(self, word):
        """Return True only if ``word`` itself was inserted; O(m)."""
        node = self.root
        for char in word:
            if char not in node.children:
                return False
            node = node.children[char]
        return node.is_end

    def starts_with(self, prefix):
        """Return True if any inserted word starts with ``prefix``; O(m)."""
        node = self.root
        for char in prefix:
            if char not in node.children:
                return False
            node = node.children[char]
        return True

    def get_words_with_prefix(self, prefix):
        """Return every inserted word that starts with ``prefix``.

        Words come out in depth-first order, following the insertion order of
        each node's children.
        """
        node = self.root
        for char in prefix:
            if char not in node.children:
                return []
            node = node.children[char]
        result = []
        self._dfs(node, prefix, result)
        return result

    def _dfs(self, node, path, result):
        """Append to ``result`` every word at or below ``node``; ``path`` spells ``node``."""
        if node.is_end:
            result.append(path)
        for char, child in node.children.items():
            self._dfs(child, path + char, result)
# 使用示例
trie = Trie()
words = ["apple", "app", "application", "apply", "banana"]
for word in words:
trie.insert(word)
print(trie.search("app")) # True
print(trie.search("appl")) # False
print(trie.starts_with("app")) # True
print(trie.get_words_with_prefix("app"))
# ['app', 'apple', 'application', 'apply']
3. 并查集(Union-Find)
python
class UnionFind:
    """Disjoint-set forest over the integers 0..n-1.

    Uses path compression in ``find`` and union by rank in ``union``.
    ``count`` holds the current number of disjoint sets.
    """

    def __init__(self, n):
        self.parent = list(range(n))  # every element starts as its own root
        self.rank = [0] * n
        self.count = n

    def find(self, x):
        """Return the root of ``x``'s set, compressing the path on the way."""
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])
        return self.parent[x]

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        Returns:
            True if two different sets were merged, False if ``x`` and ``y``
            were already in the same set.
        """
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x == root_y:
            return False
        # Attach the lower-rank tree under the higher-rank root.
        if self.rank[root_x] < self.rank[root_y]:
            self.parent[root_x] = root_y
        elif self.rank[root_x] > self.rank[root_y]:
            self.parent[root_y] = root_x
        else:
            self.parent[root_y] = root_x
            self.rank[root_x] += 1
        self.count -= 1
        return True

    def connected(self, x, y):
        """Return True if ``x`` and ``y`` belong to the same set."""
        return self.find(x) == self.find(y)
# 应用:判断图中的连通分量数
edges = [(0, 1), (1, 2), (3, 4)]
n = 5
uf = UnionFind(n)
for x, y in edges:
uf.union(x, y)
print(f"连通分量数: {uf.count}") # 2
4. LRU缓存
python
from collections import OrderedDict
class LRUCache:
    """Least-recently-used cache built on ``OrderedDict``.

    The most recently used key sits at the end of the dict; the entry at the
    front is evicted when the cache grows beyond ``capacity``.
    """

    def __init__(self, capacity):
        self.cache = OrderedDict()
        self.capacity = capacity

    def get(self, key):
        """Return the value for ``key`` and mark it recently used, or -1 if absent."""
        if key not in self.cache:
            return -1
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key, value):
        """Store ``key`` as most recently used, evicting the oldest entry if over capacity."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            # last=False pops from the front, i.e. the least recently used.
            self.cache.popitem(last=False)
# 使用双向链表+哈希表的实现
class Node:
    """Doubly linked list node holding one cache entry."""

    def __init__(self, key=0, value=0):
        self.key = key
        self.value = value
        self.prev = None
        self.next = None


class LRUCache2:
    """LRU cache built from a hash map plus a doubly linked list.

    ``head`` and ``tail`` are sentinel nodes. The node right after ``head`` is
    the most recently used; the node right before ``tail`` is the least
    recently used.
    """

    def __init__(self, capacity):
        self.capacity = capacity
        self.cache = {}  # key -> Node
        self.head = Node()
        self.tail = Node()
        self.head.next = self.tail
        self.tail.prev = self.head

    def _remove(self, node):
        """Unlink ``node`` from the list."""
        node.prev.next = node.next
        node.next.prev = node.prev

    def _add_to_head(self, node):
        """Insert ``node`` right after the head sentinel."""
        node.next = self.head.next
        node.prev = self.head
        self.head.next.prev = node
        self.head.next = node

    def get(self, key):
        """Return the value for ``key`` and mark it recently used, or -1 if absent."""
        if key not in self.cache:
            return -1
        node = self.cache[key]
        self._remove(node)
        self._add_to_head(node)
        return node.value

    def put(self, key, value):
        """Insert or update ``key``; evict the least recently used entry if over capacity."""
        if key in self.cache:
            node = self.cache[key]
            node.value = value
            self._remove(node)
            self._add_to_head(node)
        else:
            node = Node(key, value)
            self.cache[key] = node
            self._add_to_head(node)
            if len(self.cache) > self.capacity:
                tail = self.tail.prev
                self._remove(tail)
                del self.cache[tail.key]
# 测试
cache = LRUCache(2)
cache.put(1, 1)
cache.put(2, 2)
print(cache.get(1)) # 1
cache.put(3, 3) # 移除key 2
print(cache.get(2)) # -1
算法优化技巧
Q8: 常见算法优化策略
详细解答:
1. 动态规划(避免重复计算)
python
# 斐波那契数列优化
# 递归(指数时间)- O(2ⁿ)
def fib_recursive(n):
    """Return the n-th Fibonacci number by naive recursion (O(2^n) time)."""
    if n <= 1:
        return n
    return fib_recursive(n - 1) + fib_recursive(n - 2)
# 记忆化(自顶向下)- O(n)
def fib_memo(n, memo=None):
    """Return the n-th Fibonacci number with top-down memoization (O(n)).

    Args:
        n: index in the sequence.
        memo: cache of computed values; a fresh dict is created per
            top-level call, so a mutable default argument is avoided.

    The recursion is n levels deep, so a large ``n`` can exceed the
    interpreter's recursion limit.
    """
    if memo is None:
        memo = {}
    if n in memo:
        return memo[n]
    if n <= 1:
        return n
    memo[n] = fib_memo(n - 1, memo) + fib_memo(n - 2, memo)
    return memo[n]
# 动态规划(自底向上)- O(n)
def fib_dp(n):
    """Return the n-th Fibonacci number with a bottom-up table (O(n) time and space)."""
    if n <= 1:
        return n
    dp = [0] * (n + 1)
    dp[1] = 1
    for i in range(2, n + 1):
        dp[i] = dp[i - 1] + dp[i - 2]
    return dp[n]
# 空间优化 - O(1)空间
def fib_optimized(n):
    """Return the n-th Fibonacci number in O(n) time and O(1) extra space.

    Only the last two values of the sequence are kept.
    """
    if n <= 1:
        return n
    prev, curr = 0, 1
    for _ in range(2, n + 1):
        prev, curr = curr, prev + curr
    return curr
# 性能对比
import time
n = 35
print("计算fib(35):")
start = time.time()
result = fib_recursive(n)
print(f"递归: {time.time() - start:.4f}秒")
start = time.time()
result = fib_memo(n)
print(f"记忆化: {time.time() - start:.6f}秒")
start = time.time()
result = fib_dp(n)
print(f"DP: {time.time() - start:.6f}秒")
start = time.time()
result = fib_optimized(n)
print(f"优化: {time.time() - start:.6f}秒")
2. 贪心算法
python
# 零钱兑换问题
def coin_change_greedy(amount, coins):
    """Make change for ``amount`` by always taking the largest coin that fits.

    Args:
        amount: total to reach.
        coins: available denominations; the caller's list is not modified
            (the previous version sorted it in place).

    Returns:
        The coins used, largest first, or ``None`` if the greedy choice
        cannot reach ``amount`` exactly. The result is not guaranteed to use
        the fewest coins.

    Raises:
        ValueError: if any denomination is not positive, since subtracting it
            would never reduce ``amount`` and the loop would not terminate.
    """
    denominations = sorted(coins, reverse=True)
    if denominations and denominations[-1] <= 0:
        raise ValueError("coin denominations must be positive")
    result = []
    for coin in denominations:
        while amount >= coin:
            amount -= coin
            result.append(coin)
    return result if amount == 0 else None
# 注意:贪心不一定是最优解
print(coin_change_greedy(11, [1, 5, 6])) # [6, 5] - 2个
# 此例中贪心结果 [6, 5] 恰好就是最优解(2枚硬币)
# 而coins=[1, 3, 4]时,amount=6,贪心给[4, 1, 1],最优是[3, 3]
# 动态规划(最优解)
def coin_change_dp(amount, coins):
    """Return the minimum number of coins that sum to ``amount``, or -1.

    Bottom-up dynamic programming in O(amount * len(coins)) time:
    ``dp[i]`` is the fewest coins needed to make amount ``i``.
    """
    dp = [float('inf')] * (amount + 1)
    dp[0] = 0
    for i in range(1, amount + 1):
        for coin in coins:
            if i >= coin:
                dp[i] = min(dp[i], dp[i - coin] + 1)
    return dp[amount] if dp[amount] != float('inf') else -1
print(coin_change_dp(11, [1, 5, 6])) # 2
3. 双指针技巧
python
# 两数之和(有序数组)
def two_sum_sorted(arr, target):
    """Find two positions in the sorted ``arr`` whose values sum to ``target``.

    Two pointers move inward from both ends (O(n)).

    Returns:
        ``[left, right]`` with ``left < right``, or ``None`` if no pair exists.
    """
    left, right = 0, len(arr) - 1
    while left < right:
        current_sum = arr[left] + arr[right]
        if current_sum == target:
            return [left, right]
        elif current_sum < target:
            left += 1  # need a larger sum
        else:
            right -= 1  # need a smaller sum
    return None
# 移除重复元素(原地)
def remove_duplicates(arr):
    """Compact consecutive duplicates of ``arr`` in place; return the new length.

    Intended for sorted input, where equal values are adjacent. After the
    call the first ``k`` elements (k = return value) are the de-duplicated
    values; elements past index ``k - 1`` are left as they were. O(n).
    """
    if not arr:
        return 0
    slow = 0  # index of the last value kept
    for fast in range(1, len(arr)):
        if arr[fast] != arr[slow]:
            slow += 1
            arr[slow] = arr[fast]
    return slow + 1
# 反转字符串
def reverse_string(s):
    """Return ``s`` reversed, using two pointers that swap toward the middle (O(n))."""
    # Strings are immutable, so the swaps are done on a list of characters.
    s = list(s)
    left, right = 0, len(s) - 1
    while left < right:
        s[left], s[right] = s[right], s[left]
        left += 1
        right -= 1
    return ''.join(s)
# 滑动窗口
def max_sum_subarray(arr, k):
    """Return the largest sum of any ``k`` consecutive elements of ``arr``.

    Uses a fixed-size sliding window (O(n)).

    Returns:
        The maximum window sum, or ``None`` when ``arr`` has fewer than ``k``
        elements.

    Raises:
        ValueError: if ``k`` is negative. A negative size previously produced
            a meaningless result through negative slicing and indexing.
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    if len(arr) < k:
        return None
    # Sum of the first window.
    window_sum = sum(arr[:k])
    max_sum = window_sum
    # Slide right: drop the element leaving the window, add the one entering.
    for i in range(k, len(arr)):
        window_sum = window_sum - arr[i - k] + arr[i]
        max_sum = max(max_sum, window_sum)
    return max_sum
print(max_sum_subarray([1, 4, 2, 10, 23, 3, 1, 0, 20], 4)) # 39
4. 位运算优化
python
# 判断奇偶
def is_even(n):
    """Return True if the integer ``n`` is even, by testing its lowest bit."""
    return (n & 1) == 0  # same result as n % 2 == 0 for integers
# 乘以/除以2的幂
def multiply_by_power_of_2(n, power):
    """Return ``n * 2 ** power`` using a left shift (``power`` must be >= 0)."""
    return n << power
def divide_by_power_of_2(n, power):
    """Return ``n // 2 ** power`` using a right shift (``power`` must be >= 0).

    As with ``//``, negative values round toward negative infinity.
    """
    return n >> power
# 交换两数(不用临时变量)
def swap(a, b):
    """Return the integers ``(b, a)`` using the XOR trick, with no temporary.

    Shown for illustration; idiomatic Python is ``a, b = b, a``.
    """
    a = a ^ b  # a now holds a0 ^ b0
    b = a ^ b  # (a0 ^ b0) ^ b0 == a0, so b holds the original a
    a = a ^ b  # (a0 ^ b0) ^ a0 == b0, so a holds the original b
    return a, b
# 计算汉明重量(1的个数)
def hamming_weight(n):
    """Return the number of 1 bits in the non-negative integer ``n``.

    Each ``n &= n - 1`` clears the lowest set bit, so the loop runs once per
    1 bit.

    Raises:
        ValueError: if ``n`` is negative. Python integers have unbounded
            two's-complement width, so clearing bits of a negative number
            never reaches zero and the loop would not terminate.
    """
    if n < 0:
        raise ValueError("hamming_weight() requires a non-negative integer")
    count = 0
    while n:
        n &= n - 1  # clear the lowest set bit
        count += 1
    return count
print(hamming_weight(11)) # 3 (1011)
# 判断2的幂
def is_power_of_2(n):
    """Return True if ``n`` is a positive power of two.

    A power of two has exactly one set bit, so ``n & (n - 1)`` is zero.
    """
    return n > 0 and (n & (n - 1)) == 0
# 找出唯一的数(其他数出现两次)
def find_unique(arr):
    """Return the integer that appears once when every other appears twice.

    XOR-ing all values cancels the pairs (``x ^ x == 0``), leaving the unique
    one. O(n) time, O(1) space. Returns 0 for an empty input.
    """
    result = 0
    for num in arr:
        result ^= num
    return result
print(find_unique([1, 2, 3, 2, 1])) # 3
5. 剪枝优化
python
# 八皇后问题
def solve_n_queens(n):
    """Return every solution to the n-queens problem.

    Each solution is a list ``board`` of length ``n`` in which ``board[row]``
    is the column of the queen placed on ``row``. Solutions are produced by
    backtracking row by row, pruning any column that is attacked.
    """

    def is_valid(board, row, col):
        """Return True if a queen at (row, col) is safe from the queens on rows above."""
        for prev_row in range(row):
            prev_col = board[prev_row]
            # Same column, or same diagonal (column distance == row distance).
            if prev_col == col or abs(prev_col - col) == row - prev_row:
                return False
        return True

    def backtrack(row):
        """Place queens from ``row`` downward, recording complete boards."""
        if row == n:
            result.append(board[:])
            return
        for col in range(n):
            if is_valid(board, row, col):
                board[row] = col
                backtrack(row + 1)
                board[row] = -1  # undo the placement before trying the next column

    result = []
    board = [-1] * n
    backtrack(0)
    return result
solutions = solve_n_queens(8)
print(f"8皇后问题有{len(solutions)}个解")
6. 空间换时间
python
# 前缀和
class PrefixSum:
    """Prefix-sum table: O(n) preprocessing, O(1) range-sum queries."""

    def __init__(self, nums):
        # prefix[i] is the sum of the first i elements, so prefix[0] == 0.
        self.prefix = [0]
        for num in nums:
            self.prefix.append(self.prefix[-1] + num)

    def range_sum(self, left, right):
        """Return the sum of the elements at indices ``left..right`` inclusive.

        An empty range (``right == left - 1``) sums to 0.

        Raises:
            IndexError: if the range falls outside the array. Negative
                indices previously wrapped around and silently returned a
                wrong sum.
        """
        n = len(self.prefix) - 1
        if left < 0 or right >= n or right < left - 1:
            raise IndexError("range_sum indices out of range")
        return self.prefix[right + 1] - self.prefix[left]
# 使用
nums = [1, 2, 3, 4, 5]
ps = PrefixSum(nums)
print(ps.range_sum(1, 3)) # 2 + 3 + 4 = 9
# 稀疏表(RMQ问题)
import math
class SparseTable:
    """Sparse table for range-minimum queries on a static array.

    Preprocessing is O(n log n); each query is O(1). ``st[i][j]`` holds the
    minimum of the ``2 ** j`` elements starting at index ``i``.
    """

    def __init__(self, arr):
        n = len(arr)
        self.n = n
        # For n >= 1, n.bit_length() equals floor(log2(n)) + 1. Unlike
        # math.log2 it involves no floats and does not fail for n == 0, so an
        # empty array can be constructed.
        k = max(1, n.bit_length())
        self.st = [[0] * k for _ in range(n)]
        # Level 0: ranges of length 1.
        for i in range(n):
            self.st[i][0] = arr[i]
        # Level j combines two overlapping-free halves of length 2 ** (j - 1).
        j = 1
        while (1 << j) <= n:
            half = 1 << (j - 1)
            for i in range(n - (1 << j) + 1):
                self.st[i][j] = min(self.st[i][j - 1], self.st[i + half][j - 1])
            j += 1
        # log_table[length] == floor(log2(length)) for length >= 1.
        self.log_table = [0] * (n + 1)
        for i in range(2, n + 1):
            self.log_table[i] = self.log_table[i // 2] + 1

    def query(self, left, right):
        """Return the minimum of the elements at indices ``left..right`` inclusive.

        Raises:
            IndexError: unless ``0 <= left <= right < len(arr)``.
        """
        if not 0 <= left <= right < self.n:
            raise IndexError("query range out of bounds")
        j = self.log_table[right - left + 1]
        # Two ranges of length 2 ** j that together cover [left, right].
        return min(self.st[left][j], self.st[right - (1 << j) + 1][j])
总结:
本文档全面讲解了Python数据结构与算法,包括:
-
内置数据结构
- 列表、字典、集合的底层实现
- 性能特性与优化技巧
- 实际应用场景
-
算法实现
- 排序算法(快排、归并、堆排序等)
- 搜索算法(二分查找、BFS、DFS)
- 时间空间复杂度分析
-
高级数据结构
- 堆、Trie、并查集
- LRU缓存实现
-
优化策略
- 动态规划、贪心算法
- 双指针、位运算
- 剪枝与空间换时间
掌握这些内容是成为优秀Python开发者的关键,也是通过技术面试的必备知识。