Python 列表處理的十個絕招:如何像大廠工程師一樣高效處理數據?
列表是 Python 中最常用的數據結構,但大多數程序員只用了它 30% 的功能。這篇文章會教你 10 個高頻、高效的列表操作技巧,這些都是大廠工程師在處理數百萬級數據時每天都在用的。掌握它們,你的數據處理效率能提升 5-10 倍。

一、列表的本質理解
為什麼要學這 10 個絕招?
列表在 Python 中是動態數組,每個操作都有其性能特征:
- 訪問 —— O(1),非常快
- 追加 —— O(1)(均攤),快
- 插入 —— O(n),慢(需要移動元素)
- 刪除 —— O(n),慢
- 搜索 —— O(n),線性掃描
大廠工程師的秘訣:選擇正確的操作和合適的數據結構,避免高成本的操作。
二、列表創建和初始化(絕招 1-2)
絕招 1:列表推導式 vs 循環 —— 性能相差約 3 倍
場景:生成 0-99 的平方數列表
# ? 新手做法(低效,可讀性也差)
squares = []
for i in range(100):
squares.append(i ** 2)
# ? 大廠做法1:列表推導(dǎo)式(快 3 倍,更 Pythonic)
squares = [i ** 2for i in range(100)]
# ? 大廠做法2:map 函數(shù)(函數(shù)式編程風(fēng)格,用于復(fù)雜操作)
squares = list(map(lambda x: x ** 2, range(100)))
# 性能對(duì)比
import time
def test_performance():
    """Benchmark three ways of building the squares of 0..99 and print timings.

    Each strategy (explicit loop with ``append``, list comprehension,
    ``map`` with a lambda) is repeated ``iterations`` times. The elapsed
    wall-clock time of each strategy is printed, together with the speed-up
    of the comprehension over the explicit loop.

    Returns:
        None. Results are written to standard output only.
    """
    iterations = 10000

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() may jump if the system clock
    # is adjusted and has coarser resolution on some platforms.

    # Strategy 1: explicit loop with append.
    start = time.perf_counter()
    for _ in range(iterations):
        result = []
        for i in range(100):
            result.append(i ** 2)
    time1 = time.perf_counter() - start

    # Strategy 2: list comprehension.
    start = time.perf_counter()
    for _ in range(iterations):
        result = [i ** 2 for i in range(100)]
    time2 = time.perf_counter() - start

    # Strategy 3: map with a lambda, materialised into a list.
    start = time.perf_counter()
    for _ in range(iterations):
        result = list(map(lambda x: x ** 2, range(100)))
    time3 = time.perf_counter() - start

    print(f"循環(huán):{time1:.4f}s")
    print(f"列表推導(dǎo)式:{time2:.4f}s(快 {time1/time2:.1f} 倍)")
    print(f"map:{time3:.4f}s")
test_performance()
# 輸出:
# 循環(huán):0.8234s
# 列表推導(dǎo)式:0.2156s(快 3.8 倍)
# map:0.3421s
高級用法:嵌套推導式和條件過濾
# 創(chuàng)建 2D 矩陣(3x3)
matrix = [[i*3 + j for j in range(3)] for i in range(3)]
print(matrix)
# 輸出:[[0, 1, 2], [3, 4, 5], [6, 7, 8]]
# 帶條件的推導(dǎo)式:只保留偶數(shù)
numbers = list(range(20))
evens = [n for n in numbers if n % 2 == 0]
print(evens)
# 輸出:[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
# 復(fù)雜條件:多層嵌套
matrix = [
[i*3 + j for j in range(3)]
for i in range(3)
if i % 2 == 0
]
print(matrix)
# 輸出:[[0, 1, 2], [6, 7, 8]]
# 實(shí)戰(zhàn)應(yīng)用:從嵌套列表中提取數(shù)據(jù)
data = [
{"name": "張三", "scores": [85, 90, 92]},
{"name": "李四", "scores": [88, 95, 90]},
{"name": "王五", "scores": [92, 88, 95]},
]
# 提取所有分?jǐn)?shù)
all_scores = [score for student in data for score in student["scores"]]
print(all_scores)
# 輸出:[85, 90, 92, 88, 95, 90, 92, 88, 95]
# 計(jì)算每個(gè)學(xué)生的平均分
averages = [sum(s["scores"]) / len(s["scores"]) for s in data]
print(averages)
# 輸出:[89.0, 91.0, 91.66...]
關鍵陷阱:不要在推導式中包含太復雜的邏輯
# ? 不好的做法(難以維護(hù))
result = [
complex_function(x)
for x in data
if complex_check(x) and another_check(x)
]
# ? 好的做法(清晰易維護(hù))
def process_and_filter(x):
    """Transform ``x`` if it passes both checks, otherwise return ``None``.

    Args:
        x: The item to validate and transform.

    Returns:
        ``complex_function(x)`` when both ``complex_check(x)`` and
        ``another_check(x)`` are truthy; ``None`` otherwise.

    NOTE(review): ``complex_check``, ``another_check`` and
    ``complex_function`` are placeholders that are not defined in the
    visible source. Because ``None`` doubles as the "rejected" marker,
    callers that filter on ``is not None`` will also drop items for which
    ``complex_function`` itself returns ``None``.
    """
    if not complex_check(x) or not another_check(x):
        return None
    return complex_function(x)
result = [process_and_filter(x) for x in data]
result = [r for r in result if r is not None]
絕招 2:*args 解包和初始化 —— 偷懶的藝術
# 快速創(chuàng)建相同元素的列表
# ? 不好的做法
zeros = []
for i in range(1000):
zeros.append(0)
# ? 好的做法1:乘法操作符
zeros = [0] * 1000
ones = [1] * 1000
# ? 好的做法2:更優(yōu)雅
default_values = [None] * 10
# ?? 陷阱:可變對(duì)象的重復(fù)引用
# ? 錯(cuò)誤的做法(所有嵌套列表都指向同一個(gè)對(duì)象)
matrix = [[0] * 3] * 3
matrix[0][0] = 1
print(matrix)
# 輸出:[[1, 0, 0], [1, 0, 0], [1, 0, 0]](全變了!)
# ? 正確的做法(使用推導(dǎo)式創(chuàng)建獨(dú)立的對(duì)象)
matrix = [[0] * 3for _ in range(3)]
matrix[0][0] = 1
print(matrix)
# 輸出:[[1, 0, 0], [0, 0, 0], [0, 0, 0]](只改第一個(gè))
# 實(shí)戰(zhàn)應(yīng)用:初始化字典列表
users = [{"name": "", "age": 0, "score": 0} for _ in range(100)]
# 實(shí)戰(zhàn)應(yīng)用:創(chuàng)建二維數(shù)組(常見(jiàn)于矩陣運(yùn)算)
def create_matrix(rows, cols, initial_value=0):
    """Create a ``rows`` x ``cols`` matrix as a list of independent row lists.

    Args:
        rows: Number of rows.
        cols: Number of columns in each row.
        initial_value: Value placed in every cell (default ``0``).

    Returns:
        A list of ``rows`` lists, each holding ``cols`` references to
        ``initial_value``.
    """
    # A fresh row is built for every iteration, so the rows never alias each
    # other (unlike ``[[v] * cols] * rows``). The cells still all reference the
    # same ``initial_value`` object, which only matters if it is mutable.
    return [[initial_value] * cols for _ in range(rows)]
matrix = create_matrix(3, 4)
print(matrix)
# 輸出:[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
三、列表查找和索引(絕招 3-4)
絕招 3:enumerate() —— 獲取索引和值
場景:處理數據時既需要值,又需要位置
# ? 不好的做法(低效且易出錯(cuò))
items = ["apple", "banana", "cherry"]
for i in range(len(items)):
print(f"索引 {i}:{items[i]}")
# ? 好的做法:enumerate(快且優(yōu)雅)
for i, item in enumerate(items):
print(f"索引 {i}:{item}")
# 指定起始索引
for i, item in enumerate(items, start=1):
print(f"第 {i} 項(xiàng):{item}")
# 輸出:第 1 項(xiàng):apple,第 2 項(xiàng):banana,...
# 實(shí)戰(zhàn)應(yīng)用1:給數(shù)據(jù)添加序號(hào)
def add_line_numbers(lines):
    """Prefix every line with its 1-based line number.

    Args:
        lines: Iterable of lines.

    Returns:
        A new list of strings of the form ``"<number>: <line>"``.
    """
    return [f"{number}: {line}" for number, line in enumerate(lines, start=1)]
lines = ["import os", "import sys", "print('hello')"]
numbered_lines = add_line_numbers(lines)
print(numbered_lines)
# 輸出:['1: import os', '2: import sys', '3: print(\'hello\')']
# 實(shí)戰(zhàn)應(yīng)用2:找出所有匹配項(xiàng)的索引
def find_all_indices(lst, target):
    """Return the indices of every element of ``lst`` equal to ``target``.

    Args:
        lst: Iterable to search.
        target: Value compared against each element with ``==``.

    Returns:
        A list of 0-based indices in ascending order; empty if there is no
        match.
    """
    return [index for index, value in enumerate(lst) if value == target]
numbers = [1, 2, 3, 2, 4, 2, 5]
print(find_all_indices(numbers, 2))
# 輸出:[1, 3, 5]
# 實(shí)戰(zhàn)應(yīng)用3:處理文件行,保留行號(hào)信息
def parse_csv_with_line_numbers(csv_text):
    """Parse simple comma-separated text into ``(line_number, row_dict)`` pairs.

    The first line is treated as the header row. Fields are split on every
    comma; quoted fields are not interpreted.

    Args:
        csv_text: The full text, with lines separated by ``\\n`` or ``\\r\\n``.

    Returns:
        A list of ``(line_number, data)`` tuples, one per data line, where
        ``line_number`` is the 1-based line number in the text (the header is
        line 1, so data starts at 2) and ``data`` maps header names to the
        string values of that line.
    """
    # Splitting on "\n" alone leaves a trailing "\r" on every line of
    # Windows-style input, which would end up inside the last header name and
    # the last value of each row.
    lines = [line.rstrip("\r") for line in csv_text.strip().split("\n")]
    headers = lines[0].split(",")

    result = []
    for line_num, line in enumerate(lines[1:], start=2):
        # zip() stops at the shorter sequence, so surplus values or headers
        # on a ragged line are dropped.
        result.append((line_num, dict(zip(headers, line.split(",")))))
    return result
csv_data = """name,age,city
張三,25,北京
李四,30,上海"""
parsed = parse_csv_with_line_numbers(csv_data)
for line_num, data in parsed:
print(f"第 {line_num} 行:{data}")
絕招 4:index() vs find 邏輯 —— 選擇合適的查找方式
# 列表中找元素
items = ["apple", "banana", "cherry", "banana"]
# 獲取第一個(gè)出現(xiàn)的位置
pos = items.index("banana")
print(pos) # 輸出:1
# ?? 陷阱1:找不到會(huì)報(bào)錯(cuò)
try:
pos = items.index("grape")
except ValueError as e:
print(f"錯(cuò)誤:{e}") # 輸出:錯(cuò)誤:'grape' is not in list
# ? 安全的做法1:使用異常處理
def find_safe(lst, target, default=-1):
    """Return the index of the first ``target`` in ``lst`` without raising.

    Args:
        lst: List to search.
        target: Value to look for.
        default: Value returned when ``target`` is absent (default ``-1``).

    Returns:
        The index of the first occurrence, or ``default`` if ``lst.index``
        raises ``ValueError``.
    """
    try:
        return lst.index(target)
    except ValueError:
        return default
print(find_safe(items, "grape")) # 輸出:-1
# ? 安全的做法2:使用 in 操作符先檢查
if"grape"in items:
pos = items.index("grape")
else:
print("未找到")
# 獲取元素最后一次出現(xiàn)的位置
last_pos = len(items) - 1 - items[::-1].index("banana")
print(last_pos) # 輸出:3
# 性能對(duì)比:查找 vs 遍歷計(jì)數(shù)
import time
large_list = list(range(1000000))
# 方式1:使用 index(O(n))
start = time.time()
for _ in range(1000):
try:
pos = large_list.index(999999)
except ValueError:
pass
time1 = time.time() - start
# 方式2:使用 in 檢查(O(n))
start = time.time()
for _ in range(1000):
if999999in large_list:
pass
time2 = time.time() - start
print(f"index 方式:{time1:.4f}s")
print(f"in 方式:{time2:.4f}s")
# 實(shí)戰(zhàn)應(yīng)用:從列表中移除特定元素(需要找到位置)
def remove_first_occurrence(lst, target):
    """Remove the first element equal to ``target`` from ``lst`` in place.

    Args:
        lst: List to modify.
        target: Value to remove.

    Returns:
        ``True`` if an element was removed, ``False`` if ``target`` was not
        present (``lst`` is then left unchanged).
    """
    try:
        lst.remove(target)
    except ValueError:
        return False
    return True
numbers = [1, 2, 3, 2, 4]
remove_first_occurrence(numbers, 2)
print(numbers) # 輸出:[1, 3, 2, 4]
# 實(shí)戰(zhàn)應(yīng)用:檢查列表中的重復(fù)元素
def find_duplicates(lst):
    """Return each value that occurs more than once in ``lst``, listed once.

    Args:
        lst: Iterable of hashable items.

    Returns:
        A list of the repeated values, ordered by the position at which each
        value was first seen a second time.
    """
    seen = set()
    # A dict is used as an insertion-ordered set so the result order is
    # deterministic rather than depending on set iteration order.
    duplicates = {}
    for item in lst:
        if item in seen:
            duplicates[item] = None
        else:
            seen.add(item)
    return list(duplicates)
print(find_duplicates([1, 2, 2, 3, 3, 3, 4]))
# 輸出:[2, 3]
四、列表修改操作(絕招 5-6)
絕招 5:append() vs extend() vs insert() —— 選錯性能差 100 倍
場景:向列表中添加元素
# 理解三個(gè)方法的區(qū)別
items = [1, 2, 3]
# append():添加單個(gè)元素(O(1))
items.append(4)
print(items) # 輸出:[1, 2, 3, 4]
# extend():添加可迭代對(duì)象中的所有元素(O(n),n 是新元素?cái)?shù))
items.extend([5, 6, 7])
print(items) # 輸出:[1, 2, 3, 4, 5, 6, 7]
# insert():在指定位置插入單個(gè)元素(O(n),需要移動(dòng)后續(xù)元素)
items.insert(0, 0) # 在最前面插入
print(items) # 輸出:[0, 1, 2, 3, 4, 5, 6, 7]
# ?? 性能陷阱:頻繁在列表前面插入
# ? 不好的做法(O(n2) 復(fù)雜度,非常慢)
result = []
for i in range(1000):
result.insert(0, i) # 每次都要移動(dòng)所有元素
# ? 好的做法1:先追加,后反轉(zhuǎn)(O(n))
result = []
for i in range(1000):
result.append(i)
result.reverse()
# ? 好的做法2:使用 deque(O(1))
from collections import deque
result = deque()
for i in range(1000):
result.appendleft(i)
result = list(result)
# ?? 性能陷阱:append 一個(gè)列表(不是 extend)
# ? 錯(cuò)誤的做法
combined = [1, 2, 3]
combined.append([4, 5, 6])
print(combined)
# 輸出:[1, 2, 3, [4, 5, 6]](嵌套了)
# ? 正確的做法
combined = [1, 2, 3]
combined.extend([4, 5, 6])
print(combined)
# 輸出:[1, 2, 3, 4, 5, 6]
# 性能對(duì)比:append vs extend vs +=
import time
data = list(range(100))
iterations = 10000
# 方式1:append 單個(gè)元素
start = time.time()
for _ in range(iterations):
result = [1, 2, 3]
result.append(4)
time1 = time.time() - start
# 方式2:extend 多個(gè)元素
start = time.time()
for _ in range(iterations):
result = [1, 2, 3]
result.extend([4, 5, 6, 7])
time2 = time.time() - start
# 方式3:使用 + 操作符
start = time.time()
for _ in range(iterations):
result = [1, 2, 3]
result = result + [4, 5, 6, 7]
time3 = time.time() - start
print(f"append:{time1:.4f}s")
print(f"extend:{time2:.4f}s")
print(f"+操作符:{time3:.4f}s(慢 {time3/time2:.1f} 倍)")
# 實(shí)戰(zhàn)應(yīng)用:構(gòu)建事件日志
class EventLog:
    """In-memory event log that keeps events in the order they were recorded."""

    def __init__(self):
        # Oldest event first; new events are always added at the end.
        self.events = []

    def log_event(self, event):
        """Record a single event at the end of the log."""
        self.events.append(event)

    def log_batch_events(self, events):
        """Record every event from the iterable ``events``, preserving order."""
        self.events.extend(events)

    def get_latest(self, n):
        """Return the last ``n`` events, oldest first.

        Args:
            n: Maximum number of events to return.

        Returns:
            A new list with at most ``n`` events; an empty list when
            ``n <= 0``.
        """
        # ``self.events[-0:]`` is the whole list, so a non-positive n must be
        # handled explicitly instead of being passed to the slice.
        if n <= 0:
            return []
        return self.events[-n:]
log = EventLog()
log.log_event({"type": "login", "user": "zhangsan"})
log.log_batch_events([
{"type": "click", "button": "submit"},
{"type": "logout", "user": "zhangsan"}
])
print(log.get_latest(2))
絕招 6:del vs remove vs pop —— 刪除操作的微妙差異
# 三種刪除方式的區(qū)別
numbers = [1, 2, 3, 4, 5]
# del:按索引刪除(O(n),需要移動(dòng)后續(xù)元素)
# 優(yōu)點(diǎn):快速,可刪除多個(gè)
# 缺點(diǎn):需要知道索引
del numbers[2]
print(numbers) # 輸出:[1, 2, 4, 5]
# remove:按值刪除第一個(gè)匹配項(xiàng)(O(n),需要查找)
# 優(yōu)點(diǎn):按值刪除,直觀
# 缺點(diǎn):如果值不存在會(huì)報(bào)錯(cuò)
numbers = [1, 2, 3, 4, 5]
numbers.remove(3)
print(numbers) # 輸出:[1, 2, 4, 5]
# pop:按索引刪除并返回值(O(n))
# 優(yōu)點(diǎn):返回刪除的值,可用于棧操作
# 缺點(diǎn):默認(rèn)刪除最后一個(gè),從前面刪除很慢
numbers = [1, 2, 3, 4, 5]
last = numbers.pop()
print(last, numbers) # 輸出:5 [1, 2, 3, 4]
first = numbers.pop(0)
print(first, numbers) # 輸出:1 [2, 3, 4]
# ?? 性能陷阱:在循環(huán)中刪除元素
# ? 不好的做法(會(huì)跳過(guò)元素)
numbers = [1, 2, 3, 4, 5, 2, 2]
for i, num in enumerate(numbers):
if num == 2:
numbers.pop(i) # 錯(cuò)誤!會(huì)跳過(guò)元素
print(numbers) # 輸出可能不符合預(yù)期
# ? 好的做法1:反向循環(huán)(從后往前)
numbers = [1, 2, 3, 4, 5, 2, 2]
for i in range(len(numbers) - 1, -1, -1):
if numbers[i] == 2:
numbers.pop(i)
print(numbers) # 輸出:[1, 3, 4, 5]
# ? 好的做法2:列表推導(dǎo)式(創(chuàng)建新列表)
numbers = [1, 2, 3, 4, 5, 2, 2]
numbers = [x for x in numbers if x != 2]
print(numbers) # 輸出:[1, 3, 4, 5]
# ? 好的做法3:使用 filter
numbers = [1, 2, 3, 4, 5, 2, 2]
numbers = list(filter(lambda x: x != 2, numbers))
print(numbers) # 輸出:[1, 3, 4, 5]
# 性能對(duì)比:刪除操作
import time
large_list = list(range(10000))
iterations = 1000
# 方式1:從末尾 pop(O(1))
start = time.time()
for _ in range(iterations):
lst = large_list.copy()
lst.pop()
time1 = time.time() - start
# 方式2:從開(kāi)頭 pop(O(n))
start = time.time()
for _ in range(iterations):
lst = large_list.copy()
lst.pop(0)
time2 = time.time() - start
# 方式3:del 指定索引
start = time.time()
for _ in range(iterations):
lst = large_list.copy()
del lst[5000]
time3 = time.time() - start
print(f"pop():{time1:.4f}s")
print(f"pop(0):{time2:.4f}s(慢 {time2/time1:.1f} 倍)")
print(f"del:{time3:.4f}s")
# 實(shí)戰(zhàn)應(yīng)用:清理無(wú)效數(shù)據(jù)
def clean_data(records, invalid_marker=None):
    """Return the records that are not equal to ``invalid_marker``.

    Args:
        records: Iterable of records to filter.
        invalid_marker: Value that marks a record as invalid (default
            ``None``). Records are compared to it with ``!=``.

    Returns:
        A new list with the remaining records in their original order. The
        input is not modified.
    """
    # Building a new list avoids the index bookkeeping needed when deleting
    # from a list while walking it.
    return [record for record in records if record != invalid_marker]
records = [1, None, 3, None, 5, 7, None]
clean_records = clean_data(records, None)
print(clean_records) # 輸出:[1, 3, 5, 7]
五、列表排序和重排(絕招 7-8)
絕招 7:sort() vs sorted() —— 性能和使用場景
# sort() 原地排序(修改列表本身)vs sorted() 返回新列表
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
# 方式1:sort()(O(n log n),原地排序)
numbers.sort()
print(numbers) # 輸出:[1, 1, 2, 3, 4, 5, 6, 9]
# 方式2:sorted()(O(n log n),返回新列表)
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
sorted_numbers = sorted(numbers)
print(sorted_numbers) # 輸出:[1, 1, 2, 3, 4, 5, 6, 9]
print(numbers) # 原列表不變:[3, 1, 4, 1, 5, 9, 2, 6]
# 降序排列
numbers = [3, 1, 4, 1, 5, 9]
numbers.sort(reverse=True)
print(numbers) # 輸出:[9, 5, 4, 3, 1, 1]
# ? 高級(jí)用法1:按自定義鍵排序
students = [
{"name": "張三", "score": 85},
{"name": "李四", "score": 92},
{"name": "王五", "score": 78},
]
# 按分?jǐn)?shù)排序
students.sort(key=lambda x: x["score"])
print(students)
# 按分?jǐn)?shù)降序排序
students.sort(key=lambda x: x["score"], reverse=True)
print(students)
# 按姓名排序
students.sort(key=lambda x: x["name"])
print(students)
# ? 高級(jí)用法2:多級(jí)排序
students = [
{"name": "張三", "score": 85, "age": 20},
{"name": "李四", "score": 85, "age": 22},
{"name": "王五", "score": 90, "age": 19},
]
# 先按分?jǐn)?shù)降序,再按年齡升序
from operator import itemgetter
students.sort(key=itemgetter("score", "age"), reverse=True) # ? 會(huì)全部反序
# ? 正確做法:分步排序(從次要條件開(kāi)始)
students.sort(key=itemgetter("age")) # 先按年齡升序
students.sort(key=itemgetter("score"), reverse=True) # 再按分?jǐn)?shù)降序
# 輸出會(huì)按分?jǐn)?shù)降序,相同分?jǐn)?shù)按年齡升序
# ? 高級(jí)用法3:忽略大小寫(xiě)排序
words = ["Apple", "banana", "Cherry"]
words.sort()
print(words) # 輸出:['Apple', 'Cherry', 'banana'](大寫(xiě)在前)
words.sort(key=str.lower)
print(words) # 輸出:['Apple', 'banana', 'Cherry'](不區(qū)分大小寫(xiě))
# ? 高級(jí)用法4:用 Pandas 處理復(fù)雜排序
import pandas as pd
df = pd.DataFrame({
"name": ["張三", "李四", "王五"],
"score": [85, 92, 78],
"age": [20, 22, 19]
})
# 按多列排序
sorted_df = df.sort_values(by=["score", "age"], ascending=[False, True])
print(sorted_df)
# 性能對(duì)比:sort vs sorted
import time
data = list(range(100000, 0, -1))
iterations = 100
# 方式1:sort()(修改原列表)
start = time.time()
for _ in range(iterations):
lst = data.copy()
lst.sort()
time1 = time.time() - start
# 方式2:sorted()(創(chuàng)建新列表)
start = time.time()
for _ in range(iterations):
result = sorted(data)
time2 = time.time() - start
print(f"sort():{time1:.4f}s")
print(f"sorted():{time2:.4f}s")
# 通常差異不大,但 sort() 更節(jié)省內(nèi)存
# 實(shí)戰(zhàn)應(yīng)用:排行榜系統(tǒng)
class Leaderboard:
    """Score board that ranks players by score, highest first."""

    def __init__(self):
        # Each entry is a dict with the keys "name" and "score".
        self.players = []

    def add_player(self, name, score):
        """Add a player entry with the given name and score."""
        self.players.append({"name": name, "score": score})

    def _ranked(self):
        """Return a new list of the players sorted by score, descending."""
        # sorted() is stable, so players with equal scores keep the order in
        # which they were added.
        return sorted(self.players, key=lambda player: player["score"], reverse=True)

    def get_top_10(self):
        """Return up to ten player entries with the highest scores."""
        return self._ranked()[:10]

    def get_rank(self, player_name):
        """Return the 1-based rank of ``player_name``, or ``None`` if unknown.

        When several entries share the name, the rank of the best-placed
        one is returned.
        """
        for rank, player in enumerate(self._ranked(), 1):
            if player["name"] == player_name:
                return rank
        return None
leaderboard = Leaderboard()
leaderboard.add_player("Player A", 1000)
leaderboard.add_player("Player B", 1500)
leaderboard.add_player("Player C", 1200)
print(leaderboard.get_top_10())
print(f"Player B 的排名:{leaderboard.get_rank('Player B')}")
絕招 8:reverse() 和 切片反轉 —— 哪個更快?
# 兩種反轉(zhuǎn)方式
numbers = [1, 2, 3, 4, 5]
# 方式1:reverse()(O(n),原地反轉(zhuǎn))
numbers.reverse()
print(numbers) # 輸出:[5, 4, 3, 2, 1]
# 方式2:切片反轉(zhuǎn)(O(n),創(chuàng)建新列表)
numbers = [1, 2, 3, 4, 5]
reversed_numbers = numbers[::-1]
print(reversed_numbers) # 輸出:[5, 4, 3, 2, 1]
# 方式3:reversed() 函數(shù)(返回迭代器)
numbers = [1, 2, 3, 4, 5]
reversed_iter = reversed(numbers)
print(list(reversed_iter)) # 輸出:[5, 4, 3, 2, 1]
# 性能對(duì)比
import time
large_list = list(range(1000000))
iterations = 1000
# 方式1:reverse()
start = time.time()
for _ in range(iterations):
lst = large_list.copy()
lst.reverse()
time1 = time.time() - start
# 方式2:切片反轉(zhuǎn)
start = time.time()
for _ in range(iterations):
result = large_list[::-1]
time2 = time.time() - start
# 方式3:reversed() 函數(shù)
start = time.time()
for _ in range(iterations):
result = list(reversed(large_list))
time3 = time.time() - start
print(f"reverse():{time1:.4f}s(最快,原地修改)")
print(f"[::-1]:{time2:.4f}s")
print(f"reversed():{time3:.4f}s")
# ?? 陷阱:reversed() 返回迭代器,不是列表
numbers = [1, 2, 3, 4, 5]
for num in reversed(numbers):
print(num) # 只能迭代一次
# 實(shí)戰(zhàn)應(yīng)用1:回文檢查
def is_palindrome(lst):
    """Return ``True`` if ``lst`` reads the same forwards and backwards.

    Args:
        lst: A sequence that supports slicing (list, tuple, str, ...).

    Returns:
        ``True`` when the sequence equals its reverse, including for empty
        and single-element sequences; ``False`` otherwise.
    """
    return lst == lst[::-1]
print(is_palindrome([1, 2, 3, 2, 1])) # True
print(is_palindrome([1, 2, 3, 4, 5])) # False
# 實(shí)戰(zhàn)應(yīng)用2:Z 形讀取矩陣
def zigzag_traverse(matrix):
    """Flatten ``matrix`` row by row, reversing every second row.

    Rows at even indices (0, 2, ...) are read left to right and rows at odd
    indices right to left.

    Args:
        matrix: Iterable of rows, each a sliceable sequence.

    Returns:
        A flat list of all elements in traversal order.
    """
    result = []
    for row_index, row in enumerate(matrix):
        result.extend(row if row_index % 2 == 0 else row[::-1])
    return result
matrix = [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9]
]
print(zigzag_traverse(matrix))
# 輸出:[1, 2, 3, 6, 5, 4, 7, 8, 9]
六、列表切片和復制(絕招 9)
絕招 9:切片的藝術 —— 一行代碼代替 10 行循環
# 切片基礎(chǔ):list[start:stop:step]
numbers = list(range(10)) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# 獲取子集
print(numbers[2:5]) # [2, 3, 4](包含 start,不包含 stop)
print(numbers[:5]) # [0, 1, 2, 3, 4](從開(kāi)始到位置 5)
print(numbers[5:]) # [5, 6, 7, 8, 9](從位置 5 到結(jié)束)
# 步長(zhǎng)
print(numbers[::2]) # [0, 2, 4, 6, 8](每隔一個(gè)取一個(gè))
print(numbers[1::2]) # [1, 3, 5, 7, 9](從位置 1 開(kāi)始,每隔一個(gè))
# 負(fù)索引
print(numbers[-3:]) # [7, 8, 9](最后 3 個(gè))
print(numbers[:-3]) # [0, 1, 2, 3, 4, 5, 6](除了最后 3 個(gè))
# 反轉(zhuǎn)
print(numbers[::-1]) # [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
# ? 高級(jí)技巧1:原地修改(使用切片賦值)
numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
numbers[2:5] = [20, 30, 40] # 替換位置 2-4 的元素
print(numbers)
# 輸出:[0, 1, 20, 30, 40, 5, 6, 7, 8, 9]
# 刪除元素
numbers = list(range(10))
numbers[2:5] = [] # 刪除位置 2-4 的元素
print(numbers)
# 輸出:[0, 1, 5, 6, 7, 8, 9]
# 插入元素
numbers = [1, 2, 5, 6]
numbers[2:2] = [3, 4] # 在位置 2 插入 [3, 4]
print(numbers)
# 輸出:[1, 2, 3, 4, 5, 6]
# ? 高級(jí)技巧2:旋轉(zhuǎn)數(shù)組
def rotate(lst, k):
    """Return a copy of ``lst`` rotated to the right by ``k`` positions.

    Args:
        lst: List to rotate; it is not modified.
        k: Number of positions. Values larger than ``len(lst)`` wrap around
            and negative values rotate to the left.

    Returns:
        A new list with the rotated contents.
    """
    # An empty list has nothing to rotate, and ``k % 0`` would raise
    # ZeroDivisionError.
    if not lst:
        return lst[:]
    k = k % len(lst)
    return lst[-k:] + lst[:-k]
print(rotate([1, 2, 3, 4, 5], 2))
# 輸出:[4, 5, 1, 2, 3]
# ? 高級(jí)技巧3:從列表中取出 N 個(gè)間隔的元素
def sample_every_n(lst, n):
    """Return every ``n``-th element of ``lst``, starting with the first.

    Args:
        lst: Sequence to sample from.
        n: Step between sampled elements; used directly as the slice step.

    Returns:
        The extended slice ``lst[::n]``.
    """
    return lst[::n]
data = list(range(20))
print(sample_every_n(data, 5))
# 輸出:[0, 5, 10, 15]
# ? 高級(jí)技巧4:將列表分成 N 個(gè)等份
def chunk_list(lst, n):
    """Deal the elements of ``lst`` round-robin into ``n`` groups.

    Group ``i`` receives the elements at indices ``i``, ``i + n``,
    ``i + 2n``, ... so the groups are interleaved rather than contiguous,
    and their sizes differ by at most one.

    Args:
        lst: Sequence to distribute.
        n: Number of groups.

    Returns:
        A list of ``n`` lists.
    """
    return [lst[offset::n] for offset in range(n)]
data = list(range(10))
chunks = chunk_list(data, 3)
print(chunks)
# 輸出:[[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]
# ? 高級(jí)技巧5:滑動(dòng)窗口
def sliding_window(lst, window_size):
    """Return every contiguous window of ``window_size`` elements of ``lst``.

    Args:
        lst: Sequence to slide over.
        window_size: Number of elements per window; must be positive.

    Returns:
        A list of slices, one per start position, in order. Empty when
        ``window_size`` exceeds ``len(lst)``.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    # With a size of 0 or less the slice arithmetic below would still run but
    # yield empty or wrongly sized "windows", so reject it up front.
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    last_start = len(lst) - window_size
    return [lst[start:start + window_size] for start in range(last_start + 1)]
print(sliding_window([1, 2, 3, 4, 5], 3))
# 輸出:[[1, 2, 3], [2, 3, 4], [3, 4, 5]]
# 性能對(duì)比:切片 vs 循環(huán)
import time
large_list = list(range(1000000))
iterations = 1000
# 方式1:切片
start = time.time()
for _ in range(iterations):
result = large_list[10000:20000]
time1 = time.time() - start
# 方式2:循環(huán)
start = time.time()
for _ in range(iterations):
result = []
for i in range(10000, 20000):
result.append(large_list[i])
time2 = time.time() - start
print(f"切片:{time1:.4f}s")
print(f"循環(huán):{time2:.4f}s(慢 {time2/time1:.1f} 倍)")
# 實(shí)戰(zhàn)應(yīng)用1:批量處理數(shù)據(jù)
def batch_process(data, batch_size, process_func):
    """Apply ``process_func`` to consecutive batches of ``data``.

    Args:
        data: Sequence to split; must support ``len()`` and slicing.
        batch_size: Maximum number of elements per batch; must be positive.
            The final batch may be shorter.
        process_func: Callable invoked once per batch with the batch slice.

    Returns:
        A list with the return value of ``process_func`` for each batch, in
        order.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    # A negative step would make range() empty and silently skip all data;
    # zero is rejected by range() itself. Fail loudly in both cases.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    return [
        process_func(data[start:start + batch_size])
        for start in range(0, len(data), batch_size)
    ]
data = list(range(100))
def sum_batch(batch):
    """Return the sum of the numbers in ``batch`` (``0`` for an empty batch)."""
    return sum(batch)
results = batch_process(data, 10, sum_batch)
print(results[:3]) # [45, 145, 245, ...]
# 實(shí)戰(zhàn)應(yīng)用2:獲取前 N 個(gè)和后 N 個(gè)
def get_head_tail(lst, n):
    """Return the first ``n`` and the last ``n`` elements of ``lst``.

    Args:
        lst: Sequence to take the elements from.
        n: Number of elements to take from each end; must not be negative.

    Returns:
        A ``(head, tail)`` tuple of slices. When ``n`` exceeds the length of
        ``lst`` both slices contain the whole sequence.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    if n == 0:
        # ``lst[-0:]`` is the whole sequence, not an empty tail.
        return lst[:0], lst[:0]
    return lst[:n], lst[-n:]
data = list(range(20))
head, tail = get_head_tail(data, 3)
print(f"前 3 個(gè):{head},后 3 個(gè):{tail}")
# 輸出:前 3 個:[0, 1, 2],后 3 個:[17, 18, 19]
七、列表復制的陷阱(絕招 10)
絕招 10:淺拷貝 vs 深拷貝 —— 一個 Bug 的誕生
# 問(wèn)題:列表中的可變對(duì)象
original = [[1, 2], [3, 4]]
# ? 錯(cuò)誤做法1:直接賦值(只復(fù)制引用)
copy1 = original
copy1[0][0] = 999
print(original) # 輸出:[[999, 2], [3, 4]](原列表也被改了!)
# ? 正確做法1:淺拷貝(copy() 方法)
original = [[1, 2], [3, 4]]
copy2 = original.copy()
copy2[0][0] = 999
print(original) # 輸出:[[999, 2], [3, 4]](?? 內(nèi)層列表仍被影響)
# ? 正確做法2:淺拷貝(切片)
original = [[1, 2], [3, 4]]
copy3 = original[:]
copy3[0][0] = 999
print(original) # 輸出:[[999, 2], [3, 4]](?? 同樣的問(wèn)題)
# ? 正確做法3:深拷貝(深度復(fù)制)
import copy
original = [[1, 2], [3, 4]]
deep_copy = copy.deepcopy(original)
deep_copy[0][0] = 999
print(original) # 輸出:[[1, 2], [3, 4]](? 原列表不變)
# 理解淺拷貝 vs 深拷貝
original = {
"name": "張三",
"scores": [85, 90, 95],
"info": {"age": 25, "city": "北京"}
}
# 淺拷貝:只復(fù)制最外層
import copy
shallow = copy.copy(original)
shallow["scores"][0] = 999
print(original["scores"]) # [999, 90, 95](被改了)
shallow["info"]["city"] = "上海"
print(original["info"]["city"]) # 上海(被改了)
# 深拷貝:遞歸復(fù)制所有層
deep = copy.deepcopy(original)
deep["scores"][0] = 999
deep["info"]["city"] = "上海"
print(original["scores"]) # [85, 90, 95](不變)
print(original["info"]["city"]) # 北京(不變)
# 性能對(duì)比:copy vs deepcopy
import time
data = [[i for i in range(100)] for _ in range(100)]
# 淺拷貝
start = time.time()
for _ in range(1000):
shallow = copy.copy(data)
time1 = time.time() - start
# 深拷貝
start = time.time()
for _ in range(1000):
deep = copy.deepcopy(data)
time2 = time.time() - start
print(f"淺拷貝:{time1:.4f}s")
print(f"深拷貝:{time2:.4f}s(慢 {time2/time1:.1f} 倍)")
# ?? 特殊情況:不可變對(duì)象的列表
original = [1, 2, 3, 4, 5]
copy_list = original.copy()
copy_list[0] = 999
print(original) # 輸出:[1, 2, 3, 4, 5](? 不變,因?yàn)閿?shù)字是不可變的)
# 實(shí)戰(zhàn)應(yīng)用1:保存原始狀態(tài)
class DataProcessor:
    """Hold a working copy of some data next to a pristine snapshot of it."""

    def __init__(self, data):
        # Both attributes are deep copies, so neither shares nested objects
        # with the caller's ``data`` or with each other.
        self.original_data = copy.deepcopy(data)
        self.processed_data = copy.deepcopy(data)

    def reset(self):
        """Discard all modifications and restore the original snapshot."""
        # Copy again so later modifications cannot reach the snapshot.
        self.processed_data = copy.deepcopy(self.original_data)

    def modify(self, index, value):
        """Replace the element at ``index`` of the working copy with ``value``."""
        self.processed_data[index] = value
processor = DataProcessor([[1, 2], [3, 4]])
processor.modify(0, [999, 999])
print(processor.processed_data) # [[999, 999], [3, 4]]
processor.reset()
print(processor.processed_data) # [[1, 2], [3, 4]]
# 實(shí)戰(zhàn)應(yīng)用2:維持歷史記錄
class DataHistory:
    """Keep independent snapshots of a data structure over time."""

    def __init__(self):
        # Snapshots in the order they were recorded.
        self.history = []

    def record_state(self, data):
        """Store a deep copy of ``data`` as the newest snapshot."""
        # A deep copy keeps the snapshot unaffected by later changes to
        # ``data`` or to objects nested inside it.
        self.history.append(copy.deepcopy(data))

    def get_history(self, index):
        """Return the snapshot at ``index``, or ``None`` if out of range.

        Only indices from ``0`` to ``len(history) - 1`` are accepted;
        negative indices return ``None`` rather than counting from the end.
        """
        if 0 <= index < len(self.history):
            return self.history[index]
        return None
history = DataHistory()
data = [1, 2, 3]
history.record_state(data)
data[0] = 999
history.record_state(data)
print(history.get_history(0)) # [1, 2, 3](第一個(gè)狀態(tài)不變)
print(history.get_history(1)) # [999, 2, 3](第二個狀態)
八、綜合實戰:完整的數據處理系統
實戰案例1:電商購物車系統
class ShoppingCart:
    """Shopping cart that stores one line item per product id."""

    # Item fields that get_sorted_items() can order by.
    _SORTABLE_FIELDS = ("price", "name", "quantity")

    def __init__(self):
        # Each item is a dict with the keys "product_id", "name", "price"
        # and "quantity".
        self.items = []

    def add_item(self, product_id, name, price, quantity=1):
        """Add a product, or raise the quantity of one already in the cart.

        When ``product_id`` is already present only its quantity is
        increased; the ``name`` and ``price`` arguments are ignored.
        """
        for item in self.items:
            if item["product_id"] == product_id:
                item["quantity"] += quantity
                return
        self.items.append({
            "product_id": product_id,
            "name": name,
            "price": price,
            "quantity": quantity,
        })

    def remove_item(self, product_id):
        """Remove the product with ``product_id``; do nothing if absent."""
        self.items = [
            item for item in self.items if item["product_id"] != product_id
        ]

    def get_total_price(self):
        """Return the sum of ``price * quantity`` over all items."""
        return sum(item["price"] * item["quantity"] for item in self.items)

    def get_items_by_price_range(self, min_price, max_price):
        """Return the items whose unit price is within the inclusive range."""
        return [
            item for item in self.items
            if min_price <= item["price"] <= max_price
        ]

    def apply_discount(self, discount_rate):
        """Multiply every unit price by ``1 - discount_rate``, in place.

        The stored prices are overwritten, so repeated calls compound.
        """
        for item in self.items:
            item["price"] *= (1 - discount_rate)

    def get_sorted_items(self, sort_by="name"):
        """Return a new list of the items ordered by ``sort_by``, ascending.

        Args:
            sort_by: ``"price"``, ``"name"`` or ``"quantity"``.

        Returns:
            A new sorted list. For any other ``sort_by`` value the items
            are returned in cart order.
        """
        if sort_by in self._SORTABLE_FIELDS:
            return sorted(self.items, key=lambda item: item[sort_by])
        # Return a copy here as well, so callers can never modify the
        # cart's internal list through the result.
        return list(self.items)

    def get_top_items(self, n=3):
        """Return up to ``n`` items with the highest unit price, dearest first."""
        by_price_desc = sorted(
            self.items, key=lambda item: item["price"], reverse=True
        )
        return by_price_desc[:n]
# 使用示例
cart = ShoppingCart()
cart.add_item(1, "手機(jī)", 3999, 1)
cart.add_item(2, "耳機(jī)", 599, 2)
cart.add_item(3, "充電寶", 199, 1)
print(f"總價(jià):{cart.get_total_price():.2f}") # 總價(jià):5396.00
print(f"前 2 件最貴商品:{cart.get_top_items(2)}")
實戰案例2:學生成績管理系統
class GradeManager:
    """Store per-student score lists and compute simple statistics on them."""

    def __init__(self):
        # Each entry is a dict with the keys "name" and "scores".
        self.students = []

    @staticmethod
    def _average(scores):
        """Return the mean of ``scores``, or ``None`` if there are none."""
        return sum(scores) / len(scores) if scores else None

    def add_student(self, name, scores):
        """Add a student together with a copy of their scores."""
        self.students.append({
            "name": name,
            # Shallow copy: later changes to the caller's list do not
            # affect the stored scores.
            "scores": list(scores),
        })

    def get_average_score(self, name):
        """Return the average score of the first student called ``name``.

        Returns:
            The mean score, or ``None`` when no such student exists or the
            student has no scores.
        """
        for student in self.students:
            if student["name"] == name:
                return self._average(student["scores"])
        return None

    def get_class_average(self):
        """Return the mean of all scores of all students (``0`` if none)."""
        all_scores = [
            score
            for student in self.students
            for score in student["scores"]
        ]
        return sum(all_scores) / len(all_scores) if all_scores else 0

    def get_students_above_average(self):
        """Return the students whose own average exceeds the class average.

        Students without any scores are never included.
        """
        class_avg = self.get_class_average()
        above = []
        for student in self.students:
            # Use this student's own scores instead of a lookup by name,
            # which is quadratic and wrong when two students share a name.
            average = self._average(student["scores"])
            if average is not None and average > class_avg:
                above.append(student)
        return above

    def get_ranked_students(self):
        """Return ``(name, average)`` tuples sorted by average, best first.

        Students without scores have an average of ``None`` and are placed
        last.
        """
        students_with_avg = [
            (student["name"], self._average(student["scores"]))
            for student in self.students
        ]
        # The leading flag keeps ``None`` averages out of numeric
        # comparisons and sorts them behind every real average.
        return sorted(
            students_with_avg,
            key=lambda pair: (pair[1] is not None, pair[1] or 0),
            reverse=True,
        )

    def get_pass_rate(self, pass_score=60):
        """Return the fraction of all scores that are ``>= pass_score``.

        Returns ``0`` when no scores are recorded.
        """
        total_scores = sum(len(student["scores"]) for student in self.students)
        pass_scores = sum(
            1
            for student in self.students
            for score in student["scores"]
            if score >= pass_score
        )
        return pass_scores / total_scores if total_scores > 0 else 0

    def remove_outliers(self, name, threshold=2):
        """Drop the outlier scores of the first student called ``name``.

        A score is kept when it lies within ``threshold`` population
        standard deviations of that student's mean. Nothing happens when
        the student does not exist or has no scores.
        """
        for student in self.students:
            if student["name"] != name:
                continue
            scores = student["scores"]
            if scores:
                mean = sum(scores) / len(scores)
                variance = sum((x - mean) ** 2 for x in scores) / len(scores)
                std_dev = variance ** 0.5
                student["scores"] = [
                    s for s in scores
                    if abs(s - mean) <= threshold * std_dev
                ]
            break
# 使用示例
manager = GradeManager()
manager.add_student("張三", [85, 90, 92])
manager.add_student("李四", [88, 95, 90])
manager.add_student("王五", [70, 75, 80])
print(f"班級(jí)平均分:{manager.get_class_average():.2f}")
print(f"排名:{manager.get_ranked_students()}")
print(f"及格率:{manager.get_pass_rate():.2%}")
九、速查表
| 序號 | 絕招 | 復雜度 | 常用度 | 關鍵點 |
| --- | --- | --- | --- | --- |
| 1 | 列表推導式 | O(n) | ★★★★★ | 比循環快 3 倍 |
| 2 | 初始化 (*args) | O(n) | ★★★★ | 避免可變對象重復 |
| 3 | enumerate() | O(n) | ★★★★★ | 同時獲取索引和值 |
| 4 | index() 查找 | O(n) | ★★★★ | 需要異常處理 |
| 5 | append/extend | O(1)/O(n) | ★★★★★ | append 單個,extend 多個 |
| 6 | del/remove/pop | O(n) | ★★★★ | 從末尾刪除最快 |
| 7 | sort() / sorted() | O(n log n) | ★★★★★ | 支持自定義鍵排序 |
| 8 | reverse() | O(n) | ★★★★ | 原地反轉最快 |
| 9 | 切片 | O(k) | ★★★★★ | 靈活且高效 |
| 10 | deepcopy | O(n) | ★★★ | 處理嵌套結構 |
十、優秀實踐建議
✅ 做這些事:
- 使用列表推導式 —— 簡潔、快速、Pythonic
- append() 追加 —— 永遠不要在循環中用 + 拼接列表
- 從后往前刪除 —— 避免索引混亂
- 預先分配空間 —— 用 * 初始化,減少 append 開銷
- 使用切片 —— 代替循環獲取子集
- sort() 原地排序 —— 需要排序時首選
- 深拷貝嵌套結構 —— 避免數據污染
- enumerate() 獲取索引 —— 比 range(len()) 更優雅
- 選擇合適的數據結構 —— 頻繁從前面刪除用 deque
- 理解復雜度 —— 避免 O(n²) 的常見錯誤
❌ 不要做這些事:
- 在循環中用 + 拼接列表(O(n²))
- 在循環中刪除元素(容易跳過)
- 假設淺拷貝能完全復制(內層對象仍共享)
- 用單次 sort(reverse=True) 處理升降序混合的多條件排序(需要分步排序)
- 在列表前面頻繁插入(每次 O(n),改用 deque)
- 忽視列表初始化中的可變對象陷阱
- 不處理 index() 的異常
- 過度優化(除非有性能問題)
- 混淆 append 和 extend
- 在不需要排序的時候排序
十一、常見面試題
題目1:找出列表中第二大的數
def second_largest(lst):
    """Return the second-largest distinct value in ``lst``.

    Duplicates of the maximum are ignored, so ``[5, 5, 3]`` yields ``3``.
    The values are found in a single O(n) pass without sorting.

    Args:
        lst: Iterable of mutually comparable values.

    Returns:
        The largest value that is strictly smaller than the maximum.

    Raises:
        IndexError: If ``lst`` contains fewer than two distinct values.
    """
    # ``None`` marks "not found yet"; a numeric sentinel such as -inf would
    # be indistinguishable from a real -inf in the data.
    largest = runner_up = None
    for num in lst:
        if largest is None or num > largest:
            largest, runner_up = num, largest
        elif num != largest and (runner_up is None or num > runner_up):
            runner_up = num
    if runner_up is None:
        raise IndexError("second_largest() needs at least two distinct values")
    return runner_up
print(second_largest([3, 1, 4, 1, 5, 9, 2, 6])) # 輸出:6
題目2:檢查列表中是否有重復
def has_duplicate(lst):
    """Return ``True`` if any value occurs more than once in ``lst``.

    Args:
        lst: Iterable of hashable items.

    Returns:
        ``True`` as soon as a repeated value is found, ``False`` if every
        value is unique (including for empty input).
    """
    # Stopping at the first repeat avoids building a set of the whole input
    # when a duplicate appears early.
    seen = set()
    for item in lst:
        if item in seen:
            return True
        seen.add(item)
    return False
print(has_duplicate([1, 2, 3, 2])) # True
題目3:旋轉數組
def rotate_array(lst, k):
    """Return a copy of ``lst`` rotated to the right by ``k`` steps.

    Args:
        lst: List to rotate; it is not modified.
        k: Number of steps. Values larger than ``len(lst)`` wrap around and
            negative values rotate to the left.

    Returns:
        A new list with the rotated contents.
    """
    # Guard the empty list: ``k % 0`` would raise ZeroDivisionError.
    if not lst:
        return lst[:]
    k = k % len(lst)
    return lst[-k:] + lst[:-k]
print(rotate_array([1, 2, 3, 4, 5], 2)) # [4, 5, 1, 2, 3]
題目4:兩個數組的交集
def array_intersection(arr1, arr2):
    """Return the distinct values that occur in both ``arr1`` and ``arr2``.

    Args:
        arr1: Iterable of hashable items; determines the result order.
        arr2: Iterable of hashable items.

    Returns:
        A list in which every common value appears exactly once, ordered by
        its first occurrence in ``arr1``.
    """
    candidates = set(arr2)
    # dict.fromkeys() removes repeats while keeping first-seen order, which
    # makes the result deterministic (a plain set intersection is not).
    return list(dict.fromkeys(x for x in arr1 if x in candidates))
print(array_intersection([1, 2, 2, 1], [2, 2])) # [2]
十二、性能優化總結
場景1:大規模拼接
# ? 不好(O(n2))
result = []
for item in items:
result = result + [item]
# ? 好(O(n))
result = []
for item in items:
result.append(item)
# ? 最好(O(n))
result = [item for item in items]
場景2:刪除多個元素
# ? 不好(容易出錯(cuò))
for i in range(len(lst)):
if lst[i] == target:
lst.pop(i)
# ? 好(清晰高效)
lst = [x for x in lst if x != target]
場景3:復制大型嵌套結構
# ? 不好(淺拷貝,內(nèi)層對(duì)象共享)
copy = original.copy()
# ? 好(深拷貝,完全獨(dú)立)
import copy
deep_copy = copy.deepcopy(original)
十三、總結與下一步
這 10 個絕招涵蓋了 Python 列表操作的 95% 的場景。關鍵要點:
- 推導式 vs 循環 —— 簡單的構造邏輯優先選擇推導式
- append vs extend —— 按需選擇
- 排序和搜索 —— 理解復雜度
- 切片的力量 —— 一行代碼做復雜操作































