在Python中编写AI程序通常涉及多个第三方库,如NumPy、Pandas、Scikit-learn、TensorFlow、PyTorch、Transformers等。由于每个库都提供了成千上万的函数、类和方法,不可能真正"穷举"所有语句。不过,我们可以按照AI开发的一般流程,分类列举其中最常用、最核心的语句示例,帮助你快速了解AI编程中常见的代码片段。
1. 导入常用库
python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# 机器学习
from sklearn import datasets, model_selection, preprocessing, metrics, linear_model, ensemble, neural_network
# 深度学习框架
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, losses, metrics as tf_metrics
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
# 自然语言处理
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, Trainer, TrainingArguments
2. 数据加载与预处理
python
# Load a built-in dataset (scikit-learn).
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Read external data (pandas).
df = pd.read_csv('data.csv')
df.head()
# Split into training and test sets.
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize: the scaler is fitted on the training split only and then
# reused unchanged for the test split.
scaler = preprocessing.StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Load image data (torchvision).
# The torchvision module is imported under an alias: a plain
# `from torchvision import datasets` would rebind the `datasets` name that
# already refers to `sklearn.datasets`.
from torchvision import datasets as tv_datasets, transforms
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = tv_datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Held-out split for the PyTorch evaluation loop, which iterates `test_loader`.
test_dataset = tv_datasets.MNIST(root='./data', train=False, transform=transform, download=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# Tokenize text (transformers).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
encoded = tokenizer("Hello world!", padding=True, truncation=True, return_tensors='pt')
3. 模型定义
Scikit-learn 模型
python
# Three alternative classifiers. Each assignment rebinds `model`, so keep
# only the line for the estimator you actually want.
model = linear_model.LogisticRegression()
# Random forest built from 100 trees.
model = ensemble.RandomForestClassifier(n_estimators=100)
# Multi-layer perceptron with two hidden layers of 100 and 50 units.
model = neural_network.MLPClassifier(hidden_layer_sizes=(100, 50))
TensorFlow/Keras 模型
python
# Sequential API: a plain stack of layers.
# The input shape is declared with an explicit `keras.Input` entry instead of
# the `input_shape=` layer argument, which Keras 3 warns against.
model = keras.Sequential([
    keras.Input(shape=(784,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax'),
])
# Functional API: the same kind of network expressed as a graph of layer calls.
# This rebinds `model`; keep whichever construction style you prefer.
inputs = keras.Input(shape=(784,))
x = layers.Dense(128, activation='relu')(inputs)
outputs = layers.Dense(10, activation='softmax')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
# Compile.
# NOTE: 'categorical_crossentropy' expects one-hot encoded labels; with
# integer class labels use 'sparse_categorical_crossentropy' instead.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
PyTorch 模型
python
class MyModel(nn.Module):
    """Two-layer fully connected classifier with ReLU and dropout.

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden_features: Width of the hidden layer.
        num_classes: Number of output scores per sample.
        dropout: Dropout probability applied after the hidden activation.

    The defaults (784, 128, 10, 0.2) reproduce the original fixed-size model.
    """

    def __init__(self, in_features: int = 784, hidden_features: int = 128,
                 num_classes: int = 10, dropout: float = 0.2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the raw output of the final linear layer, one row per sample.

        No softmax is applied here.
        """
        # flatten(start_dim=1) keeps the batch dimension and collapses the
        # rest. Unlike view(), it also accepts non-contiguous tensors
        # (e.g. the result of a transpose) instead of raising.
        x = x.flatten(start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
# Instantiate the network defined above.
model = MyModel()
# MyModel.forward returns the output of its last linear layer without a
# softmax, which is the form nn.CrossEntropyLoss expects.
criterion = nn.CrossEntropyLoss()
# Adam over all trainable parameters of the model, learning rate 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)
4. 损失函数与优化器
python
# TensorFlow
# Object-style equivalents of the string names accepted by `model.compile`.
loss_fn = losses.CategoricalCrossentropy()
optimizer = optimizers.Adam(learning_rate=0.001)
# PyTorch
# NOTE: these lines rebind `criterion` and `optimizer`; the TensorFlow and
# PyTorch examples share variable names and are meant as alternatives.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
5. 训练循环
Scikit-learn
python
# Fit the estimator on the training split.
model.fit(X_train, y_train)
TensorFlow
python
# Train for 10 epochs in mini-batches of 32; validation_split=0.2 sets aside
# 20% of the training data for validation. The returned object is kept in
# `history` so the loss curves can be plotted later.
history = model.fit(X_train, y_train, batch_size=32, epochs=10, validation_split=0.2)
PyTorch(手动循环)
python
# Number of full passes over the training data; adjust as needed.
num_epochs = 10
for epoch in range(num_epochs):
    # Switch layers such as dropout to training behaviour.
    model.train()
    running_loss = 0.0
    num_batches = 0
    for inputs, labels in train_loader:
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    # Report the mean loss over the epoch rather than only the last batch;
    # max(..., 1) avoids a division by zero when the loader is empty.
    print(f'Epoch {epoch+1}, Loss: {running_loss / max(num_batches, 1):.4f}')
6. 评估与验证
Scikit-learn
python
# Predict labels for the held-out features, then compare them with the
# true labels to obtain the accuracy.
y_pred = model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
TensorFlow
python
# Unpacks two values: the loss and the single 'accuracy' metric that was
# passed to `model.compile`.
test_loss, test_acc = model.evaluate(X_test, y_test)
PyTorch
python
# Switch layers such as dropout to inference behaviour.
model.eval()
correct = 0
total = 0
# Gradients are not needed for evaluation, so disable their tracking.
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        # Index of the highest score along dim 1 is the predicted class.
        batch_pred = model(batch_inputs).argmax(dim=1)
        total += batch_labels.size(0)
        correct += (batch_pred == batch_labels).sum().item()
accuracy = correct / total
7. 模型保存与加载
python
# TensorFlow / Keras
# Save in the native `.keras` format; HDF5 (`.h5`) is the legacy format.
model.save('model.keras')
model = keras.models.load_model('model.keras')
# PyTorch
# Only the parameters (state_dict) are stored, so `model` must already be an
# instance of the right class before loading.
torch.save(model.state_dict(), 'model.pth')
# weights_only=True (PyTorch >= 1.13) restricts unpickling to tensors and
# plain containers, so a tampered checkpoint cannot run arbitrary code.
model.load_state_dict(torch.load('model.pth', weights_only=True))
# Scikit-learn
# NOTE: joblib files are pickle-based; only load files from a trusted source.
import joblib
joblib.dump(model, 'model.pkl')
model = joblib.load('model.pkl')
8. 推理与预测
python
# General form (scikit-learn / Keras style).
# `X_new` stands for new, unseen samples — presumably preprocessed the same
# way as the training data; it is not defined in these snippets.
y_pred = model.predict(X_new)
# Hugging Face pipeline
from transformers import pipeline
# Name the checkpoint explicitly: without `model=` the pipeline falls back to
# an implicit default and warns that this is not recommended.
classifier = pipeline(
    'sentiment-analysis',
    model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
)
result = classifier("I love AI!")
9. 可视化与辅助工具
python
# Plot the training and validation loss curves.
# `history` is the object returned by Keras `model.fit`; the 'val_loss' key
# is read here, which the earlier fit call provides via validation_split.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='val')
plt.legend()
plt.show()
# Confusion matrix of true labels against predicted labels.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(cm).plot()
# Data exploration.
df.describe()
df.info()
# Assumes `df` has a 'species' column to colour the points by — confirm
# against the actual CSV.
sns.pairplot(df, hue='species')
10. 其他常用库/语句
python
# Image processing (OpenCV)
import cv2
image_path = 'image.jpg'
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface as an obscure error inside cvtColor.
if img is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')
# imread yields channels in BGR order, hence the BGR-to-gray conversion.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Natural language processing (NLTK)
import nltk
# Recent NLTK releases load the tokenizer from 'punkt_tab', older ones from
# 'punkt'. Requesting both keeps the snippet working across versions; an
# unknown resource name is reported by the downloader rather than raised.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)
tokens = nltk.word_tokenize("Hello world!")
# Reinforcement learning (Gym >= 0.26 API)
import gym
env = gym.make('CartPole-v1')
# reset() returns an (observation, info) pair.
obs, info = env.reset()
for _ in range(1000):
    # Random policy: sample an action uniformly from the action space.
    action = env.action_space.sample()
    # step() returns five values; the old single `done` flag is split into
    # `terminated` (episode reached an end state) and `truncated` (cut short,
    # e.g. by a time limit).
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
# Release the environment's resources once finished.
env.close()
总结
以上列举了Python AI开发中最常用的语句,覆盖了数据准备、模型构建、训练、评估、推理等关键环节。由于AI领域涉及的库极其丰富,实际开发时可能还会用到更多特定的函数或方法,例如:
- 自定义层/损失函数
- 回调函数(Keras Callbacks,即 tf.keras.callbacks,如 EarlyStopping、ModelCheckpoint)
- 数据增强(Albumentations)
- 分布式训练(torch.distributed)
- 超参数调优(Optuna、Ray Tune)
如果你需要针对某个具体库或任务(如计算机视觉、NLP)的更详细语句列表,请进一步说明,我可以为你展开。