推荐收藏！万字长文带入快速使用 keras

这些年，有很多感悟：一个人精力是有限的，一个人视野也有有限的，你总会不经意间发现优秀人的就在身边。

看我文章的小伙伴应该经常听我说过的一句话：技术要学会交流、分享，不建议闭门造车。一个人可以走的很快、一堆人可以走的更远。 这句话是这些年经验的总结。

本文就是由我们粉丝群的小伙伴讨论、分享的总结，最终汇总成了这个《keras 快速使用手册》，如果你也喜欢学习、交流，可以文末的方式加入我们。

我们开始正题：

keras快速使用手册

python 复制代码

import keras
import tensorflow as tf
print(keras.__version__)
print(tf.__version__)

基本命令

python 复制代码

import random
import tensorflow as tf
import numpy as np
import os
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
seed_everything(2019)

定义数据的输入和输出

直接加载到内存中

python 复制代码

train_x = np.random.radnom((1000,23))
train_y = np.random.radnom((1000,1))
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(train_x, train_y, test_size=0.1, random_state=0, shuffle=True)

迭代器的形式读取数据

python 复制代码

import keras


# helper function for data visualization    
def denormalize(x):
    """Scale image to range 0..1 for correct plot"""
    x_max = np.percentile(x, 98)
    x_min = np.percentile(x, 2)    
    x = (x - x_min) / (x_max - x_min)
    x = x.clip(0, 1)
    return x
    

# classes for data loading and preprocessing
class Dataset:
    """CamVid Dataset. Read images, apply augmentation and preprocessing transformations.
    
    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder
        class_values (list): values of classes to extract from segmentation mask
        augmentation (albumentations.Compose): data transfromation pipeline 
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing 
            (e.g. noralization, shape manipulation, etc.)
    
    """

    def __init__(self, images_dir, masks_dir, classes=None, augmentation=None, preprocessing=None,):
        
        self.CLASSES = ['sky', 'building', 'pole', 'road', 'pavement', 
               'tree', 'signsymbol', 'fence', 'car', 
               'pedestrian', 'bicyclist', 'unlabelled']
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id) for image_id in self.ids]
        
        # convert str names to class values on masks
        self.class_values = [self.CLASSES.index(cls.lower()) for cls in self.CLASSES]
        
        self.augmentation = augmentation
        self.preprocessing = preprocessing
    
    def __getitem__(self, i):
        
        # read data
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image,(480,480))
        mask = cv2.imread(self.masks_fps[i], 0)
        mask = cv2.resize(mask,(480,480))
        
        # extract certain classes from mask (e.g. cars)
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')
        
        # add background if mask is not binary
        if mask.shape[-1] != 1:
            background = 1 - mask.sum(axis=-1, keepdims=True)
            mask = np.concatenate((mask, background), axis=-1)
        
        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
        
        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']
            
        return image, mask
        
    def __len__(self):
        return len(self.ids)
    
    
class Dataloder(keras.utils.Sequence):
    """Load data from dataset and form batches
    
    Args:
        dataset: instance of Dataset class for image loading and preprocessing.
        batch_size: Integet number of images in batch.
        shuffle: Boolean, if `True` shuffle image indexes each epoch.
    """
    
    def __init__(self, dataset, batch_size=1, augment=None, shuffle=False):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(dataset))
        self.augment = augment

        self.on_epoch_end()

    def __getitem__(self, i):
        
        # collect batch data
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        data = []
        for j in range(start, stop):
            if self.augment is None:
                data.append(np.array(X),np.array(y))
            else:            
                im,mask = [],[]   
                for x,y in zip(X,y):
                    augmented = self.augment(image=x, mask=y)
                    im.append(augmented['image'])
                    mask.append(augmented['mask'])
                data.append(np.array(im),np.array(mask))
        # transpose list of lists
        batch = [np.stack(samples, axis=0) for samples in zip(*data)]
        
        return batch
    
    def __len__(self):
        """Denotes the number of batches per epoch"""
        return len(self.indexes) // self.batch_size
    
    def on_epoch_end(self):
        """Callback function to shuffle indexes each epoch"""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)  


# Dataset for validation images
train_dataset = Dataset(x_train_dir, y_train_dir)
valid_dataset = Dataset(x_valid_dir, y_valid_dir)

# check shapes for errors
BATCH_SIZE =4
train_dataloader = Dataloder(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = Dataloder(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

数据增强

python 复制代码

from albumentations import (
  Compose, HorizontalFlip, CLAHE, HueSaturationValue,
  RandomBrightness, RandomContrast, RandomGamma, OneOf,
  ToFloat, ShiftScaleRotate, GridDistortion, ElasticTransform, JpegCompression, HueSaturationValue,
  RGBShift, RandomBrightness, RandomContrast, Blur, MotionBlur, MedianBlur, GaussNoise, CenterCrop,
  IAAAdditiveGaussianNoise, GaussNoise, OpticalDistortion, RandomSizedCrop
)

AUGMENTATIONS_TRAIN = Compose([
  HorizontalFlip(p=0.5),
  OneOf([
    RandomContrast(),
    RandomGamma(),
    RandomBrightness(),
  ], p=0.3),
  OneOf([
    ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
    GridDistortion(),
    OpticalDistortion(distort_limit=2, shift_limit=0.5),
  ], p=0.3),
  RandomSizedCrop(min_max_height=(sz / 2, sz), height=h, width=w, p=0.5),
  ToFloat(max_value=1)
], p=1)

AUGMENTATIONS_TEST = Compose([
  ToFloat(max_value=1)
], p=1)

train_dataloader = Dataloder(train_dataset, batch_size=BATCH_SIZE,augment=AUGMENTATIONS_TRAIN,shuffle=True)
valid_dataloader = Dataloder(valid_dataset, batch_size=BATCH_SIZE, augment=AUGMENTATIONS_TEST,shuffle=False)

定义网络模型

python 复制代码

from keras.layers import *
from keras.models import *
import warnings
warnings.filterwarnings("ignore")

def my_model(input_shape, with_dropout= True):
    m_input = Input(input_shape)
    out = Conv2D(32, 3,padding='same', activation="relu")(m_input)
    out = BatchNormalization()(out)
    out = MaxPool2D(2)(out)
    out = Conv2D(64, 3, padding='same', activation="relu")(out)
    out = Conv2D(64, 3, padding='same', activation="relu")(out)
    out = Conv2D(64, 3, padding='same', activation="relu")(out)
    out = BatchNormalization()(out)
    out = MaxPool2D(2)(out)
    out = Conv2D(64, 3, padding='same', activation="relu")(out)
    out = Conv2D(64, 3, padding='same', activation="relu")(out)
    out = Conv2D(64, 3, padding='same', activation="relu")(out)
    out = BatchNormalization()(out)
    out = MaxPool2D(2)(out)
    out = Conv2D(128, 3, padding='same', activation="relu")(out)
    out = Conv2D(128, 3, padding='same', activation="relu")(out)
    out = Conv2D(128, 3, padding='same', activation="relu")(out)
    out = BatchNormalization()(out)
    out = MaxPool2D(2)(out)
    out = Conv2D(128, 3, padding='same', activation="relu")(out)
    out = Conv2D(128, 3, padding='same', activation="relu")(out)
    out = Conv2D(128, 3, padding='same', activation="relu")(out)
    out = Flatten()(out)
    out = Dense(64, activation="relu")(out)
    if with_dropout:
        out = Dropout(0.4)(out)
    out = Dense(2, activation="linear")(out)
    model = Model(m_input, out)
    return model

model = my_model((128,128,1))
model.summary()

获得网络所有的层

获得每一层模型的名称，是否可训练，权重信息

python 复制代码

print(" model layers:",len(model.layers))
x = np.random.random((1,128,128,1))
out = model.predict(x)
print("output size:",x.shape)
for layer in model.layers:
    print(layer.name,layer.trainable)
    if len(layer.weights)>0:
        print(layer.weights[0].shape)
        break

python 复制代码

import keras.backend as K
from keras.layers import  *
import numpy as np


model=None

model.load_weights("finall_weights.h5")


def get_layers_output(model,x_input,index):
  middle_layer = K.function([model.input], [model.layers[index].output])
  middle_out=middle_layer([np.expand_dims(x_input,axis=0)])[0]
  return middle_out


############################################################################
##获得某一层的权重
layer_dict = dict([(layer.name, layer) for layer in model.layers])
print(layer_dict)
print(model.layers[-1].output)
last_weights=model.layers[-1].get_weights()[0]
last_bais=model.layers[-1].get_weights()[1]
print("last layer weights:",last_weights.shape)
print("last layer bias:",last_bais,last_bais.shape) ##[0.4908378  0.48844907]

############################################################################
##获得某一层的输出
x_train=np.random.random((1,100,100,))
first_layer_out=get_layers_output(model,x_train[0],1)
print(first_layer_out[:,1,1,:3])  ## results:[[0.07100815 0.38357598 0.        ]]
############################################################################


############################################################################
##设置模型网络层是否可训练
def make_trainable(net, val):
    net.trainable = val
    for l in net.layers:
        l.trainable = val
# make_trainable(model,False)
############################################################################

获得网络的权重参数

python 复制代码

t=model.get_layer('conv2d_14')
print(t)
for index in t.get_weights():
    print(index.shape)

冻结网络的某一层

python 复制代码

for layer in model.layers:
    if not isinstance(layer, BatchNormalization):
        layer.trainable = False

保存和加载权重参数信息

python 复制代码

model.save_weights("weights.h5")  ## v1
model.load_weights("weights.h5",by_name=True)## v2

def IoU(y_true, y_pred, eps=1e-6):
    if K.max(y_true) == 0.0:
        return IoU(1-y_true, 1-y_pred) ## empty image; calc IoU of zeros
    intersection = K.sum(y_true * y_pred, axis=[1,2,3])
    union = K.sum(y_true, axis=[1,2,3]) + K.sum(y_pred, axis=[1,2,3]) - intersection
    return K.mean( (intersection + eps) / (union + eps), axis=0)

加载模型

python 复制代码

from keras.models import load_model
model = load_model('model.h5', custom_objects={'IoU': IoU}) ## v3

model.save("path_to_my_model")

检查权重是否加载成功

python 复制代码

model = get_model()
# Train the model.
test_input = np.random.random((128, 32))
test_target = np.random.random((128, 1))
model.fit(test_input, test_target)

# Calling `save('my_model')` creates a SavedModel folder `my_model`.
model.save("my_model")

# It can be used to reconstruct the model identically.
reconstructed_model = keras.models.load_model("my_model")
# # Option 1: Load with the custom_object argument.
# reconstructed_model = keras.models.load_model(
#     "my_model", custom_objects={"CustomModel": CustomModel}
# )

# Let's check:
np.testing.assert_allclose(
    model.predict(test_input), reconstructed_model.predict(test_input)
)

自定义网络层

python 复制代码

import keras.backend as K
from keras.layers import *
from keras.models import *
import numpy as np

def sub_mean(x):
    x-=x/2
    return x

class MyLayer(Layer):
    def __init__(self, output_dim, **kw):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kw)

    def build(self, input_shape):
        input_dim = input_shape[1]
        inital_SCALER = np.ones((input_dim,self.output_dim))
        self.SCALER = K.variable(inital_SCALER)
        self.trainable_weights = [self.SCALER]
        super(MyLayer, self).build(input_shape)

    def call(self, x, mask=None):
        x = K.dot(x,self.SCALER)
        return x

    def compute_output_shape(self, input_shape):
        return (input_shape[0],self.output_dim)



def get_submean_model():
    model = Sequential()
    model.add(Dense(5, input_dim=7))
    model.add(MyLayer(1))
    model.add(Lambda(sub_mean,output_shape=lambda input_shape:input_shape))
    model.compile(optimizer='rmsprop', loss='mse')
    return model
model = get_submean_model()
model.summary()

python 复制代码

import tensorflow as tf
from keras.layers import *
from keras.models import *

class CustomDense(Layer):
    def __init__(self, units=32):
        super(CustomDense, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b
    def get_config(self):
        return {"units": self.units}


inputs = Input((4,))
outputs = CustomDense(10)(inputs)

model = Model(inputs, outputs)
model.summary()

python 复制代码

config = model.get_config()
new_model = Model.from_config(config, custom_objects={"CustomDense": CustomDense})
new_model.summary()

复制代码

out = new_model(tf.zeros((2,4)))
out.shape

获得某一层参数

python 复制代码

layer = layers.Dense(3)
print(layer.weights)  # Empty

x=tf.zeros((2,3))
out=layer(x)
out.shape,layer.weights

python 复制代码

model = keras.Sequential()
model.add(layers.Dense(2, activation="relu", input_shape=(4,)))

model.summary()

常用的优化器

python 复制代码

from keras.optimizers import RMSprop,Adam,SGD,Adadelta
m_sgd = SGD(lr=0.01,momentum=0.9,decay=1e-3,nesterov=False)

python 复制代码

from keras.losses import *
from keras.optimizers import *
model.compile(
    optimizer=RMSprop(1e-3),
    loss={
        "priority": BinaryCrossentropy(from_logits=True),
        "department": CategoricalCrossentropy(from_logits=True),
    },
     metrics=["acc"],
    loss_weights={"priority": 1.0, "department": 0.2},
)

常用的损失函数

python 复制代码

from keras.losses import mean_squared_error,sparse_categorical_crossentropy,binary_crossentropy

from keras.optimizers import Adam,SGD

def dice_coef_loss(y_true, y_pred):
    def dice_coef(y_true, y_pred, smooth=1):
        intersection = K.sum(y_true * y_pred, axis=[1, 2, 3])
        union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3])
        return K.mean((2. * intersection + smooth) / (union + smooth), axis=0)
    return 1 - dice_coef(y_true, y_pred, smooth=1)

# 自定义损失函数
def IoU(y_true, y_pred, eps=1e-6):
    if K.max(y_true) == 0.0:
        return IoU(1-y_true, 1-y_pred) ## empty image; calc IoU of zeros
    intersection = K.sum(y_true * y_pred, axis=[1,2,3])
    union = K.sum(y_true, axis=[1,2,3]) + K.sum(y_pred, axis=[1,2,3]) - intersection
    return K.mean( (intersection + eps) / (union + eps), axis=0)

model.compile(optimizer=Adam(lr=0.001),loss=dice_coef_loss, metrics=[IoU])

常用的指标验证函数

python 复制代码

"""
categorical_accuracy
accuracy
binary_accuracy
mse
mape
top_k_categorical_accuracy
"""

model.compile(loss="mse", optimizer=RMSprop(lr=0.001),metrics=['mape'])
# 自定义验证指标函数
def define_rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_true - y_pred), axis=-1))

model.compile(optimizer='rmsprop',metrics=[define_rmse],loss = define_rmse)

训练过程

python 复制代码

# train model
from keras.callbacks import *

callbacks = [
    ModelCheckpoint('./best_model.h5', save_weights_only=True, save_best_only=True, mode='min',verbose=1),
    ReduceLROnPlateau(monitor='val_loss',factor=0.6,patience=6,mode='min',verbose=1),
    EarlyStopping(monitor='val_loss', patience=30, verbose=1),
]

history = model.fit_generator(train_dataloader, steps_per_epoch=len(train_dataloader), epochs=EPOCHS,  callbacks=callbacks, 
                              validation_data=valid_dataloader, 
                              validation_steps=len(valid_dataloader),verbose=1)

## train model
  history = model.fit(x_train, y_train, batch_size=16 * 4, epochs=120, verbose=1, validation_data=(x_test, y_test),
                      callbacks=[checkpoint, early_stopping,r_lr],verbose=1)

预测阶段

python 复制代码

scores = model.evaluate_generator(val_dataloader)
scores = model.predict_generator(test_dataloader)


scores = model.evaluate(x = val_train, y = val_label,batch_size = None, verbose = 1)
pred = model.predict(np.expand_dims(image, axis=0))

python 复制代码

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
units = 32
timesteps = 10
input_dim = 5

# Define a Functional model
inputs = keras.Input((None, units))
x = layers.GlobalAveragePooling1D()(inputs)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs)


class CustomRNN(layers.Layer):
    def __init__(self):
        super(CustomRNN, self).__init__()
        self.units = units
        self.projection_1 = layers.Dense(units=units, activation="tanh")
        self.projection_2 = layers.Dense(units=units, activation="tanh")
        # Our previously-defined Functional model
        self.classifier = model

    def call(self, inputs):
        outputs = []
        state = tf.zeros(shape=(inputs.shape[0], self.units))
        for t in range(inputs.shape[1]):
            x = inputs[:, t, :]
            h = self.projection_1(x)
            y = h + self.projection_2(state)
            state = y
            outputs.append(y)
        features = tf.stack(outputs, axis=1)
        return self.classifier(features)


rnn_model = CustomRNN()
out = rnn_model(tf.zeros((1, timesteps, input_dim)))
print(out.shape)

python 复制代码

(1, 1)

keras 模型与pytorch模型

二维卷积层(keras vs pytorch)

python 复制代码

import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

#initialize the layers respectively
torch_layer = nn.Conv2d(    
    in_channels=3,
    out_channels=32,
    kernel_size=(3, 3),
    stride=(1, 1)
)
torch_model = nn.Sequential(
              # nn.ZeroPad2d((2,3,2,3)),
              torch_layer
              )

tf_layer = layers.Conv2D(
    filters=32,
    kernel_size=(3, 3),
    strides=(1, 1),
    # padding='same',
    # use_bias=False
)

tf_model = keras.Sequential([
           # layers.ZeroPadding2D((3, 3)),
           tf_layer
           ])

#setting weights in torch layer and tf layer respectively
torch_weights = np.random.rand(32, 3, 3, 3)
torch_bias = np.random.rand(32)
tf_weights = np.transpose(torch_weights, (2, 3, 1, 0))
tf_bias = torch_bias

## 赋值固定的权重
torch_layer.weight = torch.nn.Parameter(torch.Tensor(torch_weights))
torch_layer.bias = torch.nn.Parameter(torch.Tensor(torch_bias))

tf_model(np.zeros((1,256,256,3)))
# tf_layer.kernel.assign(tf_weights)
tf_layer.weights[0].assign(tf_weights)
tf_layer.weights[1].assign(tf_bias)


tf_inputs = np.random.rand(1,  256, 256, 3)
torch_inputs =torch.Tensor(np.transpose(tf_inputs, (0, 3 ,1,2)))

with torch.no_grad():
    torch_output = torch_model(torch_inputs)
tf_output = tf_model(tf_inputs)
print(tf_output.numpy().shape, torch_output.shape)
np.allclose(tf_output.numpy() ,np.transpose(torch_output.numpy(),(0, 2, 3, 1))) #True

python 复制代码

(1, 254, 254, 32) torch.Size([1, 32, 254, 254])

True

BatchNormal层(keras vs pytorch)

python 复制代码

tf_inputs = np.random.rand(1,  12, 12, 32)
layer =layers.BatchNormalization() 
layer(tf_inputs)
weights = layer.get_weights() # gamma,beta,mean,var
for layer in weights:
    print(layer.shape)

python 复制代码

(32,)
(32,)
(32,)
(32,)

python 复制代码

torch_layer = nn.BatchNorm2d(32)
torch_layer.weight,torch_layer.bias

python 复制代码

(Parameter containing:
 tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        requires_grad=True),
 Parameter containing:
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True))

python 复制代码

import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

#initialize the layers respectively
torch_layer = nn.BatchNorm2d(32,eps=1e-05, momentum=0.1)  #weight(gamma)和bias(beta)将被使用
torch_model = nn.Sequential(
              # nn.ZeroPad2d((2,3,2,3)),
              torch_layer
              )

tf_layer = layers.BatchNormalization( momentum=0.1,
    epsilon=1e-05,)

tf_model = keras.Sequential([
           # layers.ZeroPadding2D((3, 3)),
           tf_layer
           ])

#setting weights in torch layer and tf layer respectively
weights = np.random.rand(32)
bias = np.random.rand(32)
## 赋值固定的权重
torch_layer.weight = torch.nn.Parameter(torch.Tensor(weights))
torch_layer.bias = torch.nn.Parameter(torch.Tensor(bias))

tf_model(np.zeros((1,12,12,32)))
# tf_layer.kernel.assign(tf_weights)
tf_layer.weights[0].assign(weights)
tf_layer.weights[1].assign(bias)


tf_inputs = np.random.rand(1,  12, 12, 32)
torch_inputs =torch.Tensor(np.transpose(tf_inputs, (0, 3 ,1,2)))
tf_layer.weights[2].assign(tf_inputs.mean(axis=(0,1,2)))
tf_layer.weights[3].assign(tf_inputs.std(axis=(0,1,2)))

with torch.no_grad():
    torch_output = torch_model(torch_inputs)
tf_output = tf_model(tf_inputs)
print(tf_output.numpy().shape, torch_output.shape)
print(np.allclose(tf_output.numpy() ,np.transpose(torch_output.numpy(),(0, 2, 3, 1)))) #True
tf_output[0,0,0,:10],np.transpose(torch_output.numpy(),(0, 2, 3, 1))[0,0,0,:10]

python 复制代码

(1, 12, 12, 32) torch.Size([1, 32, 12, 12])
False

(<tf.Tensor: shape=(10,), dtype=float32, numpy=
 array([ 0.45506844, -0.12573612,  0.81879985,  0.08219968,  0.08907801,
         0.88896   ,  0.6723665 ,  0.9736586 , -0.1965737 ,  0.38121822],
       dtype=float32)>,
 array([ 0.53097904, -0.34610078,  0.8367056 , -0.046165  , -0.2674462 ,
         0.953462  ,  0.69444454,  1.0367548 , -0.36438996,  0.60396177],
       dtype=float32))

最大池化层(keras vs pytorch)

python 复制代码

import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

#prepare inputs and do inference
# torch_inputs = torch.Tensor(np.random.rand(1, 3, 256, 256))
# tf_inputs = np.transpose(torch_inputs.numpy(), (0, 2, 3, 1))
tf_inputs = np.random.rand(1,  256, 256, 3)
torch_inputs =torch.Tensor(np.transpose(tf_inputs, (0, 3 ,1,2)))


torch_layer =  nn.MaxPool2d(2)
torch_model = nn.Sequential(torch_layer)
tf_layer = layers.MaxPooling2D(pool_size=(2, 2))
tf_model = keras.Sequential([tf_layer])

with torch.no_grad():
    torch_output = torch_model(torch_inputs)
tf_output = tf_model.predict(tf_inputs)
print(tf_output.shape, torch_output.shape)
np.allclose(tf_output ,np.transpose(torch_output.numpy(),(0, 2, 3, 1))) #True

python 复制代码

(1, 128, 128, 3) torch.Size([1, 3, 128, 128])
True

全连接层输出(keras vs pytorch)

python 复制代码

import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers

#initialize the layers respectively
torch_layer = nn.Linear(256,10)
torch_model = nn.Sequential( torch_layer)

tf_layer = layers.Dense(10,activation=None)
tf_model = keras.Sequential([ tf_layer])

#setting weights in torch layer and tf layer respectively
torch_weights = np.random.rand(10,256)
torch_bias = np.random.rand(10)

tf_weights = np.transpose(torch_weights, (1, 0))
tf_bias = torch_bias


## 赋值固定的权重
torch_layer.weight = torch.nn.Parameter(torch.Tensor(torch_weights))
torch_layer.bias = torch.nn.Parameter(torch.Tensor(torch_bias))
tf_model(np.zeros((4,256)))
tf_layer.weights[0].assign(tf_weights)
tf_layer.weights[1].assign(tf_bias)


inputs = torch.Tensor(np.random.rand(4,  256))
with torch.no_grad():
    torch_output = torch_model(inputs)
tf_output = tf_model(inputs.numpy())
print(tf_output.numpy().shape, torch_output.shape)
np.allclose(tf_output.numpy() ,torch_output.numpy()) #True

python 复制代码

(4, 10) torch.Size([4, 10])

True

Flatten vs view() (keras vs pytorch)

python 复制代码

import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
np.set_printoptions(precision=4)
torch.set_printoptions(precision=4)

#initialize the layers respectively

tf_layer = layers.Flatten()
tf_model = keras.Sequential([ tf_layer])

tf_inputs = np.random.rand(2,2,2,3)
torch_inputs =torch.Tensor(tf_inputs) #np.transpose(tf_inputs, (0, 3, 1, 2)))
print(tf_inputs.shape, torch_inputs.shape)
with torch.no_grad():
    torch_output =  torch.flatten(torch_inputs,start_dim=1)
tf_output = tf_model(tf_inputs)
print(tf_output.numpy().shape, torch_output.shape)
np.allclose(tf_output.numpy() ,torch_output.numpy()) #True

python 复制代码

(2, 2, 2, 3) torch.Size([2, 2, 2, 3])
(2, 12) torch.Size([2, 12])
True

技术交流与资料获取

技术要学会交流、分享，不建议闭门造车。一个人可以走的很快、一堆人可以走的更远。

资料干货、数据、技术交流提升，均可加交流群获取，群友已超过2000人，添加时最好的备注方式为：来源+兴趣方向，方便找到志同道合的朋友。

技术交流、代码、数据获取方式如下

方式①、微信搜索公众号：Python学习与数据挖掘，后台回复：交流

方式②、添加微信号：dkl88194，备注：交流

我们打造了《100个超强算法模型》，特点：从0到1轻松学习，原理、代码、案例应有尽有，所有的算法模型都是按照这样的节奏进行表述，所以是一套完完整整的案例库。

很多初学者是有这么一个痛点，就是案例，案例的完整性直接影响同学的兴致。因此，我整理了 100个最常见的算法模型，在你的学习路上助推一把！