I am building a GAN network that predicts an output of shape (40,40,6) from two inputs of shapes [(40,40,4), (20,20,6)].

The model works and already produces results, but I only get a GPU utilization between 60 and 70% (as reported by nvidia-smi).

My question is whether this is inherent to this kind of model, because of the work it has to do between calls to train_on_batch, or whether there is a way to speed up the process.

A minimal working example on random data looks like this:

import numpy as np
import os


import tensorflow as tf

from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Lambda

from tensorflow.keras.optimizers import Adam


gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)






# =============================================================================
# define the model    
# =============================================================================

def resBlock(X_in, num_of_features, kernel_size, scale):

    x = Conv2D(num_of_features, kernel_size, kernel_initializer='he_uniform', padding='same')(X_in)
    x = Activation('relu')(x)
    x = Conv2D(num_of_features, kernel_size, kernel_initializer='he_uniform', padding='same')(x)
    x = Lambda(lambda x: x * scale)(x)

    X_out = Add()([X_in,x])

    return X_out

class Generator(object):

    def __init__(self, noise_shape):

        self.noise_shape = noise_shape
        self.num_of_features = 128
        self.kernel_size = (3,3)
        self.scale = 0.1
        self.padding=8
        self.hp = int(self.padding/2) # half padding

    def generator(self):

        # get the inputs and do upsampling
        inputs_channels_A = Input((32+self.padding,32+self.padding,4),name = 'input_A')
        inputs_channels_B = Input((16+self.hp,16+self.hp,6),name = 'input_B')
        inputs_channels_B_upsampled = UpSampling2D(size = (2,2))(inputs_channels_B) # (20,20,6) -> (40,40,6)

        # concatenate everything along the channel axis
        concatenated_input = concatenate([inputs_channels_A,
                                          inputs_channels_B_upsampled],
                                          axis=3)

        # do the first convolution
        x = Conv2D(self.num_of_features,
                   self.kernel_size,
                   activation = 'relu',
                   padding = 'same',
                   kernel_initializer = 'he_normal')(concatenated_input)

        # do the resBlock iterations
        for resblock_index in range(6):
            x = resBlock(x,self.num_of_features, self.kernel_size, self.scale)

        # last conv to bring the features back to 6 channels, i.e. (40,40,6)
        x = Conv2D(6, (3, 3), kernel_initializer='he_uniform', padding='same')(x)

        # last skip connection
        output = Add()([x,inputs_channels_B_upsampled])

        # defining model
        generator_model = Model(inputs = [inputs_channels_A,inputs_channels_B], outputs = output)

        return generator_model

def discriminator_block(model, filters, kernel_size, strides):

    model = Conv2D(filters = filters, kernel_size = kernel_size, strides = strides, padding = "same")(model)
    model = BatchNormalization(momentum = 0.5)(model)
    model = LeakyReLU(alpha = 0.2)(model)

    return model

class Discriminator(object):

    def __init__(self, image_shape):

        self.image_shape = image_shape

    def discriminator(self):

        dis_input = Input(shape = (self.image_shape))

        model = Conv2D(filters = 64, kernel_size = 3, strides = 1, padding = "same")(dis_input)
        model = LeakyReLU(alpha = 0.2)(model)

        model = discriminator_block(model, 64, 3, 2)
        model = discriminator_block(model, 128, 3, 1)
        model = discriminator_block(model, 128, 3, 2)
        model = discriminator_block(model, 256, 3, 1)
        model = discriminator_block(model, 256, 3, 2)
        model = discriminator_block(model, 512, 3, 1)
        model = discriminator_block(model, 512, 3, 2)

        model = Flatten()(model)
        model = Dense(1024)(model)
        model = LeakyReLU(alpha = 0.2)(model)

        model = Dense(1)(model)
        model = Activation('sigmoid')(model) 

        discriminator_model = Model(inputs = dis_input, outputs = model)

        return discriminator_model   

def get_gan_network(discriminator, shape_list_AB, generator, optimizer, loss):

    discriminator.trainable = False

    gan_input_A = Input(shape=shape_list_AB[0])
    gan_input_B = Input(shape=shape_list_AB[1])

    x = generator([gan_input_A,gan_input_B])
    gan_output = discriminator(x)

    gan = Model(inputs=[gan_input_A,gan_input_B], outputs=[x,gan_output])
    gan.compile(loss=[loss, "binary_crossentropy"], loss_weights=[1., 1e-3], optimizer=optimizer)

    return gan


def get_optimizer():
    adam = Adam(learning_rate=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    return adam





# =============================================================================
# choose some parameters and compile the model
# =============================================================================

batch_size = 128
shape_input_A = (40,40,4)
shape_input_B = (20,20,6)
shape_output = (40,40,6)


generator = Generator(shape_input_B).generator() # todo shape
discriminator = Discriminator(shape_output).discriminator() # todo shape

optimizer = get_optimizer()

generator.compile(loss="mse", optimizer=optimizer)
discriminator.compile(loss="binary_crossentropy", optimizer=optimizer)

gan = get_gan_network(discriminator, [shape_input_A,shape_input_B], generator, optimizer, "mse")  




# =============================================================================
# training
# =============================================================================

def get_random_data(mod):

    # mod == 0: random batches for the two network inputs
    if mod == 0: 
        return [np.random.rand(batch_size,40,40,4),np.random.rand(batch_size,20,20,6)]

    # otherwise: a random batch matching the network output
    else: 
        return np.random.rand(batch_size,40,40,6)


# preallocate the arrays that are reused in every batch
generated_images_sr = np.empty((batch_size,)+shape_output)
real_data_Y = np.empty(batch_size)
fake_data_Y = np.empty(batch_size)

for e in range(1, 10):

    print("epoch:",e)

    for batchindex in range(200):

        generated_images_sr[:] = generator.predict(get_random_data(0))

        real_data_Y[:] = np.ones(batch_size) - np.random.random_sample(batch_size)*0.2
        fake_data_Y[:] = np.random.random_sample(batch_size)*0.2

        discriminator.trainable = True

        d_loss_real = discriminator.train_on_batch(get_random_data(1), real_data_Y)
        d_loss_fake = discriminator.train_on_batch(generated_images_sr, fake_data_Y)
        discriminator_loss = 0.5 * np.add(d_loss_fake, d_loss_real)

        gan_Y = np.ones(batch_size) - np.random.random_sample(batch_size)*0.2
        discriminator.trainable = False
        gan_loss = gan.train_on_batch(get_random_data(0), [get_random_data(1),gan_Y])


    print("discriminator_loss : %f" % discriminator_loss)
    print("gan_loss :", gan_loss)

I run this code inside a docker container tensorflow/tensorflow:2.0.0-gpu-py3 on my GTX 2080.

some_name.py, 22 October 2019, 13:34

1 Answer

Best answer

Training a GAN implies some overhead that will not be executed on the GPU. In your case, generating real_data_Y and fake_data_Y, executing get_random_data(), and computing the losses all result in GPU idle time.
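One way to shrink that idle time is to prepare batches on the CPU while the GPU is training. Below is a minimal sketch (not from your code; random_batches() is a hypothetical stand-in for get_random_data()) that uses tf.data with prefetch() so batch assembly overlaps with the train_on_batch() calls:

import numpy as np
import tensorflow as tf

batch_size = 128

def random_batches():
    # yield (input_A, input_B, target) batches of random data forever
    while True:
        yield (np.random.rand(batch_size, 40, 40, 4).astype(np.float32),
               np.random.rand(batch_size, 20, 20, 6).astype(np.float32),
               np.random.rand(batch_size, 40, 40, 6).astype(np.float32))

dataset = tf.data.Dataset.from_generator(
    random_batches,
    output_types=(tf.float32, tf.float32, tf.float32),
    output_shapes=((batch_size, 40, 40, 4),
                   (batch_size, 20, 20, 6),
                   (batch_size, 40, 40, 6)))

# prefetch() lets the CPU assemble the next batch while the GPU is busy
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

for input_A, input_B, target in dataset.take(200):
    # feed these tensors to the existing train_on_batch() calls, e.g.
    # d_loss_real = discriminator.train_on_batch(target, real_data_Y)
    pass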

You can try profiling your program with python -m cProfile -o performance.prof xxx.py and check whether there are bottlenecks that could be improved, but 60 to 70% already does not sound too bad.
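For example, the resulting profile can be inspected like this (a small sketch, assuming the file name performance.prof used above):

import pstats

stats = pstats.Stats("performance.prof")
stats.sort_stats("cumulative").print_stats(20)  # 20 most expensive calls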

Eric Fournie, 22 October 2019, 16:32