
Commit 101d3fc: run part 2

rasbt committed Nov 5, 2019
1 parent 750dab3
Showing 2 changed files with 353 additions and 253 deletions.
360 changes: 224 additions & 136 deletions ch17/ch17_part2.ipynb (large diff not rendered)

246 changes: 129 additions & 117 deletions ch17/ch17_part2.py
@@ -15,8 +15,7 @@
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)

# Chapter 17: Generative Adversarial Networks (part 2/2)
# =====
# # Chapter 17: Generative Adversarial Networks (Part 2/2)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).

@@ -70,15 +69,19 @@



#import tensorflow as tf
#print("GPU Available: ", tf.test.is_gpu_available())
#device_name = tf.test.gpu_device_name()
#device_name


print(tf.__version__)

print("GPU Available:", tf.test.is_gpu_available())

if tf.test.is_gpu_available():
device_name = tf.test.gpu_device_name()

else:
device_name = 'CPU:0'

print(device_name)
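# Note: tf.test.is_gpu_available is deprecated in later TensorFlow 2.x releases.
# A commented-out alternative using an API that also exists in TF 2.0 (a sketch,
# not part of this commit):
#gpus = tf.config.experimental.list_physical_devices('GPU')
#device_name = tf.test.gpu_device_name() if gpus else 'CPU:0'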



@@ -87,11 +90,16 @@



def make_dcgan_generator(z_size=20, output_size=(28, 28, 1),
n_filters=128, n_blocks=2):
def make_dcgan_generator(
z_size=20,
output_size=(28, 28, 1),
n_filters=128,
n_blocks=2):
size_factor = 2**n_blocks
hidden_size = (output_size[0]//size_factor,
output_size[1]//size_factor)
hidden_size = (
output_size[0]//size_factor,
output_size[1]//size_factor
)

model = tf.keras.Sequential([
tf.keras.layers.Input(shape=(z_size,)),
@@ -114,21 +122,25 @@ def make_dcgan_generator(z_size=20, output_size=(28, 28, 1),
nf = n_filters
for i in range(n_blocks):
nf = nf // 2
model.add(tf.keras.layers.Conv2DTranspose(
filters=nf, kernel_size=(5, 5), strides=(2, 2),
padding='same', use_bias=False))
model.add(
tf.keras.layers.Conv2DTranspose(
filters=nf, kernel_size=(5, 5), strides=(2, 2),
padding='same', use_bias=False))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU())


model.add(tf.keras.layers.Conv2DTranspose(
filters=output_size[2], kernel_size=(5, 5), strides=(1, 1),
padding='same', use_bias=False, activation='tanh'))
model.add(
tf.keras.layers.Conv2DTranspose(
filters=output_size[2], kernel_size=(5, 5),
strides=(1, 1), padding='same', use_bias=False,
activation='tanh'))

return model

def make_dcgan_discriminator(input_size=(28, 28, 1),
n_filters=64, n_blocks=2):
def make_dcgan_discriminator(
input_size=(28, 28, 1),
n_filters=64,
n_blocks=2):
model = tf.keras.Sequential([
tf.keras.layers.Input(shape=input_size),
tf.keras.layers.Conv2D(
@@ -141,9 +153,10 @@ def make_dcgan_discriminator(input_size=(28, 28, 1),
nf = n_filters
for i in range(n_blocks):
nf = nf*2
model.add(tf.keras.layers.Conv2D(
filters=nf, kernel_size=(5, 5),
strides=(2, 2),padding='same'))
model.add(
tf.keras.layers.Conv2D(
filters=nf, kernel_size=(5, 5),
strides=(2, 2),padding='same'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.LeakyReLU())
model.add(tf.keras.layers.Dropout(0.3))
@@ -183,7 +196,6 @@ def make_dcgan_discriminator(input_size=(28, 28, 1),




mnist_bldr = tfds.builder('mnist')
mnist_bldr.download_and_prepare()
mnist = mnist_bldr.as_dataset(shuffle_files=False)
@@ -194,48 +206,28 @@ def preprocess(ex, mode='uniform'):

image = image*2 - 1.0
if mode == 'uniform':
input_z = tf.random.uniform(shape=(z_size,),
minval=-1.0, maxval=1.0)
input_z = tf.random.uniform(
shape=(z_size,), minval=-1.0, maxval=1.0)
elif mode == 'normal':
input_z = tf.random.normal(shape=(z_size,))
input_z = tf.random.normal(shape=(z_size,))
return input_z, image
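# The first lines of preprocess() fall outside this hunk; for orientation, a
# self-contained version might look as follows (preprocess_sketch is an
# illustrative name, and the uint8-to-float conversion is an assumption about
# the unchanged part of the function):
def preprocess_sketch(ex, mode='uniform'):
    image = ex['image']                                       # tfds MNIST yields a dict with uint8 images
    image = tf.image.convert_image_dtype(image, tf.float32)   # scale pixels to [0, 1]
    image = image*2 - 1.0                                      # scale to [-1, 1] to match the tanh output
    if mode == 'uniform':
        input_z = tf.random.uniform(
            shape=(z_size,), minval=-1.0, maxval=1.0)
    elif mode == 'normal':
        input_z = tf.random.normal(shape=(z_size,))
    return input_z, image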




num_epochs = 100
batch_size = 64
batch_size = 128
image_size = (28, 28)
z_size = 20
mode_z = 'uniform'
gen_hidden_layers = 1
gen_hidden_size = 100
disc_hidden_layers = 1
disc_hidden_size = 100
lambda_gp = 10.0

tf.random.set_seed(1)
np.random.seed(1)


if mode_z == 'uniform':
fixed_z = tf.random.uniform(
shape=(batch_size, z_size),
minval=-1, maxval=1)
elif mode_z == 'normal':
fixed_z = tf.random.normal(
shape=(batch_size, z_size))

def create_samples(g_model, input_z):
g_output = g_model(input_z, training=False)
images = tf.reshape(g_output, (batch_size, *image_size))
return (images+1)/2.0

## Set-up the dataset
mnist_trainset = mnist['train']
mnist_trainset = mnist_trainset.map(
lambda ex: preprocess(ex, mode=mode_z))

input_z, input_real = next(iter(mnist_trainset))
mnist_trainset = mnist_trainset.map(preprocess)

mnist_trainset = mnist_trainset.shuffle(10000)
mnist_trainset = mnist_trainset.batch(
@@ -245,84 +237,100 @@ def create_samples(g_model, input_z):
with tf.device(device_name):
gen_model = make_dcgan_generator()
gen_model.build(input_shape=(None, z_size))
gen_model.summary()

disc_model = make_dcgan_discriminator()
disc_model.build(input_shape=(None, np.prod(image_size)))
disc_model.summary()
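# A quick shape check, as a sketch (assuming the default architectures defined
# earlier): the generator maps a batch of z vectors to 28x28x1 images in the
# tanh range [-1, 1], and the discriminator returns one unbounded score per image.
z_check = tf.random.uniform(shape=(4, z_size), minval=-1.0, maxval=1.0)
img_check = gen_model(z_check, training=False)
score_check = disc_model(img_check, training=False)
print('Generator output:    ', img_check.shape)    # expected: (4, 28, 28, 1)
print('Discriminator output:', score_check.shape)  # expected: one score per image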






## Loss function and optimizers:
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
g_optimizer = tf.keras.optimizers.Adam()
d_optimizer = tf.keras.optimizers.Adam()
## optimizers:
g_optimizer = tf.keras.optimizers.Adam(0.0002)
d_optimizer = tf.keras.optimizers.Adam(0.0002)
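# The WGAN-GP paper (Gulrajani et al., 2017) uses Adam with beta_1=0.0 and
# beta_2=0.9; a commented-out alternative to the settings above (a sketch, not
# what this commit uses):
#g_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.0, beta_2=0.9)
#d_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.0, beta_2=0.9)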

if mode_z == 'uniform':
fixed_z = tf.random.uniform(
shape=(batch_size, z_size),
minval=-1, maxval=1)
elif mode_z == 'normal':
fixed_z = tf.random.normal(
shape=(batch_size, z_size))

avg_epoch_losses = []
avg_d_vals = []
def create_samples(g_model, input_z):
g_output = g_model(input_z, training=False)
images = tf.reshape(g_output, (batch_size, *image_size))
return (images+1)/2.0
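# For example, sampling once from the freshly initialized generator with the
# fixed noise above yields a (batch_size, 28, 28) array of values in [0, 1]
# (pure noise before training); a commented-out usage sketch:
#initial_samples = create_samples(gen_model, fixed_z)
#print(initial_samples.shape)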

all_losses = []
epoch_samples = []

start_time = time.time()

for epoch in range(1, num_epochs+1):
losses = []
epoch_losses = []
for i,(input_z,input_real) in enumerate(mnist_trainset):

## Compute discriminator's real-loss and its gradients:
with tf.GradientTape() as d_tape_real:
d_logits_real = disc_model(input_real, training=True)

d_labels_real = tf.ones_like(d_logits_real)# * smoothing_factor
## Compute discriminator's loss and gradients:
with tf.GradientTape() as d_tape, tf.GradientTape() as g_tape:
g_output = gen_model(input_z, training=True)

d_critics_real = disc_model(input_real, training=True)
d_critics_fake = disc_model(g_output, training=True)

## Compute generator's loss:
g_loss = -tf.math.reduce_mean(d_critics_fake)

## Compute discriminator's losses
d_loss_real = -tf.math.reduce_mean(d_critics_real)
d_loss_fake = tf.math.reduce_mean(d_critics_fake)
d_loss = d_loss_real + d_loss_fake

## Gradient penalty:
with tf.GradientTape() as gp_tape:
alpha = tf.random.uniform(
shape=[d_critics_real.shape[0], 1, 1, 1],
minval=0.0, maxval=1.0)
interpolated = (
alpha*input_real + (1-alpha)*g_output)
gp_tape.watch(interpolated)
d_critics_intp = disc_model(interpolated)

d_loss_real = loss_fn(y_true=d_labels_real,
y_pred=d_logits_real)
d_grads_real = d_tape_real.gradient(
d_loss_real, disc_model.trainable_variables)
## Optimization: Apply the gradients
grads_intp = gp_tape.gradient(
d_critics_intp, [interpolated,])[0]
grads_intp_l2 = tf.sqrt(
tf.reduce_sum(tf.square(grads_intp), axis=[1, 2, 3]))
grad_penalty = tf.reduce_mean(tf.square(grads_intp_l2 - 1.0))

d_loss = d_loss + lambda_gp*grad_penalty

## Optimization: Compute the gradients and apply them
d_grads = d_tape.gradient(d_loss, disc_model.trainable_variables)
d_optimizer.apply_gradients(
grads_and_vars=zip(d_grads_real,
disc_model.trainable_variables))

grads_and_vars=zip(d_grads, disc_model.trainable_variables))

## Compute generator's loss and its gradients:
with tf.GradientTape() as g_tape:
g_output = gen_model(input_z)
d_logits_fake = disc_model(g_output, training=True)
labels_real = tf.ones_like(d_logits_fake)
g_loss = loss_fn(y_true=labels_real,
y_pred=d_logits_fake)

g_grads = g_tape.gradient(g_loss, gen_model.trainable_variables)
g_optimizer.apply_gradients(
grads_and_vars=zip(g_grads, gen_model.trainable_variables))


## Compute discriminator's fake-loss and its gradients:
with tf.GradientTape() as d_tape_fake:
d_logits_fake = disc_model(g_output.numpy(), training=True)
d_labels_fake = tf.zeros_like(d_logits_fake)

d_loss_fake = loss_fn(y_true=d_labels_fake,
y_pred=d_logits_fake)

d_grads_fake = d_tape_fake.gradient(
d_loss_fake, disc_model.trainable_variables)
## Optimization: Apply the gradients
d_optimizer.apply_gradients(
grads_and_vars=zip(d_grads_fake,
disc_model.trainable_variables))

d_loss = (d_loss_real + d_loss_fake)/2.0
losses.append(
epoch_losses.append(
(g_loss.numpy(), d_loss.numpy(),
d_loss_real.numpy(), d_loss_fake.numpy()))


d_probs_real = tf.reduce_mean(tf.sigmoid(d_logits_real))
d_probs_fake = tf.reduce_mean(tf.sigmoid(d_logits_fake))
avg_d_vals.append((d_probs_real.numpy(), d_probs_fake.numpy()))
avg_epoch_losses.append(np.mean(losses, axis=0))

all_losses.append(epoch_losses)

print('Epoch {:-3d} | ET {:.2f} min | Avg Losses >>'
' G/D {:.4f}/{:.4f} [D-Real: {:.4f} D-Fake: {:.4f}]'
' G/D {:6.2f}/{:6.2f} [D-Real: {:6.2f} D-Fake: {:6.2f}]'
.format(epoch, (time.time() - start_time)/60,
*list(avg_epoch_losses[-1])))
epoch_samples.append(create_samples(
gen_model, num_samples=8).numpy())
*list(np.mean(all_losses[-1], axis=0)))
)

epoch_samples.append(
create_samples(gen_model, fixed_z).numpy()
)
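# For reference, the objective implemented in the loop above is the WGAN with
# gradient penalty (WGAN-GP): the critic minimizes E[D(fake)] - E[D(real)] plus
# lambda_gp times the penalty (||grad_xhat D(xhat)||_2 - 1)^2 averaged over random
# interpolates xhat between real and generated images, while the generator
# minimizes -E[D(fake)]. A minimal, self-contained sketch of those losses
# (an illustration, not the code this commit adds):
def wgan_gp_losses(critic, real_images, fake_images, lambda_gp=10.0):
    """Return (generator_loss, critic_loss) for one batch; a sketch only."""
    d_real = critic(real_images, training=True)
    d_fake = critic(fake_images, training=True)
    g_loss = -tf.reduce_mean(d_fake)
    d_loss = tf.reduce_mean(d_fake) - tf.reduce_mean(d_real)
    ## Gradient penalty on random interpolates between real and fake images:
    with tf.GradientTape() as tape:
        alpha = tf.random.uniform(
            shape=[d_real.shape[0], 1, 1, 1], minval=0.0, maxval=1.0)
        interpolated = alpha*real_images + (1 - alpha)*fake_images
        tape.watch(interpolated)
        d_interp = critic(interpolated, training=True)
    grads = tape.gradient(d_interp, [interpolated])[0]
    grads_l2 = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
    grad_penalty = tf.reduce_mean(tf.square(grads_l2 - 1.0))
    return g_loss, d_loss + lambda_gp*grad_penalty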



@@ -367,7 +375,7 @@ def create_samples(g_model, input_z):
ax.tick_params(axis='both', which='major', labelsize=15)
ax2.tick_params(axis='both', which='major', labelsize=15)

#plt.savefig('/content/drive/My Drive/Colab Notebooks/PyML-3rd-edition/ch17-wdcgan-learning-curve.pdf')
#plt.savefig('images/ch17-wdcgan-learning-curve.pdf')
plt.show()
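# The learning-curve figure above presumably consumes all_losses; one way to
# flatten the per-epoch lists into a single per-iteration array for plotting
# (a commented-out sketch, assuming the training loop above has been run):
#import itertools
#losses_arr = np.array(list(itertools.chain(*all_losses)))
#g_curve, d_curve = losses_arr[:, 0], losses_arr[:, 1]  # generator / critic losses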


@@ -381,30 +389,34 @@ def create_samples(g_model, input_z):
ax.set_xticks([])
ax.set_yticks([])
if j == 0:
ax.text(-0.06, 0.5, 'Epoch {}'.format(e),
rotation=90, size=18, color='red',
horizontalalignment='right',
verticalalignment='center',
transform=ax.transAxes)
ax.text(
-0.06, 0.5, 'Epoch {}'.format(e),
rotation=90, size=18, color='red',
horizontalalignment='right',
verticalalignment='center',
transform=ax.transAxes)

image = epoch_samples[e-1][j]
ax.imshow(image, cmap='gray_r')

#plt.savefig('/content/drive/My Drive/Colab Notebooks/PyML-3rd-edition/ch17-wdcgan-samples.pdf')
#plt.savefig('images/ch17-wdcgan-samples.pdf')
plt.show()







# ## Mode collapse





#
# ----

#
#
# Readers may ignore the next cell.
#
#


