For this project I wanted to let the user build their own dataset with p5/ml5, then display the results of training on that dataset live. Originally I planned to use unet/bodypix to crop out the person's figure, but I soon realized that the tensorflow DCGAN doesn't handle the alpha channel well, which made every generated image come out entirely black. I doubled back and switched to faceapi, which can return a full square crop of the person's face from the webcam, and saved those crops as the dataset. I then used jquery/ajax to call the python script that runs the training process.
I got much better results this way.
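To illustrate the alpha problem: a DCGAN set up for 3-channel RGB input has no channel for transparency, which is presumably why the RGBA crops trained into all-black images. If I had stayed with the bodypix route, the crops would have needed flattening to RGB first, something like this sketch (assuming Pillow; the folder names and the black background are placeholders of mine, not part of the project as it ran):

import os
from PIL import Image

SRC, DST = "dataset_rgba", "dataset"  # hypothetical folders
os.makedirs(DST, exist_ok=True)

for name in os.listdir(SRC):
    rgba = Image.open(os.path.join(SRC, name)).convert("RGBA")
    # Composite onto an opaque background so transparent pixels
    # become real color values the 3-channel DCGAN can see.
    background = Image.new("RGBA", rgba.size, (0, 0, 0, 255))
    Image.alpha_composite(background, rgba).convert("RGB").save(os.path.join(DST, name))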
Dataset:
Result: (Cycled on an async loop)
I soon ran into a problem with my method of downloading the dataset and uploading generated images. For security reasons, the browser sketch can't write files directly to the webserver it's running on; I'd need something like node.js (or a small Flask backend) to transfer the files back and forth. So, as it stands, I had to run the python file manually in order to get the generated images.
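For reference, the missing piece would be a small backend next to the sketch. A minimal sketch of what that could look like (assuming Flask; the /upload route and the "face" field name are hypothetical, not something the project currently has):

import os
from flask import Flask, request

app = Flask(__name__)
os.makedirs("dataset/faces", exist_ok=True)

@app.route("/upload", methods=["POST"])
def upload():
    # The p5 sketch would POST each face crop here instead of
    # triggering a browser download with save().
    f = request.files["face"]
    f.save(os.path.join("dataset/faces", f.filename))
    return "ok"

if __name__ == "__main__":
    app.run(port=5000)

With something like this running, the sketch could upload crops with a FormData request instead of save(), and read the generated images back from the same origin.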
train.py:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
#from flask import Flask
#app = Flask(__name__)
#@app.route('/')
#def index():
# Enable GPU memory growth if a GPU is available.
physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
dataset = keras.preprocessing.image_dataset_from_directory(
    directory="dataset", label_mode=None, image_size=(64, 64), batch_size=32,
    shuffle=True
).map(lambda x: x / 255.0)
discriminator = keras.Sequential(
    [
        keras.Input(shape=(64, 64, 3)),
        layers.Conv2D(64, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(0.2),
        layers.Conv2D(128, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(0.2),
        layers.Conv2D(128, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(0.2),
        layers.Flatten(),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid"),
    ]
)
discriminator.summary()
latent_dim = 128
generator = keras.Sequential(
    [
        layers.Input(shape=(latent_dim,)),
        layers.Dense(8 * 8 * 128),
        layers.Reshape((8, 8, 128)),
        layers.Conv2DTranspose(128, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(0.2),
        layers.Conv2DTranspose(256, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(0.2),
        layers.Conv2DTranspose(512, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(0.2),
        layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid"),
    ]
)
generator.summary()
opt_gen = keras.optimizers.Adam(1e-4)
opt_disc = keras.optimizers.Adam(1e-4)
loss_fn = keras.losses.BinaryCrossentropy()
# Make sure the output folder exists before the loop starts writing to it.
os.makedirs("generated_images", exist_ok=True)

for epoch in range(100):
    for idx, real in enumerate(tqdm(dataset)):
        batch_size = real.shape[0]
        random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))
        fake = generator(random_latent_vectors)

        # Periodically save a sample so the p5 sketch can load it.
        if idx % 100 == 0:
            img = keras.preprocessing.image.array_to_img(fake[0])
            img.save(f"generated_images/gen_{epoch}.png")

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        with tf.GradientTape() as disc_tape:
            loss_disc_real = loss_fn(tf.ones((batch_size, 1)), discriminator(real))
            loss_disc_fake = loss_fn(tf.zeros((batch_size, 1)), discriminator(fake))
            loss_disc = (loss_disc_real + loss_disc_fake) / 2
        grads = disc_tape.gradient(loss_disc, discriminator.trainable_weights)
        opt_disc.apply_gradients(zip(grads, discriminator.trainable_weights))

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z)))
        with tf.GradientTape() as gen_tape:
            fake = generator(random_latent_vectors)
            output = discriminator(fake)
            loss_gen = loss_fn(tf.ones((batch_size, 1)), output)
        grads = gen_tape.gradient(loss_gen, generator.trainable_weights)
        opt_gen.apply_gradients(zip(grads, generator.trainable_weights))

# return render_template('index.html')
#if __name__ == "__main__":
#    app.run()
p5 sketch:
let faceapi;
let video;
let detections;
let imgCount = 0;  // face crops saved so far (target: 500)
let db;
let sr = false;    // true once the user starts recording
let sr2 = false;   // true once the python script has been called
let loadedImgs = [];
let startImg = true;

const detection_options = {
  withLandmarks: true,
  withDescriptors: false,
};
function setup() {
  createCanvas(720, 720);
  video = createCapture(VIDEO);
  video.hide();
  db = createButton("Click to Record Data");
  db.position(15, height - 15);
  db.mousePressed(startRecording);
  faceapi = ml5.faceApi(video, detection_options, modelReady);
}

function modelReady() {
  faceapi.detect(gotResults);
}

function draw() {
  if (sr == true) {
    loadImgs();
  }
  if (loadedImgs.length > 0 && startImg == true) {
    startImg = false;
    displayImg();
  }
  // Small webcam preview while the generated images fill the canvas.
  image(video, 0, height - 120, 160, 120);
}
async function displayImg() {
  // Cycle through whatever generated images have loaded so far.
  while (true) {
    for (let i = 0; i < 99; i++) {
      await sleep(500);
      if (loadedImgs[i] === undefined) {
        continue;
      }
      image(loadedImgs[i], 0, 0, 720, 720);
    }
  }
}

function sleep(millisecondsDuration) {
  return new Promise((resolve) => {
    setTimeout(resolve, millisecondsDuration);
  });
}

function startRecording() {
  sr = true;
}
function loadImgs() {
  for (let i = 0; i < 99; i++) {
    if (loadedImgs[i] === undefined) {
      loadedImgs[i] = loadImage("/generated_images/gen_" + str(i) + ".png", yesImg, noImg);
    }
  }
}

function yesImg() {
  // nothing
}

function noImg() {
  console.log("no img");
}
function gotResults(err, result) {
  if (err) {
    console.log(err);
    return;
  }
  detections = result;
  if (detections && imgCount < 500) {
    if (detections.length > 0) {
      siphonFace(detections);
    }
  }
  else if (imgCount == 500 && sr2 == false) {
    sr2 = true; // make sure the python script is only called once
    console.log("calling py");
    callPy();
  }
  faceapi.detect(gotResults);
}
function callPy() {
  $.ajax({
    url: "train.py",
    context: document.body
  });
}
function siphonFace(detections) {
  for (let i = 0; i < detections.length; i++) {
    const alignedRect = detections[i].alignedRect;
    const x = alignedRect._box._x;
    const y = alignedRect._box._y;
    const xw = alignedRect._box._width;
    const yw = alignedRect._box._height;
    if (sr == true) {
      // Copy the detected face box out of the webcam feed, pixel by pixel.
      let faceSolo = createImage(int(xw), int(yw));
      faceSolo.loadPixels();
      for (let ix = 0; ix < faceSolo.width; ix++) {
        for (let iy = 0; iy < faceSolo.height; iy++) {
          faceSolo.set(ix, iy, video.get(x + ix, y + iy));
        }
      }
      faceSolo.updatePixels();
      save(faceSolo, "ref_" + str(imgCount) + ".png");
      imgCount++;
    }
  }
}
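One caveat on callPy(): as written, the $.ajax request only fetches the text of train.py; the browser can't execute python. With a backend like the one sketched earlier, the request would instead point at a route that launches training server-side, roughly like this (again assuming Flask; the /train route and the background thread are placeholders of mine, not something the project has):

import subprocess
import threading
from flask import Flask

app = Flask(__name__)

@app.route("/train")
def train():
    # Run train.py in the background so the request returns
    # immediately while training continues.
    threading.Thread(
        target=lambda: subprocess.run(["python", "train.py"]),
        daemon=True,
    ).start()
    return "training started"

if __name__ == "__main__":
    app.run(port=5000)

Then callPy() would just hit that route, e.g. $.ajax({ url: "/train" }).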