diff --git a/.gitignore b/.gitignore
index 4aff7f80540f9ad7e7d93e3be7265da9fab766a5..181a5519b9ab844dc28fd43e9cefdbaf12c1e505 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ test
 models
 terraform.tfstate
 terraform.tfstate.backup
+terraform.tfvars
diff --git a/README.md b/README.md
index 04334e83b4798a333c4c15efe1892656e3026bcd..4ca23bf632bc54c90f3e04bf573fbd8c88b98579 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ This port is based directly on the torch implementation, and not on an existing
 # clone this repo
 git clone https://github.com/affinelayer/pix2pix-tensorflow.git
 cd pix2pix-tensorflow
-# download the CMP Facades dataset http://cmp.felk.cvut.cz/~tylecr1/facade/
+# download the CMP Facades dataset (generated from http://cmp.felk.cvut.cz/~tylecr1/facade/)
 python tools/download-dataset.py facades
 # train the model (this may take 1-8 hours depending on GPU, on CPU you will be waiting for a bit)
 python pix2pix.py \
@@ -197,17 +197,6 @@ The test run will output an HTML file at `facades_test/index.html` that shows in
 
 <img src="docs/test-html.png" width="300px"/>
 
-## Exporting
-
-You can export the model to be served or uploaded with `--mode export`. As with testing, you should specify the checkpoint to use with `--checkpoint`.
-
-```sh
-python pix2pix.py \
-  --mode export \
-  --output_dir facades_export \
-  --checkpoint facades_train
-```
-
 ## Code Validation
 
 Validation of the code was performed on a Linux machine with a ~1.3 TFLOPS Nvidia GTX 750 Ti GPU and an Azure NC6 instance with a K80 GPU.
diff --git a/pix2pix.py b/pix2pix.py
index 65d0fba3b00a64e130e8b93747c7af7d4d09da04..702276030a4ee402d0a89c95264aecda71c3f0fe 100644
--- a/pix2pix.py
+++ b/pix2pix.py
@@ -42,6 +42,9 @@ parser.add_argument("--lr", type=float, default=0.0002, help="initial learning r
 parser.add_argument("--beta1", type=float, default=0.5, help="momentum term of adam")
 parser.add_argument("--l1_weight", type=float, default=100.0, help="weight on L1 term for generator gradient")
 parser.add_argument("--gan_weight", type=float, default=1.0, help="weight on GAN term for generator gradient")
+
+# export options
+parser.add_argument("--output_filetype", default="png", choices=["png", "jpeg"])
 a = parser.parse_args()
 
 EPS = 1e-12
@@ -588,7 +591,12 @@ def main():
         batch_output = deprocess(create_generator(preprocess(batch_input), 3))
 
         output_image = tf.image.convert_image_dtype(batch_output, dtype=tf.uint8)[0]
-        output_data = tf.image.encode_png(output_image)
+        if a.output_filetype == "png":
+            output_data = tf.image.encode_png(output_image)
+        elif a.output_filetype == "jpeg":
+            output_data = tf.image.encode_jpeg(output_image, quality=80)
+        else:
+            raise Exception("invalid filetype")
         output = tf.convert_to_tensor([tf.encode_base64(output_data)])
 
         key = tf.placeholder(tf.string, shape=[1])
diff --git a/server/Dockerfile b/server/Dockerfile
index 71da43865eb9fb5c77ccee8f926c85ca43e06c76..ad9910fe50227ccadbb17c918cd1764d05ba4561 100644
--- a/server/Dockerfile
+++ b/server/Dockerfile
@@ -31,10 +31,9 @@ RUN pip install \
     rsa==3.4.2 \
     six==1.10.0 \
     uritemplate==3.0.0 \
-    tensorflow==0.12.1
+    tensorflow==1.0.0
 
 WORKDIR /server
 COPY models models
 COPY static static
-COPY serve.py serve.py
-EXPOSE 8080
\ No newline at end of file
+COPY serve.py serve.py
\ No newline at end of file
diff --git a/server/README.md b/server/README.md
index 3647819ea65d9a4f1f216c0b5cad15d494e2756f..0804c90a026dfdfbc34a866af9590b936d95203d 100644
--- a/server/README.md
+++ b/server/README.md
@@ -4,22 +4,33 @@ Host pix2pix-tensorflow models to be used with something like the [Image-to-Imag
 
 This is a simple python server that serves models exported from `pix2pix.py --mode export`. It can serve local models or use [Cloud ML](https://cloud.google.com/ml/) to run the model.
 
-## Local
+## Exporting
+
+You can export a model to be served with `--mode export`. As with testing, you should specify the checkpoint to use with `--checkpoint`.
+
+```sh
+python ../pix2pix.py \
+  --mode export \
+  --output_dir models/facades \
+  --checkpoint ../facades_train
+```
+
+## Local Serving
 
 Using the [pix2pix-tensorflow Docker image](https://hub.docker.com/r/affinelayer/pix2pix-tensorflow/):
 
 ```sh
-# export a model to upload
-python ../tools/dockrun.py python export-example-model.py --output_dir models/example
+# export a model to upload (if you did not export one above)
+python ../tools/dockrun.py python tools/export-example-model.py --output_dir models/example
 # process an image with the model using local tensorflow
-python ../tools/dockrun.py python process-local.py \
+python ../tools/dockrun.py python tools/process-local.py \
     --model_dir models/example \
     --input_file static/facades-input.png \
     --output_file output.png
 # run local server
 python ../tools/dockrun.py --port 8000 python serve.py --port 8000 --local_models_dir models
 # test the local server
-python process-remote.py \
+python tools/process-remote.py \
     --input_file static/facades-input.png \
     --url http://localhost:8000/example \
     --output_file output.png
@@ -33,32 +44,31 @@ If you open [http://localhost:8000/](http://localhost:8000/) in a browser, you s
 
 Extract those to the models directory and restart the server to have it host the models.
 
-## Cloud ML
+## Cloud ML Serving
 
 For this you'll want to generate a service account JSON file from https://console.cloud.google.com/iam-admin/serviceaccounts/project (select "Furnish a new private key"). If you are already logged in with the gcloud SDK, the script will auto-detect credentials from that if you leave off the `--credentials` option.
 
 ```sh
 # upload model to google cloud ml
-python ../tools/dockrun.py python upload-model.py \
+python ../tools/dockrun.py python tools/upload-model.py \
     --bucket your-models-bucket-name-here \
     --model_name example \
     --model_dir models/example \
     --credentials service-account.json
 # process an image with the model using google cloud ml
-python ../tools/dockrun.py python process-cloud.py \
+python ../tools/dockrun.py python tools/process-cloud.py \
     --model example \
     --input_file static/facades-input.png \
     --output_file output.png \
     --credentials service-account.json
 ```
 
-## Google Cloud Platform
+## Running serve.py on Google Cloud Platform
 
 Assuming you have gcloud and docker setup:
 
 ```sh
 export GOOGLE_PROJECT=<project name>
-export GOOGLE_CREDENTIALS="$(cat <path to service-account.json>)"
 # build image
 # make sure models are in a directory called "models" in the current directory
 sudo docker build --rm --tag us.gcr.io/$GOOGLE_PROJECT/pix2pix-server .
@@ -66,15 +76,15 @@ sudo docker build --rm --tag us.gcr.io/$GOOGLE_PROJECT/pix2pix-server .
 sudo docker run --publish 8080:8080 --rm --name server us.gcr.io/$GOOGLE_PROJECT/pix2pix-server python -u serve.py \
     --port 8080 \
     --local_models_dir models
-python process-remote.py \
+python tools/process-remote.py \
     --input_file static/facades-input.png \
     --url http://localhost:8080/example \
     --output_file output.png
 # publish image to private google container repository
-gcloud docker -- push us.gcr.io/$GOOGLE_PROJECT/pix2pix-server
+python tools/upload-image.py --project $GOOGLE_PROJECT --version v1
 # setup server
-# need to change the launch arguments for google_compute_instance.pix2pix-singleton
-# to use local models instead of cloud models if desired
-python ../tools/dockrun.py terraform plan -var "GOOGLE_PROJECT=$GOOGLE_PROJECT" -target google_compute_instance.pix2pix-singleton
-python ../tools/dockrun.py terraform apply -var "GOOGLE_PROJECT=$GOOGLE_PROJECT" -target google_compute_instance.pix2pix-singleton
+cp terraform.tfvars.example terraform.tfvars
+# edit terraform.tfvars to put your cloud info in there
+python ../tools/dockrun.py terraform plan
+python ../tools/dockrun.py terraform apply
 ```
diff --git a/server/deployment.tf b/server/deployment.tf
index 2aaf7d4650d42563c761017a39e76fb7f09eacbf..8370d518afd2baa8e6e76fbbabe705e0a2c6f5ed 100644
--- a/server/deployment.tf
+++ b/server/deployment.tf
@@ -1,12 +1,16 @@
+variable "google_project" {}
+variable "google_credentials_file" {}
+variable "server_image_version" {}
+
 provider "google" {
-  region = "us-central1"
+  region      = "us-central1"
+  credentials = "${file(var.google_credentials_file)}"
+  project     = "${var.google_project}"
 }
 
-variable "GOOGLE_PROJECT" {}
-
 # cluster
 
-resource "google_compute_instance_template" "pix2pix" {
+resource "google_compute_instance_template" "cluster" {
   name_prefix  = "pix2pix-template-"
   machine_type = "n1-highcpu-2"
 
@@ -49,9 +53,11 @@ write_files:
     [Service]
     Environment="HOME=/home/pix2pix"
     ExecStartPre=/usr/share/google/dockercfg_update.sh
-    ExecStart=/usr/bin/docker run --log-driver=gcplogs --restart always -u 2000 --publish 80:8080 --name=pix2pix us.gcr.io/${var.GOOGLE_PROJECT}/pix2pix-server:v3 python -u serve.py --port 8080 --local_models_dir models --cloud_model_names facades_BtoA,edges2cats_AtoB,edges2shoes_AtoB,edges2handbags_AtoB
+    ExecStart=/usr/bin/docker run --rm --log-driver=gcplogs -u 2000 --publish 80:8080 --name=pix2pix us.gcr.io/${var.google_project}/pix2pix-server:${var.server_image_version} python -u serve.py --port 8080 --local_models_dir models --cloud_model_names facades_BtoA,edges2cats_AtoB,edges2shoes_AtoB,edges2handbags_AtoB
     ExecStop=/usr/bin/docker stop pix2pix
     ExecStopPost=/usr/bin/docker rm pix2pix
+    Restart=always
+    RestartSec=30
 
 runcmd:
 - iptables -A INPUT -p tcp --dport 80 -j ACCEPT
@@ -69,29 +75,30 @@ EOF
   }
 }
 
-resource "google_compute_http_health_check" "pix2pix" {
+resource "google_compute_http_health_check" "cluster" {
   name         = "pix2pix-check"
   request_path = "/health"
 
-  timeout_sec        = 5
-  check_interval_sec = 5
+  timeout_sec         = 5
+  check_interval_sec  = 10
+  unhealthy_threshold = 3
 }
 
-resource "google_compute_target_pool" "pix2pix" {
+resource "google_compute_target_pool" "cluster" {
   name = "pix2pix-pool"
 
   health_checks = [
-    "${google_compute_http_health_check.pix2pix.name}",
+    "${google_compute_http_health_check.cluster.name}",
   ]
 }
 
-resource "google_compute_instance_group_manager" "pix2pix" {
+resource "google_compute_instance_group_manager" "cluster" {
   name = "pix2pix-manager"
 
-  instance_template  = "${google_compute_instance_template.pix2pix.self_link}"
+  instance_template  = "${google_compute_instance_template.cluster.self_link}"
   base_instance_name = "pix2pix"
   zone               = "us-central1-c"
-  target_pools       = ["${google_compute_target_pool.pix2pix.self_link}"]
+  target_pools       = ["${google_compute_target_pool.cluster.self_link}"]
 
   // don't update instances with terraform, which supposedly can't do a rolling restart
   // use this to update them instead:
@@ -99,25 +106,25 @@ resource "google_compute_instance_group_manager" "pix2pix" {
   update_strategy = "NONE"
 }
 
-resource "google_compute_address" "pix2pix" {
-  name = "pix2pix-address"
+resource "google_compute_address" "cluster" {
+  name = "pix2pix-cluster"
 }
 
-resource "google_compute_forwarding_rule" "pix2pix" {
+resource "google_compute_forwarding_rule" "cluster" {
   name       = "pix2pix-balancer"
-  target     = "${google_compute_target_pool.pix2pix.self_link}"
+  target     = "${google_compute_target_pool.cluster.self_link}"
   port_range = "80-80"
-  ip_address = "${google_compute_address.pix2pix.address}"
+  ip_address = "${google_compute_address.cluster.address}"
 }
 
-resource "google_compute_autoscaler" "pix2pix" {
+resource "google_compute_autoscaler" "cluster" {
   name = "pix2pix-autoscaler"
   zone = "us-central1-c"
-  target = "${google_compute_instance_group_manager.pix2pix.self_link}"
+  target = "${google_compute_instance_group_manager.cluster.self_link}"
 
   autoscaling_policy = {
-    max_replicas    = 0
-    min_replicas    = 0
+    max_replicas    = 16
+    min_replicas    = 1
     cooldown_period = 60
 
     cpu_utilization {
@@ -125,69 +132,3 @@ resource "google_compute_autoscaler" "pix2pix" {
     }
   }
 }
-
-# singleton
-
-resource "google_compute_instance" "pix2pix-singleton" {
-  name         = "pix2pix-singleton"
-  machine_type = "g1-small"
-  zone         = "us-central1-c"
-
-  tags           = ["http-server"]
-  can_ip_forward = false
-
-  scheduling {
-    automatic_restart   = true
-    on_host_maintenance = "MIGRATE"
-  }
-
-  disk {
-    image = "cos-cloud/cos-stable"
-  }
-
-  network_interface {
-    network = "default"
-
-    access_config {
-      nat_ip = "${google_compute_address.pix2pix-singleton.address}"
-    }
-  }
-
-  metadata {
-    user-data = <<EOF
-#cloud-config
-
-users:
-- name: pix2pix
-  uid: 2000
-
-write_files:
-- path: /etc/systemd/system/pix2pix.service
-  permissions: 0644
-  owner: root
-  content: |
-    [Unit]
-    Description=Run pix2pix
-
-    [Service]
-    Environment="HOME=/home/pix2pix"
-    ExecStartPre=/usr/share/google/dockercfg_update.sh
-    ExecStart=/usr/bin/docker run --log-driver=gcplogs --restart always -u 2000 --publish 80:8080 --name=pix2pix us.gcr.io/${var.GOOGLE_PROJECT}/pix2pix-server python -u serve.py --port 8080 --cloud_model_names facades_BtoA,edges2cats_AtoB,edges2shoes_AtoB,edges2handbags_AtoB
-    ExecStop=/usr/bin/docker stop pix2pix
-    ExecStopPost=/usr/bin/docker rm pix2pix
-
-runcmd:
-- iptables -A INPUT -p tcp --dport 80 -j ACCEPT
-- systemctl daemon-reload
-- systemctl start pix2pix.service
-EOF
-  }
-
-  service_account {
-    scopes = ["https://www.googleapis.com/auth/logging.write", "https://www.googleapis.com/auth/devstorage.read_only", "https://www.googleapis.com/auth/cloud-platform"]
-  }
-}
-
-resource "google_compute_address" "pix2pix-singleton" {
-  name = "pix2pix-singleton"
-}
diff --git a/server/serve.py b/server/serve.py
index 1ee154b6288ae6ad8e6bacd32088913fed3f28b0..057ced91e80f3174e81c0896d106704f84ca7171 100644
--- a/server/serve.py
+++ b/server/serve.py
@@ -188,7 +188,10 @@ class Handler(BaseHTTPRequestHandler):
                 # add any missing padding
                 output_b64data += "=" * (-len(output_b64data) % 4)
                 output_data = base64.urlsafe_b64decode(output_b64data)
"image/png" + if output_data.startswith("\x89PNG"): + headers["content-type"] = "image/png" + else: + headers["content-type"] = "image/jpeg" body = output_data except Exception as e: print("exception", traceback.format_exc()) diff --git a/server/terraform.tfvars.example b/server/terraform.tfvars.example new file mode 100644 index 0000000000000000000000000000000000000000..dc870d733f2c360bb9ccbfec30e286dce809bd6c --- /dev/null +++ b/server/terraform.tfvars.example @@ -0,0 +1,5 @@ +google_project = "example" + +google_credentials_file = "service-account.json" + +server_image_version = "v1" \ No newline at end of file diff --git a/server/export-example-model.py b/server/tools/export-example-model.py similarity index 100% rename from server/export-example-model.py rename to server/tools/export-example-model.py diff --git a/server/process-cloud.py b/server/tools/process-cloud.py similarity index 100% rename from server/process-cloud.py rename to server/tools/process-cloud.py diff --git a/server/process-local.py b/server/tools/process-local.py similarity index 100% rename from server/process-local.py rename to server/tools/process-local.py diff --git a/server/process-remote.py b/server/tools/process-remote.py similarity index 87% rename from server/process-remote.py rename to server/tools/process-remote.py index aab76998dd6891fe64df446b02905b117ded99e7..9cd5c48fe0c289ac56572f91e12deaad7bc1d80b 100644 --- a/server/process-remote.py +++ b/server/tools/process-remote.py @@ -3,9 +3,9 @@ from __future__ import division from __future__ import print_function try: - from urllib.request import urlopen + from urllib.request import urlopen # python 3 except ImportError: - from urllib2 import urlopen # python 3 + from urllib2 import urlopen # python 2 import argparse diff --git a/server/rolling-update.py b/server/tools/rolling-update.py similarity index 100% rename from server/rolling-update.py rename to server/tools/rolling-update.py diff --git a/server/tools/upload-image.py b/server/tools/upload-image.py new file mode 100644 index 0000000000000000000000000000000000000000..974999ac00c044d05033374d7450cd4f27bae0ea --- /dev/null +++ b/server/tools/upload-image.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import subprocess + +parser = argparse.ArgumentParser() +parser.add_argument("--version", required=True, help="version to build") +parser.add_argument("--project", help="Google Cloud Project to use") +a = parser.parse_args() + +def main(): + version_tag = "us.gcr.io/%s/pix2pix-server:%s" % (a.project, a.version) + latest_tag = "us.gcr.io/%s/pix2pix-server:latest" % (a.project) + subprocess.check_call("docker tag %s %s" % (version_tag, latest_tag)) + for tag in [version_tag, latest_tag]: + subprocess.check_call("gcloud docker -- push %s" % tag, shell=True) + +main() diff --git a/server/upload-model.py b/server/tools/upload-model.py similarity index 100% rename from server/upload-model.py rename to server/tools/upload-model.py diff --git a/tools/dockrun.py b/tools/dockrun.py index 94480d44283d392b2f368f8cacb82dfaa9ee8d2a..611573783bcef0f62973135034657d49bdb64eb4 100644 --- a/tools/dockrun.py +++ b/tools/dockrun.py @@ -102,15 +102,13 @@ def main(): "--env", "CUDA_CACHE_PATH=/host/tmp/cuda-cache", "--env", - "GOOGLE_PROJECT=" + os.environ.get("GOOGLE_PROJECT", ""), - "--env", - "GOOGLE_CREDENTIALS=" + os.environ.get("GOOGLE_CREDENTIALS", ""), + "HOME=/host" + os.environ["HOME"], ] if a.port is not None: 
         docker_args += ["--publish", "%d:%d" % (a.port, a.port)]
 
-    args = [docker_path, "run"] + docker_args + ["affinelayer/pix2pix-tensorflow"] + cmd
+    args = [docker_path, "run"] + docker_args + ["affinelayer/pix2pix-tensorflow:v2"] + cmd
 
     if not os.access("/var/run/docker.sock", os.R_OK):
         args = ["sudo"] + args
diff --git a/tools/download-dataset.py b/tools/download-dataset.py
index 6f2466c20072d23f1522b72442237b28ce7f76c2..889ebd336a2152b69d163c7e428263e9c290b8b9 100644
--- a/tools/download-dataset.py
+++ b/tools/download-dataset.py
@@ -3,9 +3,9 @@ from __future__ import division
 from __future__ import print_function
 
 try:
-    from urllib.request import urlopen
+    from urllib.request import urlopen # python 3
 except ImportError:
-    from urllib2 import urlopen # python 3
+    from urllib2 import urlopen # python 2
 import sys
 import tarfile
 import tempfile
diff --git a/tools/process.py b/tools/process.py
index 06ffcffb262a38a763fd08cfe511a7f1d589eb7d..f8298e3db267c728d82d9217470ea6a659680979 100644
--- a/tools/process.py
+++ b/tools/process.py
@@ -14,6 +14,8 @@ import threading
 import time
 import multiprocessing
 
+edge_pool = None
+
 parser = argparse.ArgumentParser()
 parser.add_argument("--input_dir", required=True, help="path to folder containing images")
@@ -126,11 +128,6 @@ def run_caffe(src):
     net.forward()
     return net.blobs["sigmoid-fuse"].data[0][0,:,:]
 
-
-# create the pool before we launch processing threads
-# we must create the pool after run_caffe is defined
-if a.operation == "edges":
-    edge_pool = multiprocessing.Pool(a.workers)
 
 def edges(src):
     # based on https://github.com/phillipi/pix2pix/blob/master/scripts/edges/batch_hed.py
@@ -212,7 +209,7 @@ def process(src_path, dst_path):
 
 complete_lock = threading.Lock()
-start = time.time()
+start = None
 num_complete = 0
 total = 0
 
@@ -241,16 +238,32 @@ def main():
 
     src_paths = []
     dst_paths = []
+    skipped = 0
     for src_path in im.find(a.input_dir):
         name, _ = os.path.splitext(os.path.basename(src_path))
         dst_path = os.path.join(a.output_dir, name + ".png")
-        if not os.path.exists(dst_path):
+        if os.path.exists(dst_path):
+            skipped += 1
+        else:
             src_paths.append(src_path)
             dst_paths.append(dst_path)
 
+    print("skipping %d files that already exist" % skipped)
+
     global total
     total = len(src_paths)
 
+    print("processing %d files" % total)
+
+    global start
+    start = time.time()
+
+    if a.operation == "edges":
+        # use a multiprocessing pool for this operation so it can use multiple CPUs
+        # create the pool before we launch processing threads
+        global edge_pool
+        edge_pool = multiprocessing.Pool(a.workers)
+
     if a.workers == 1:
         with tf.Session() as sess:
             for src_path, dst_path in zip(src_paths, dst_paths):
@@ -290,4 +303,4 @@ def main():
         coord.request_stop()
         coord.join(threads)
 
-main()
\ No newline at end of file
+main()
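Note on the serve.py change above: the response content-type is now chosen by sniffing the decoded image bytes rather than assuming PNG. As a standalone illustration (not part of the patch; the helper name below is made up), the same decision can be written against the standard PNG signature and JPEG start-of-image marker:

```python
# Minimal sketch of MIME-type sniffing for the two formats pix2pix.py can
# emit with --output_filetype (png or jpeg); sniff_content_type is a
# hypothetical name, not a function in this repository.
def sniff_content_type(data):
    if data.startswith(b"\x89PNG\r\n\x1a\n"):  # 8-byte PNG signature
        return "image/png"
    if data.startswith(b"\xff\xd8"):  # JPEG start-of-image marker
        return "image/jpeg"
    raise ValueError("unrecognized image data")

# Example: sniff_content_type(open("output.png", "rb").read()) -> "image/png"
```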