From 50b368874cc54e00e7949ed5c84a641196891d5e Mon Sep 17 00:00:00 2001 From: Otthorn Date: Wed, 9 Jun 2021 16:55:29 +0200 Subject: [PATCH] :tada: initial commit --- README.md | 78 ++++++++++++++++- tensorboard_image_extractor.py | 148 +++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 tensorboard_image_extractor.py diff --git a/README.md b/README.md index 29cf53a..61a1ef8 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,78 @@ -# tensorboard_image_extractor +# Tensorboard Image Extractor +## What is this program and why? +This is a short script to extract images and create animated gifs from them +using a tensorboard event file. Unfortunately this feature is not native inside +tensorboard, inside which only the graphing data can be downloaded (in `csv` or +`json` format). +The only other program which I found that did a similar thing is +https://github.com/lanpa/tensorboard-dumper/ which I took inspiration from. + +## How to use it + +The repository can be cloned with git and then you may need to install +some dependencies (like tensorboard): + +``` +pip3 install -r requirements.txt +``` + +You can then run it: + +``` +python3 tensorboard_image_extractor.py -i event.db +``` + +You can get some help by running: + +``` +python3 tensorboard_image_extractor.py --help +``` + +## Tensorboard datastructure + +The following diagram describes a tree of the log directory found in all +machine learning experiments with a tensorboard writer. + +``` +logs/ +├── lupo +│   ├── args.txt +│   ├── config.txt +│   ├── model_005000.pth +│   ├── model_010000.pth +│   ├── model_015000.pth +│   ├── model_020000.pth +│   └── model_025000.pth +└── summaries + └── lupo + └── events.out.tfevents.1623155921.pop-os +``` + +The file which contains all data and images is the `event` file in +`logs/summaries/{run name}/events`. It can be fairly large because every image +is stored inside in binary format. 
+
+## Example
+
+You can create an animated gif, only keeping images with a certain `tag`:
+```
+python3 tensorboard_image_extractor.py -i lupo.events -t "train/level_1/rgb" -o train_level_1_rgb_24h.gif --gif
+```
+
+## Performance
+
+In order to create a gif from a 900 MB event file, it took me just over an
+hour. This is due to the fact that Python has to do the I/O reading from binary
+data and converting the whole file, which is remarkably slow.
+
+It can create large gif files. In the experiment described above the images of
+a single tag were kept and it created a 52 MB gif file.
+
+## Notes
+
+This program is distributed under GNU GPL v3 or later License, which you can
+find a copy of in the repository.
+This program comes with ABSOLUTELY NO WARRANTY
+
+Tensorboard Image Extractor - Copyright (C) 2021 - Otthorn
diff --git a/tensorboard_image_extractor.py b/tensorboard_image_extractor.py
new file mode 100644
index 0000000..78feea5
--- /dev/null
+++ b/tensorboard_image_extractor.py
@@ -0,0 +1,214 @@
+# Tensorboard Image Extractor Copyright (C) 2021 Otthorn
+# License: GNU GPL v3 or later
+
+import argparse
+import io
+
+import tensorboard.compat.proto.event_pb2 as event_pb2
+from PIL import Image
+from tqdm import tqdm
+
+# Framing of one record in an event file (TFRecord format): an 8-byte
+# little-endian payload length, a 4-byte checksum of that length, the
+# payload, then a 4-byte checksum of the payload. The checksums are
+# skipped, not verified.
+LENGTH_SIZE = 8
+HEADER_SIZE = LENGTH_SIZE + 4
+FOOTER_SIZE = 4
+
+
+def read_event(data):
+    """
+    Read one event from the datastream.
+
+    `data` is a bytes-like object (bytes or memoryview) positioned at the
+    start of a record. With a memoryview the returned remainder is a
+    zero-copy slice instead of a copy of the rest of the file.
+
+    Returns a `(remaining_data, event_str)` tuple: the data truncated past
+    the record that was read, and the serialized event as bytes.
+
+    Raises ValueError if `data` ends before the record is complete.
+    """
+    if len(data) < HEADER_SIZE:
+        raise ValueError("truncated record header")
+
+    payload_length = int.from_bytes(data[:LENGTH_SIZE], "little")
+    payload_end = HEADER_SIZE + payload_length
+    record_end = payload_end + FOOTER_SIZE
+    if len(data) < record_end:
+        raise ValueError("truncated record payload")
+
+    event_str = bytes(data[HEADER_SIZE:payload_end])
+    return data[record_end:], event_str
+
+
+def read_file(input_path):
+    """
+    Read a file.
+
+    Read a file in binary mode and return its content. Exits the program
+    with an error message and a non-zero status if no file is found.
+    """
+    try:
+        with open(input_path, "rb") as f:
+            return f.read()
+    except FileNotFoundError:
+        # SystemExit with a message prints it to stderr and exits with
+        # status 1, whereas a bare exit() reports success (status 0).
+        raise SystemExit(
+            f"Input file {input_path} is not a valid path."
+        ) from None
+
+
+def decode_image(img):
+    """Decode the encoded bytes of a summary image into a PIL image."""
+    return Image.open(io.BytesIO(img.encoded_image_string))
+
+
+def _sanitize_tag(tag):
+    """Replace "/" and " " with "_" so the tag can be used in a file name."""
+    return tag.replace("/", "_").replace(" ", "_")
+
+
+def _iter_events(data):
+    """
+    Yield every event stored in `data` while updating a progress bar.
+
+    Stops with a warning at a truncated trailing record (e.g. when the
+    file is still being written) instead of parsing a partial event.
+    """
+    with tqdm(total=len(data)) as pbar:
+        while data:
+            try:
+                remaining, event_str = read_event(data)
+            except ValueError as err:
+                tqdm.write(f"Stopping early: {err}")
+                break
+            pbar.update(len(data) - len(remaining))
+            data = remaining
+
+            event = event_pb2.Event()
+            event.ParseFromString(event_str)
+            yield event
+
+
+def _save_gif(frames, output_path, duration, loop_forever):
+    """Save `frames` as an animated gif, one frame every `duration` ms."""
+    if not frames:
+        print("No image found, no gif was created.")
+        return
+
+    # The first frame is the one being saved, so only the following ones
+    # are appended; passing the whole list would duplicate the first frame.
+    first, *rest = frames
+    save_options = {
+        "save_all": True,
+        "append_images": rest,
+        "duration": duration,
+    }
+    if loop_forever:
+        # Pillow: loop=0 repeats forever, leaving it out plays the gif once.
+        save_options["loop"] = 0
+    first.save(output_path, **save_options)
+
+
+def main(args):
+    """
+    Extract the images of a tensorboard event file.
+
+    Every image (or only the ones tagged `args.tag`) is either saved as
+    `img_{tag}_{step}.png` or, with `args.gif`, gathered into one animated
+    gif written to `args.output`.
+    """
+    # A memoryview lets read_event() walk through the file without copying
+    # the remaining bytes after every record.
+    data = memoryview(read_file(args.input))
+
+    img_list = []
+
+    for event in _iter_events(data):
+        if not event.HasField("summary"):
+            continue
+
+        for value in event.summary.value:
+            if not value.HasField("image"):
+                continue
+
+            # if args.tag is None process everything, else process only
+            # the given tag
+            if args.tag is not None and args.tag != value.tag:
+                continue
+
+            img_d = decode_image(value.image)
+
+            if args.gif:
+                # save an image list for the gif
+                img_list.append(img_d)
+            else:
+                tag = _sanitize_tag(value.tag)
+                filename = f"img_{tag}_{event.step}.png"
+                print(f"Saving as: {filename}")
+                img_d.save(filename, format="png")
+
+    if args.gif:
+        # save as an animated gif
+        print("[DEBUG] saving animated gif")
+        # NOTE: --do-not-loop is a store_false flag, so args.do_not_loop is
+        # True when the gif should loop.
+        _save_gif(
+            img_list,
+            args.output,
+            args.second_per_frame,
+            loop_forever=args.do_not_loop,
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Tensorboard image dumper and gif creator"
+    )
+    parser.add_argument(
+        "--input",
+        "-i",
+        type=str,
+        help="Input file, must be a tensorboard event file",
+        required=True,
+    )
+    parser.add_argument(
+        "--output",
+        "-o",
+        type=str,
+        help="Output file for the gif, must have a .gif extension",
+    )
+    parser.add_argument(
+        "--gif",
+        default=False,
+        action="store_true",
+        help="Save the output as an animated gif",
+    )
+    parser.add_argument(
+        "--do-not-loop",
+        default=True,
+        action="store_false",
+        help="Prevent the gif from looping",
+    )
+    parser.add_argument(
+        "--second-per-frame",
+        "-spf",
+        type=int,
+        default=60,
+        help="Time between each frame (in millisecond)",
+    )
+    parser.add_argument(
+        "--tag",
+        "-t",
+        type=str,
+        help="Select a single tag for the output",
+    )
+
+    args = parser.parse_args()
+    if args.gif and not args.output:
+        # Without this check Pillow fails late, after the whole file was read.
+        parser.error("--output is required when --gif is used")
+    main(args)