From 254faa1b910fdc0db041ee37364e8943787ca828 Mon Sep 17 00:00:00 2001
From: Wernervanrun
Date: Fri, 9 Aug 2024 13:11:15 +0200
Subject: [PATCH] Handle NaN values in TensorBoard scalar extraction

- Updated `extract_scalar_data` to handle NaN values in TensorBoard logs.
- If a scalar value is NaN, the method now falls back to the previous valid value.
- If no previous value is available, a default of 0.0 is used.
- This ensures continuity and robustness in the extracted scalar data.
---
 infer/lib/train/graph_generation.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/infer/lib/train/graph_generation.py b/infer/lib/train/graph_generation.py
index c1c82a0..0708aba 100644
--- a/infer/lib/train/graph_generation.py
+++ b/infer/lib/train/graph_generation.py
@@ -1,6 +1,7 @@
 import os
 from typing import List
 import matplotlib.pyplot as plt
+import numpy as np
 import logging
 
 # Suppress TensorBoard event processing logs
@@ -58,11 +59,21 @@ def extract_scalar_data(log_dir):
         if tag in ea.Tags()['scalars']:
             scalar_events = ea.Scalars(tag)
             scalar_data[tag] = {}
+            previous_value = 0.0 # Initialize fallback value
+
             for event in scalar_events:
+                value = event.value
+                # Check if value is NaN, use previous value or fallback to 0.0
+                if np.isnan(value):
+                    value = previous_value
+
                 if event.step not in scalar_data[tag]:
-                    scalar_data[tag][event.step] = [event.value]
+                    scalar_data[tag][event.step] = [value]
                 else:
-                    scalar_data[tag][event.step].append(event.value)
+                    scalar_data[tag][event.step].append(value)
+
+                previous_value = value # Update previous value for the next iteration
+
             # Calculate the average for each step. Restarting training can cause multiple events for the same step.
             scalar_data[tag] = {step: sum(values) / len(values) for step, values in scalar_data[tag].items()}
         else: