Handle NaN values in TensorBoard scalar extraction

- Updated `extract_scalar_data` to handle NaN values in TensorBoard logs.
- If a scalar value is NaN, the function now falls back to the previous valid value recorded for the same tag.
- If no previous value is available (i.e. the first event of a tag is NaN), a default of 0.0 is used.
- This keeps NaN values out of the per-step averages, so the extracted scalar series stays continuous and usable downstream.
This commit is contained in:
Wernervanrun 2024-08-09 13:11:15 +02:00
parent 1b7add90ea
commit 254faa1b91

View File

@ -1,6 +1,7 @@
import os import os
from typing import List from typing import List
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np
import logging import logging
# Suppress TensorBoard event processing logs # Suppress TensorBoard event processing logs
@ -58,11 +59,21 @@ def extract_scalar_data(log_dir):
if tag in ea.Tags()['scalars']: if tag in ea.Tags()['scalars']:
scalar_events = ea.Scalars(tag) scalar_events = ea.Scalars(tag)
scalar_data[tag] = {} scalar_data[tag] = {}
previous_value = 0.0 # Initialize fallback value
for event in scalar_events: for event in scalar_events:
value = event.value
# Check if value is NaN, use previous value or fallback to 0.0
if np.isnan(value):
value = previous_value
if event.step not in scalar_data[tag]: if event.step not in scalar_data[tag]:
scalar_data[tag][event.step] = [event.value] scalar_data[tag][event.step] = [value]
else: else:
scalar_data[tag][event.step].append(event.value) scalar_data[tag][event.step].append(value)
previous_value = value # Update previous value for the next iteration
# Calculate the average for each step. Restarting training can cause multiple events for the same step. # Calculate the average for each step. Restarting training can cause multiple events for the same step.
scalar_data[tag] = {step: sum(values) / len(values) for step, values in scalar_data[tag].items()} scalar_data[tag] = {step: sum(values) / len(values) for step, values in scalar_data[tag].items()}
else: else: