BibTeX
% Conference paper (MM '25 proceedings) that proposes the ThermVision dataset.
% The citation key mirrors the DOI and is kept unchanged so existing citations keep resolving.
% Braces in title/booktitle protect proper names and acronyms from style-driven downcasing.
% The page range uses the BibTeX double hyphen rather than a literal Unicode en-dash.
@inproceedings{10.1145/3746027.3755448,
  author    = {Farooq, Muhammad Ali and Shariff, Waseem and Corcoran, Peter},
  title     = {{ThermVision}: Exploring {FLUX} for Synthesizing Hyper-Realistic Thermal Face Data and Animations via Image to Video Translation},
  booktitle = {Proceedings of the 33rd {ACM} International Conference on Multimedia},
  series    = {MM '25},
  location  = {Dublin, Ireland},
  pages     = {10161--10170},
  numpages  = {10},
  year      = {2025},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9798400720352},
  doi       = {10.1145/3746027.3755448},
  url       = {https://doi.org/10.1145/3746027.3755448},
  keywords  = {diffusion models, flux, lora, lwir, synthetic data, text-to-image, thermal imaging},
  abstract  = {High-quality thermal facial data is essential for advancing biometric recognition, surveillance, in-cabin driver monitoring, and human-computer interaction, all of which are integral for modern multimedia and interactive AI systems. In this work, we optimized the FLUX text-to-image diffusion model on diverse real-world thermal facial datasets to generate hyper-realistic 2D thermal facial images for both males and females, and propose a new dataset, ThermVision. To enhance their multimedia applicability, these images are processed through a video retargeting pipeline, where driving videos animate realistic facial expressions and head pose variations from a single 2D thermal image, producing high-fidelity thermal facial video sequences. The overall rendered dataset incorporates smart transformations, ensuring diversity across gender balance, extreme head pose variations, expressive facial dynamics, and facial accessories, making it a valuable resource for real-world applications. Additionally, we provide facial detection annotations to facilitate precise feature extraction and thermal-face analysis. To validate our synthetic dataset, we evaluate its effectiveness in thermal gender classification, as downstream machine learning task, along with thermal face localization and facial landmarks detection demonstrating its applicability in real-world scenarios. This approach significantly improves the availability, realism, and integration of thermal facial data, paving the way for more robust and immersive AI-powered thermal imaging applications. The dataset, code and associated models are available at- https://mali-farooq.github.io/ThermVision/},
}
% Data in Brief article describing the ThermVision-DB synthetic LWIR thermal face dataset.
% The citation key is kept unchanged so existing citations keep resolving.
% The doi field holds the bare DOI (no resolver prefix); styles add the resolver themselves.
% Author names use the unambiguous "Last, First" form; braces in the title protect
% the dataset name and the LWIR acronym from style-driven downcasing.
@article{FAROOQ2026112506,
  author   = {Farooq, Muhammad Ali and Shariff, Waseem and Corcoran, Peter},
  title    = {{ThermVision-DB}: A synthetic {LWIR} thermal face dataset for privacy-preserving thermal vision research},
  journal  = {Data in Brief},
  volume   = {65},
  pages    = {112506},
  year     = {2026},
  issn     = {2352-3409},
  doi      = {10.1016/j.dib.2026.112506},
  url      = {https://www.sciencedirect.com/science/article/pii/S2352340926000594},
  keywords = {Synthetic data generation, Diffusion models, Thermal imaging, Image-to-video translation, Privacy-preserving AI, Multimodal facial analysis, Computer vision benchmark},
  abstract = {ThermVision-DB presents a synthetic long-wave infrared (LWIR) facial dataset designed to support research in privacy-preserving vision, thermal perception, and multimodal facial analysis. The dataset builds upon generative diffusion models to create photorealistic thermal facial images and video sequences capturing controlled variations in facial expression and head pose. Each synthetic identity is generated using text-to-image conditioning followed by video retargeting module, enabling precise control over pose angles, expression intensity, and frame-to-frame consistency. The dataset includes a diverse set of synthetic adult identities of both male and female genders with multiple facial expressions - such as neutral, smile, frown, and surprise and head-pose rotations spanning yaw, pitch, and roll. Data are provided in both image and video formats, accompanied by face localization annotations, landmark detections and identity labels. To ensure reusability and scalability, all samples are generated through a standardized pipeline using open-source models, allowing researchers to easily expand the dataset with additional synthetic identities while maintaining consistent thermal appearance and scene illumination. The synthetic generation process avoids the use of any personally identifiable visual data, ensuring compliance with FAIR and GDPR principles. ThermVision-DB is intended for use in developing and benchmarking algorithms for facial detection, landmark localization, expression recognition, and head-pose estimation in the thermal domain. It also provides a foundation for research in synthetic-to-real transfer learning, privacy-safe biometric analysis, and cross-spectrum data fusion. The dataset is released for open research purposes under a non-commercial license, with full documentation and metadata available to facilitate reproducibility and integration with existing thermal vision benchmarks.},
}