Publications | Kalin Stefanov

2026

Sign-SALD: A Skeleton-Aware Latent Diffusion Model for Text-driven Sign Language Production

Jiayu Shen, Kalin Stefanov, Lay-Ki Soon, Vee Yee Chong, and KokSheik Wong

In Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing, 2026

Bib PDF

@inproceedings{shen2026sign,
  author      = {Shen, Jiayu and Stefanov, Kalin and Soon, Lay-Ki and Chong, Vee Yee and Wong, KokSheik},
  title       = {Sign-SALD: A Skeleton-Aware Latent Diffusion Model for Text-driven Sign Language Production},
  booktitle   = {Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing},
  year        = {2026},
  pdf         = {shen2026sign.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

PhysHDR: When Lighting Meets Materials and Scene Geometry in HDR Reconstruction

Hrishav Bakul Barua, Kalin Stefanov, Ganesh Krishnasamy, KokSheik Wong, and Abhinav Dhall

In Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing, 2026

Bib PDF

@inproceedings{barua2026phys,
  author      = {Barua, Hrishav Bakul and Stefanov, Kalin and Krishnasamy, Ganesh and Wong, KokSheik and Dhall, Abhinav},
  title       = {PhysHDR: When Lighting Meets Materials and Scene Geometry in HDR Reconstruction},
  booktitle   = {Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing},
  year        = {2026},
  pdf         = {barua2026phys.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

AuslanSpell: An Interactive Technology for Improving Auslan Fingerspelling Comprehension

Kalin Stefanov, Andre Pham, Antony Loose, Lucy Robertson-Bell, and Louisa Willoughby

In Proceedings of the ACM Conference on Human Factors in Computing Systems, 2026

Bib PDF

@inproceedings{stefanov2026auslanspell,
  author      = {Stefanov, Kalin and Pham, Andre and Loose, Antony and Robertson-Bell, Lucy and Willoughby, Louisa},
  title       = {AuslanSpell: An Interactive Technology for Improving Auslan Fingerspelling Comprehension},
  booktitle   = {Proceedings of the ACM Conference on Human Factors in Computing Systems},
  year        = {2026},
  pdf         = {stefanov2026auslanspell.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

DexAvatar: 3D Sign Language Reconstruction with Hand and Body Pose Priors

Kaustubh Kundu, Hrishav Bakul Barua, Lucy Robertson-Bell, Zhixi Cai, and Kalin Stefanov

In Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision, 2026

Bib PDF

@inproceedings{kundu2026dexavatar,
  author      = {Kundu, Kaustubh and Barua, Hrishav Bakul and Robertson-Bell, Lucy and Cai, Zhixi and Stefanov, Kalin},
  title       = {DexAvatar: 3D Sign Language Reconstruction with Hand and Body Pose Priors},
  booktitle   = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  year        = {2026},
  pdf         = {kundu2026dexavatar.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2025

S-HR-VQVAE: Sequential Hierarchical Residual Learning Vector Quantized Variational Autoencoder for Video Prediction

Mohammad Adiban, Kalin Stefanov, Sabato Marco Siniscalchi, and Giampiero Salvi

IEEE Transactions on Multimedia, 2025

Bib PDF Code

@article{S-HR-VQVAE,
  author      = {Adiban, Mohammad and Stefanov, Kalin and Siniscalchi, Sabato Marco and Salvi, Giampiero},
  title       = {S-HR-VQVAE: Sequential Hierarchical Residual Learning Vector Quantized Variational Autoencoder for Video Prediction},
  journal     = {IEEE Transactions on Multimedia},
  volume      = {27},
  year        = {2025},
  doi         = {10.1109/TMM.2025.3535370},
  pdf         = {S-HR-VQVAE.pdf},
  code        = {https://github.com/mohammad-adiban/Video-Prediction},
  bibtex_show = {true},
  preview     = {paper.png}
}

Sign-MExD: An Expert-Infused Diffusion Model for Sign Language Production

Jiayu Shen, Kalin Stefanov, Vee Yee Chong, Lay-Ki Soon, and KokSheik Wong

In Proceedings of the APSIPA Annual Summit and Conference, 2025

Bib PDF

@inproceedings{11249331,
  author      = {Shen, Jiayu and Stefanov, Kalin and Chong, Vee Yee and Soon, Lay-Ki and Wong, KokSheik},
  title       = {Sign-MExD: An Expert-Infused Diffusion Model for Sign Language Production},
  booktitle   = {Proceedings of the APSIPA Annual Summit and Conference},
  pages       = {1200--1205},
  year        = {2025},
  doi         = {10.1109/APSIPAASC65261.2025.11249331},
  pdf         = {11249331.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Enhancing Tactile Learning: A Co-Designed System for Supporting Speech Interaction with Multi-Part 3D Printed Models by Students who are Blind

Ruth Galan Nagassa, Andre Ky Pham, Matthew Butler, Leona Holloway, Kalin Stefanov, Skye de Vent, and Kim Marriott

In Proceedings of the ACM Conference on Human Factors in Computing Systems, 2025

Bib PDF

@inproceedings{3713706,
  author      = {Nagassa, Ruth Galan and Pham, Andre Ky and Butler, Matthew and Holloway, Leona and Stefanov, Kalin and de Vent, Skye and Marriott, Kim},
  title       = {Enhancing Tactile Learning: A Co-Designed System for Supporting Speech Interaction with Multi-Part 3D Printed Models by Students who are Blind},
  booktitle   = {Proceedings of the ACM Conference on Human Factors in Computing Systems},
  year        = {2025},
  doi         = {10.1145/3706598.3713706},
  pdf         = {3713706.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

GTA-HDR: A Large-Scale Synthetic Dataset for HDR Image Reconstruction

Hrishav Bakul Barua, Kalin Stefanov, KokSheik Wong, Abhinav Dhall, and Ganesh Krishnasamy

In Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision, 2025

Bib PDF

@inproceedings{10943964,
  author      = {Barua, Hrishav Bakul and Stefanov, Kalin and Wong, KokSheik and Dhall, Abhinav and Krishnasamy, Ganesh},
  title       = {GTA-HDR: A Large-Scale Synthetic Dataset for HDR Image Reconstruction},
  booktitle   = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  pages       = {7876--7886},
  year        = {2025},
  doi         = {10.1109/WACV61041.2025.00765},
  pdf         = {10943964.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Do Blind Spots Matter for Word-Referent Mapping? A Computational Study with Infant Egocentric Video

Zekai Shi, Zhixi Cai, and Kalin Stefanov

2025

Bib PDF

@misc{2511.11725,
  author        = {Shi, Zekai and Cai, Zhixi and Stefanov, Kalin},
  title         = {Do Blind Spots Matter for Word-Referent Mapping? A Computational Study with Infant Egocentric Video},
  year          = {2025},
  eprint        = {2511.11725},
  archiveprefix = {arXiv},
  pdf           = {2511.11725.pdf},
  bibtex_show   = {true},
  preview       = {paper.png}
}

2024

Participation Role-Driven Engagement Estimation of ASD Individuals in Neurodiverse Group Discussions

Kalin Stefanov, Yukiko I. Nakano, Chisa Kobayashi, Ibuki Hoshina, Tatsuya Sakato, Fumio Nihei, Chihiro Takayama, Ryo Ishii, and Masatsugu Tsujii

In Proceedings of the ACM International Conference on Multimodal Interaction, 2024

Bib PDF

@inproceedings{3685721,
  author      = {Stefanov, Kalin and Nakano, Yukiko I. and Kobayashi, Chisa and Hoshina, Ibuki and Sakato, Tatsuya and Nihei, Fumio and Takayama, Chihiro and Ishii, Ryo and Tsujii, Masatsugu},
  title       = {Participation Role-Driven Engagement Estimation of ASD Individuals in Neurodiverse Group Discussions},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {556--564},
  year        = {2024},
  doi         = {10.1145/3678957.3685721},
  pdf         = {3685721.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

AV-Deepfake1M: A Large-Scale LLM-Driven Audio-Visual Deepfake Dataset

Zhixi Cai, Shreya Ghosh, Aman Pankaj Adatia, Munawar Hayat, Abhinav Dhall, Tom Gedeon, and Kalin Stefanov

In Proceedings of the ACM International Conference on Multimedia, 2024

Bib PDF

@inproceedings{3680795,
  author      = {Cai, Zhixi and Ghosh, Shreya and Adatia, Aman Pankaj and Hayat, Munawar and Dhall, Abhinav and Gedeon, Tom and Stefanov, Kalin},
  title       = {AV-Deepfake1M: A Large-Scale LLM-Driven Audio-Visual Deepfake Dataset},
  booktitle   = {Proceedings of the ACM International Conference on Multimedia},
  pages       = {7414--7423},
  year        = {2024},
  doi         = {10.1145/3664647.3680795},
  pdf         = {3680795.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

1M-Deepfakes Detection Challenge

Zhixi Cai, Abhinav Dhall, Shreya Ghosh, Munawar Hayat, Dimitrios Kollias, Kalin Stefanov, and Usman Tariq

In Proceedings of the ACM International Conference on Multimedia, 2024

Bib PDF

@inproceedings{3689145,
  author      = {Cai, Zhixi and Dhall, Abhinav and Ghosh, Shreya and Hayat, Munawar and Kollias, Dimitrios and Stefanov, Kalin and Tariq, Usman},
  title       = {1M-Deepfakes Detection Challenge},
  booktitle   = {Proceedings of the ACM International Conference on Multimedia},
  pages       = {11355--11359},
  year        = {2024},
  doi         = {10.1145/3664647.3689145},
  pdf         = {3689145.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

HistoHDR-Net: Histogram Equalization for Single LDR to HDR Image Translation

Hrishav Bakul Barua, Ganesh Krishnasamy, KokSheik Wong, Abhinav Dhall, and Kalin Stefanov

In Proceedings of the IEEE International Conference on Image Processing, 2024

Bib PDF

@inproceedings{10648020,
  author      = {Barua, Hrishav Bakul and Krishnasamy, Ganesh and Wong, KokSheik and Dhall, Abhinav and Stefanov, Kalin},
  title       = {HistoHDR-Net: Histogram Equalization for Single LDR to HDR Image Translation},
  booktitle   = {Proceedings of the IEEE International Conference on Image Processing},
  pages       = {2730--2736},
  year        = {2024},
  doi         = {10.1109/ICIP51287.2024.10648020},
  pdf         = {10648020.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

LLM-HDR: Bridging LLM-based Perception and Self-Supervision for Unpaired LDR-to-HDR Image Reconstruction

Hrishav Bakul Barua, Kalin Stefanov, Lemuel Lai En Che, Abhinav Dhall, KokSheik Wong, and Ganesh Krishnasamy

2024

Bib PDF

@misc{2410.15068,
  author        = {Barua, Hrishav Bakul and Stefanov, Kalin and Che, Lemuel Lai En and Dhall, Abhinav and Wong, KokSheik and Krishnasamy, Ganesh},
  title         = {LLM-HDR: Bridging LLM-based Perception and Self-Supervision for Unpaired LDR-to-HDR Image Reconstruction},
  year          = {2024},
  eprint        = {2410.15068},
  archiveprefix = {arXiv},
  pdf           = {2410.15068.pdf},
  bibtex_show   = {true},
  preview       = {paper.png}
}

Human Brain Exhibits Distinct Patterns When Listening to Fake Versus Real Audio: Preliminary Evidence

Mahsa Salehi, Kalin Stefanov, and Ehsan Shareghi

2024

Bib PDF

@misc{salehi2024human,
  author        = {Salehi, Mahsa and Stefanov, Kalin and Shareghi, Ehsan},
  title         = {Human Brain Exhibits Distinct Patterns When Listening to Fake Versus Real Audio: Preliminary Evidence},
  year          = {2024},
  eprint        = {2402.14982},
  archiveprefix = {arXiv},
  pdf           = {2402.14982.pdf},
  bibtex_show   = {true},
  preview       = {paper.png}
}

2023

MARLIN: Masked Autoencoder for Facial Video Representation Learning

Zhixi Cai, Shreya Ghosh, Kalin Stefanov, Abhinav Dhall, Jianfei Cai, Hamid Rezatofighi, Reza Haffari, and Munawar Hayat

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023

Bib PDF Code

@inproceedings{10203837,
  author      = {Cai, Zhixi and Ghosh, Shreya and Stefanov, Kalin and Dhall, Abhinav and Cai, Jianfei and Rezatofighi, Hamid and Haffari, Reza and Hayat, Munawar},
  title       = {MARLIN: Masked Autoencoder for Facial Video Representation Learning},
  booktitle   = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages       = {1493--1504},
  year        = {2023},
  doi         = {10.1109/CVPR52729.2023.00150},
  pdf         = {10203837.pdf},
  code        = {https://github.com/ControlNet/MARLIN},
  bibtex_show = {true},
  preview     = {paper.png}
}

Glitch in the Matrix: A Large Scale Benchmark for Content Driven Audio-Visual Forgery Detection and Localization

Zhixi Cai, Shreya Ghosh, Abhinav Dhall, Tom Gedeon, Kalin Stefanov, and Munawar Hayat

Computer Vision and Image Understanding, 2023

Bib PDF Code

@article{S1077314223001984,
  author      = {Cai, Zhixi and Ghosh, Shreya and Dhall, Abhinav and Gedeon, Tom and Stefanov, Kalin and Hayat, Munawar},
  title       = {Glitch in the Matrix: A Large Scale Benchmark for Content Driven Audio-Visual Forgery Detection and Localization},
  journal     = {Computer Vision and Image Understanding},
  volume      = {236},
  year        = {2023},
  doi         = {10.1016/j.cviu.2023.103818},
  pdf         = {S1077314223001984.pdf},
  code        = {https://github.com/ControlNet/LAV-DF},
  bibtex_show = {true},
  preview     = {paper.png}
}

ArtHDR-Net: Perceptually Realistic and Accurate HDR Content Creation

Hrishav Bakul Barua, Ganesh Krishnasamy, KokSheik Wong, Kalin Stefanov, and Abhinav Dhall

In Proceedings of the APSIPA Annual Summit and Conference, 2023

Bib PDF

@inproceedings{10317568,
  author      = {Barua, Hrishav Bakul and Krishnasamy, Ganesh and Wong, KokSheik and Stefanov, Kalin and Dhall, Abhinav},
  title       = {ArtHDR-Net: Perceptually Realistic and Accurate HDR Content Creation},
  booktitle   = {Proceedings of the APSIPA Annual Summit and Conference},
  pages       = {806--812},
  year        = {2023},
  doi         = {10.1109/APSIPAASC58517.2023.10317568},
  pdf         = {10317568.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2022

Graph-Based Group Modelling for Backchannel Detection

Garima Sharma, Kalin Stefanov, Abhinav Dhall, and Jianfei Cai

In Proceedings of the ACM International Conference on Multimedia, 2022

Bib PDF

@inproceedings{10.1145/3503161.3551605,
  author      = {Sharma, Garima and Stefanov, Kalin and Dhall, Abhinav and Cai, Jianfei},
  title       = {Graph-Based Group Modelling for Backchannel Detection},
  booktitle   = {Proceedings of the ACM International Conference on Multimedia},
  pages       = {7190--7194},
  year        = {2022},
  doi         = {10.1145/3503161.3551605},
  pdf         = {3503161.3551605.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Do You Really Mean That? Content Driven Audio-Visual Deepfake Dataset and Multimodal Method for Temporal Forgery Localization

Zhixi Cai, Kalin Stefanov, Abhinav Dhall, and Munawar Hayat

In Proceedings of the International Conference on Digital Image Computing: Techniques and Applications, 2022

Bib PDF Code

@inproceedings{10034605,
  author      = {Cai, Zhixi and Stefanov, Kalin and Dhall, Abhinav and Hayat, Munawar},
  title       = {Do You Really Mean That? Content Driven Audio-Visual Deepfake Dataset and Multimodal Method for Temporal Forgery Localization},
  booktitle   = {Proceedings of the International Conference on Digital Image Computing: Techniques and Applications},
  pages       = {1--10},
  year        = {2022},
  doi         = {10.1109/DICTA56598.2022.10034605},
  pdf         = {10034605.pdf},
  code        = {https://github.com/ControlNet/LAV-DF},
  bibtex_show = {true},
  preview     = {paper.png}
}

Hierarchical Residual Learning Based Vector Quantized Variational Autoencoder for Image Reconstruction and Generation

Mohammad Adiban, Kalin Stefanov, Sabato M. Siniscalchi, and Giampiero Salvi

In Proceedings of the British Machine Vision Conference, 2022

Bib PDF Code

@inproceedings{0636,
  author      = {Adiban, Mohammad and Stefanov, Kalin and Siniscalchi, Sabato M. and Salvi, Giampiero},
  title       = {Hierarchical Residual Learning Based Vector Quantized Variational Autoencoder for Image Reconstruction and Generation},
  booktitle   = {Proceedings of the British Machine Vision Conference},
  year        = {2022},
  pdf         = {0636.pdf},
  code        = {https://github.com/mohammad-adiban/Video-Prediction},
  bibtex_show = {true},
  preview     = {paper.png}
}

Visual Representations of Physiological Signals for Fake Video Detection

Kalin Stefanov, Bhawna Paliwal, and Abhinav Dhall

2022

Bib PDF

@misc{2207.08380,
  author        = {Stefanov, Kalin and Paliwal, Bhawna and Dhall, Abhinav},
  title         = {Visual Representations of Physiological Signals for Fake Video Detection},
  year          = {2022},
  eprint        = {2207.08380},
  archiveprefix = {arXiv},
  pdf           = {2207.08380.pdf},
  bibtex_show   = {true},
  preview       = {paper.png}
}

2021

Spatial Bias in Vision-Based Voice Activity Detection

Kalin Stefanov, Mohammad Adiban, and Giampiero Salvi

In Proceedings of the International Conference on Pattern Recognition, 2021

Bib PDF

@inproceedings{10.1109/ICPR48806.2021.9413345,
  author      = {Stefanov, Kalin and Adiban, Mohammad and Salvi, Giampiero},
  title       = {Spatial Bias in Vision-Based Voice Activity Detection},
  booktitle   = {Proceedings of the International Conference on Pattern Recognition},
  pages       = {10433--10440},
  year        = {2021},
  doi         = {10.1109/ICPR48806.2021.9413345},
  pdf         = {ICPR48806.2021.9413345.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Analysis of Behavior Classification in Motivational Interviewing

Leili Tavabi, Trang Tran, Kalin Stefanov, Brian Borsari, Joshua Woolley, Stefan Scherer, and Mohammad Soleymani

In Proceedings of the Seventh Workshop on Computational Linguistics and Clinical Psychology: Improving Access, 2021

Bib PDF

@inproceedings{10.18653/v1/2021.clpsych-1.13,
  author      = {Tavabi, Leili and Tran, Trang and Stefanov, Kalin and Borsari, Brian and Woolley, Joshua and Scherer, Stefan and Soleymani, Mohammad},
  title       = {Analysis of Behavior Classification in Motivational Interviewing},
  booktitle   = {Proceedings of the Seventh Workshop on Computational Linguistics and Clinical Psychology: Improving Access},
  pages       = {110--115},
  year        = {2021},
  doi         = {10.18653/v1/2021.clpsych-1.13},
  pdf         = {2021.clpsych-1.13.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Group-Level Focus of Visual Attention for Improved Next Speaker Prediction

Chris Birmingham, Kalin Stefanov, and Maja Mataric

In Proceedings of the ACM International Conference on Multimedia, 2021

Bib PDF

@inproceedings{10.1145/3474085.3479213,
  author      = {Birmingham, Chris and Stefanov, Kalin and Mataric, Maja},
  title       = {Group-Level Focus of Visual Attention for Improved Next Speaker Prediction},
  booktitle   = {Proceedings of the ACM International Conference on Multimedia},
  pages       = {4838--4842},
  year        = {2021},
  doi         = {10.1145/3474085.3479213},
  pdf         = {3474085.3479213.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Group-Level Focus of Visual Attention for Improved Active Speaker Detection

Christopher Birmingham, Maja Mataric, and Kalin Stefanov

In Proceedings of the ACM International Conference on Multimodal Interaction, 2021

Bib PDF

@inproceedings{10.1145/3461615.3485430,
  author      = {Birmingham, Christopher and Mataric, Maja and Stefanov, Kalin},
  title       = {Group-Level Focus of Visual Attention for Improved Active Speaker Detection},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {37--42},
  year        = {2021},
  doi         = {10.1145/3461615.3485430},
  pdf         = {3461615.3485430.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2020

Self-Supervised Vision-Based Detection of the Active Speaker as Support for Socially Aware Language Acquisition

Kalin Stefanov, Jonas Beskow, and Giampiero Salvi

IEEE Transactions on Cognitive and Developmental Systems, 2020

Bib PDF

@article{10.1109/TCDS.2019.2927941,
  author      = {Stefanov, Kalin and Beskow, Jonas and Salvi, Giampiero},
  title       = {Self-Supervised Vision-Based Detection of the Active Speaker as Support for Socially Aware Language Acquisition},
  journal     = {IEEE Transactions on Cognitive and Developmental Systems},
  volume      = {12},
  number      = {2},
  pages       = {250--259},
  year        = {2020},
  issn        = {2379-8939},
  doi         = {10.1109/TCDS.2019.2927941},
  pdf         = {TCDS.2019.2927941.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Emotion or Expressivity? An Automated Analysis of Nonverbal Perception in a Social Dilemma

Su Lei, Kalin Stefanov, and Jonathan Gratch

In Proceedings of the IEEE International Conference on Automatic Face and Gesture Recognition, 2020

Bib PDF

@inproceedings{10.1109/FG47880.2020.00123,
  author      = {Lei, Su and Stefanov, Kalin and Gratch, Jonathan},
  title       = {Emotion or Expressivity? An Automated Analysis of Nonverbal Perception in a Social Dilemma},
  booktitle   = {Proceedings of the IEEE International Conference on Automatic Face and Gesture Recognition},
  pages       = {544--551},
  year        = {2020},
  doi         = {10.1109/FG47880.2020.00123},
  pdf         = {FG47880.2020.00123.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Multimodal Automatic Coding of Client Behavior in Motivational Interviewing

Leili Tavabi, Kalin Stefanov, Larry Zhang, Brian Borsari, Joshua D. Woolley, Stefan Scherer, and Mohammad Soleymani

In Proceedings of the ACM International Conference on Multimodal Interaction, 2020

Bib PDF

@inproceedings{10.1145/3382507.3418853,
  author      = {Tavabi, Leili and Stefanov, Kalin and Zhang, Larry and Borsari, Brian and Woolley, Joshua D. and Scherer, Stefan and Soleymani, Mohammad},
  title       = {Multimodal Automatic Coding of Client Behavior in Motivational Interviewing},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {406--413},
  year        = {2020},
  doi         = {10.1145/3382507.3418853},
  pdf         = {3382507.3418853.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

OpenSense: A Platform for Multimodal Data Acquisition and Behavior Perception

Kalin Stefanov, Baiyu Huang, Zongjian Li, and Mohammad Soleymani

In Proceedings of the ACM International Conference on Multimodal Interaction, 2020

Bib PDF Code

@inproceedings{10.1145/3382507.3418832,
  author      = {Stefanov, Kalin and Huang, Baiyu and Li, Zongjian and Soleymani, Mohammad},
  title       = {OpenSense: A Platform for Multimodal Data Acquisition and Behavior Perception},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {660--664},
  year        = {2020},
  doi         = {10.1145/3382507.3418832},
  pdf         = {3382507.3418832.pdf},
  code        = {https://github.com/intelligent-human-perception-laboratory/OpenSense},
  bibtex_show = {true},
  preview     = {paper.png}
}

2019

Modeling of Human Visual Attention in Multiparty Open-World Dialogues

Kalin Stefanov, Giampiero Salvi, Dimosthenis Kontogiorgos, Hedvig Kjellström, and Jonas Beskow

ACM Transactions on Human-Robot Interaction, 2019

Bib PDF

@article{10.1145/3323231,
  author      = {Stefanov, Kalin and Salvi, Giampiero and Kontogiorgos, Dimosthenis and Kjellstr{\"o}m, Hedvig and Beskow, Jonas},
  title       = {Modeling of Human Visual Attention in Multiparty Open-World Dialogues},
  journal     = {ACM Transactions on Human-Robot Interaction},
  volume      = {8},
  number      = {2},
  articleno   = {8},
  year        = {2019},
  doi         = {10.1145/3323231},
  pdf         = {3323231.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Towards Digitally-Mediated Sign Language Communication

Kalin Stefanov and Mayumi Bono

In Proceedings of the International Conference on Human-Agent Interaction, 2019

Bib PDF

@inproceedings{10.1145/3349537.3352794,
  author      = {Stefanov, Kalin and Bono, Mayumi},
  title       = {Towards Digitally-Mediated Sign Language Communication},
  booktitle   = {Proceedings of the International Conference on Human-Agent Interaction},
  pages       = {286--288},
  year        = {2019},
  doi         = {10.1145/3349537.3352794},
  pdf         = {3349537.3352794.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Multimodal Learning for Identifying Opportunities for Empathetic Responses

Leili Tavabi, Kalin Stefanov, Setareh Nasihati Gilani, David Traum, and Mohammad Soleymani

In Proceedings of the ACM International Conference on Multimodal Interaction, 2019

Bib PDF

@inproceedings{10.1145/3340555.3353750,
  author      = {Tavabi, Leili and Stefanov, Kalin and Nasihati Gilani, Setareh and Traum, David and Soleymani, Mohammad},
  title       = {Multimodal Learning for Identifying Opportunities for Empathetic Responses},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {95--104},
  year        = {2019},
  doi         = {10.1145/3340555.3353750},
  pdf         = {3340555.3353750.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Multimodal Analysis and Estimation of Intimate Self-Disclosure

Mohammad Soleymani, Kalin Stefanov, Sin-Hwa Kang, Jan Ondras, and Jonathan Gratch

In Proceedings of the ACM International Conference on Multimodal Interaction, 2019

Bib PDF

@inproceedings{10.1145/3340555.3353737,
  author      = {Soleymani, Mohammad and Stefanov, Kalin and Kang, Sin-Hwa and Ondras, Jan and Gratch, Jonathan},
  title       = {Multimodal Analysis and Estimation of Intimate Self-Disclosure},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {59--68},
  year        = {2019},
  doi         = {10.1145/3340555.3353737},
  pdf         = {3340555.3353737.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2018

Recognition and Generation of Communicative Signals: Modeling of Hand Gestures, Speech Activity and Eye-Gaze in Human-Machine Interaction

Kalin Stefanov

KTH Royal Institute of Technology, 2018

Bib PDF

@phdthesis{stefanov2018recognition,
  author      = {Stefanov, Kalin},
  title       = {Recognition and Generation of Communicative Signals: Modeling of Hand Gestures, Speech Activity and Eye-Gaze in Human-Machine Interaction},
  school      = {KTH Royal Institute of Technology},
  year        = {2018},
  pdf         = {phd.thesis.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2017

Vision-based Active Speaker Detection in Multiparty Interaction

Kalin Stefanov, Jonas Beskow, and Giampiero Salvi

In Proceedings of the International Workshop on Grounding Language Understanding, 2017

Bib PDF

@inproceedings{10.21437/GLU.2017-10,
  author      = {Stefanov, Kalin and Beskow, Jonas and Salvi, Giampiero},
  title       = {Vision-based Active Speaker Detection in Multiparty Interaction},
  booktitle   = {Proceedings of the International Workshop on Grounding Language Understanding},
  pages       = {47--51},
  year        = {2017},
  doi         = {10.21437/GLU.2017-10},
  pdf         = {GLU.2017-10.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

A Real-time Gesture Recognition System for Isolated Swedish Sign Language Signs

Kalin Stefanov and Jonas Beskow

In Proceedings of the European and Nordic Symposium on Multimodal Communication, 2017

Bib PDF

@inproceedings{ecp17141004,
  author      = {Stefanov, Kalin and Beskow, Jonas},
  title       = {A Real-time Gesture Recognition System for Isolated Swedish Sign Language Signs},
  booktitle   = {Proceedings of the European and Nordic Symposium on Multimodal Communication},
  pages       = {18--27},
  year        = {2017},
  publisher   = {Link{\"o}ping University Electronic Press},
  pdf         = {ecp17141004.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2016

A Multi-party Multi-modal Dataset for Focus of Visual Attention in Human-human and Human-robot Interaction

Kalin Stefanov and Jonas Beskow

In Proceedings of the International Conference on Language Resources and Evaluation, 2016

Bib PDF

@inproceedings{L16-1703,
  author      = {Stefanov, Kalin and Beskow, Jonas},
  title       = {A Multi-party Multi-modal Dataset for Focus of Visual Attention in Human-human and Human-robot Interaction},
  booktitle   = {Proceedings of the International Conference on Language Resources and Evaluation},
  pages       = {4440--4444},
  year        = {2016},
  pdf         = {L16-1703.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Look Who’s Talking: Visual Identification of the Active Speaker in Multi-Party Human-Robot Interaction

Kalin Stefanov, Akihiro Sugimoto, and Jonas Beskow

In Proceedings of the International Workshop on Advancements in Social Signal Processing for Multimodal Interaction, 2016

Bib PDF

@inproceedings{10.1145/3005467.3005470,
  author      = {Stefanov, Kalin and Sugimoto, Akihiro and Beskow, Jonas},
  title       = {Look Who's Talking: Visual Identification of the Active Speaker in Multi-Party Human-Robot Interaction},
  booktitle   = {Proceedings of the International Workshop on Advancements in Social Signal Processing for Multimodal Interaction},
  pages       = {22--27},
  year        = {2016},
  doi         = {10.1145/3005467.3005470},
  pdf         = {3005467.3005470.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Gesture Recognition System for Isolated Sign Language Signs

Kalin Stefanov and Jonas Beskow

In Proceedings of the European and Nordic Symposium on Multimodal Communication, 2016

Abs Bib PDF

This paper describes a method for automatic recognition of isolated Swedish Sign Language signs for the purpose of educational signing-based games. Two datasets consisting of 51 signs have been recorded from a total of 7 (experienced) and 10 (inexperienced) adult signers. The signers performed all of the signs 5 times and were captured with a RGB-D (Kinect) sensor, via a purpose-built recording application. A recognizer based on manual components of sign language is presented and tested on the collected datasets. Signer-dependent recognition rate is 95.3% for the most consistent signer. Signer-independent recognition rate is on average 57.9% for the experienced signers and 68.9% for the inexperienced.
@inproceedings{stefanov2016gesture,
  author      = {Stefanov, Kalin and Beskow, Jonas},
  title       = {Gesture Recognition System for Isolated Sign Language Signs},
  booktitle   = {Proceedings of the European and Nordic Symposium on Multimodal Communication},
  pages       = {57--59},
  year        = {2016},
  pdf         = {stefanov2016gesture.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2015

Public Speaking Training with a Multimodal Interactive Virtual Audience Framework

Mathieu Chollet, Kalin Stefanov, Helmut Prendinger, and Stefan Scherer

In Proceedings of the ACM International Conference on Multimodal Interaction, 2015

Bib PDF

@inproceedings{10.1145/2818346.2823294,
  author      = {Chollet, Mathieu and Stefanov, Kalin and Prendinger, Helmut and Scherer, Stefan},
  title       = {Public Speaking Training with a Multimodal Interactive Virtual Audience Framework},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  pages       = {367--368},
  year        = {2015},
  doi         = {10.1145/2818346.2823294},
  pdf         = {2818346.2823294.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2014

Tutoring Robots

Samer Al Moubayed, Jonas Beskow, Bajibabu Bollepalli, Ahmed Hussen-Abdelaziz, Martin Johansson, Maria Koutsombogera, José David Lopes, Jekaterina Novikova, Catharine Oertel, Gabriel Skantze, Kalin Stefanov, and Gül Varol

In Innovative and Creative Developments in Multimodal Interaction Systems, 2014

Bib PDF

@inproceedings{10.1007/978-3-642-55143-7_4,
  author      = {Al Moubayed, Samer and Beskow, Jonas and Bollepalli, Bajibabu and Hussen-Abdelaziz, Ahmed and Johansson, Martin and Koutsombogera, Maria and Lopes, Jos{\'e} David and Novikova, Jekaterina and Oertel, Catharine and Skantze, Gabriel and Stefanov, Kalin and Varol, G{\"u}l},
  title       = {Tutoring Robots},
  booktitle   = {Innovative and Creative Developments in Multimodal Interaction Systems},
  pages       = {80--113},
  year        = {2014},
  publisher   = {Springer Berlin Heidelberg},
  isbn        = {978-3-642-55143-7},
  pdf         = {978-3-642-55143-7_4.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Human-Robot Collaborative Tutoring using Multiparty Multimodal Spoken Dialogue

Samer Al Moubayed, Jonas Beskow, Bajibabu Bollepalli, Joakim Gustafson, Ahmed Hussen-Abdelaziz, Martin Johansson, Maria Koutsombogera, José David Lopes, Jekaterina Novikova, Catharine Oertel, Gabriel Skantze, Kalin Stefanov, and Gül Varol

In Proceedings of the ACM/IEEE International Conference on Human-Robot Interaction, 2014

Bib PDF

@inproceedings{8542606,
  author      = {Al Moubayed, Samer and Beskow, Jonas and Bollepalli, Bajibabu and Gustafson, Joakim and Hussen-Abdelaziz, Ahmed and Johansson, Martin and Koutsombogera, Maria and Lopes, Jos{\'e} David and Novikova, Jekaterina and Oertel, Catharine and Skantze, Gabriel and Stefanov, Kalin and Varol, G{\"u}l},
  title       = {Human-Robot Collaborative Tutoring using Multiparty Multimodal Spoken Dialogue},
  booktitle   = {Proceedings of the ACM/IEEE International Conference on Human-Robot Interaction},
  pages       = {112--113},
  year        = {2014},
  pdf         = {8542606.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

The Tutorbot Corpus — A Corpus for Studying Tutoring Behaviour in Multiparty Face-to-Face Spoken Dialogue

Maria Koutsombogera, Samer Al Moubayed, Bajibabu Bollepalli, Ahmed Hussen Abdelaziz, Martin Johansson, José David Aguas Lopes, Jekaterina Novikova, Catharine Oertel, Kalin Stefanov, and Gül Varol

In Proceedings of the International Conference on Language Resources and Evaluation, 2014

Bib PDF

@comment{The family name is written as Al Moubayed (particle kept with the surname), matching the other entries in this file.}
@inproceedings{L14-1641,
  author      = {Koutsombogera, Maria and Al Moubayed, Samer and Bollepalli, Bajibabu and Abdelaziz, Ahmed Hussen and Johansson, Martin and Lopes, Jos{\'e} David Aguas and Novikova, Jekaterina and Oertel, Catharine and Stefanov, Kalin and Varol, G{\"u}l},
  title       = {The Tutorbot Corpus {---} A Corpus for Studying Tutoring Behaviour in Multiparty Face-to-Face Spoken Dialogue},
  booktitle   = {Proceedings of the International Conference on Language Resources and Evaluation},
  year        = {2014},
  pages       = {4196--4201},
  pdf         = {L14-1641.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

A Data-Driven Approach to Detection of Interruptions in Human-Human Conversations

Raveesh Meena, Saeed Dabbaghchian, and Kalin Stefanov

In Proceedings of the FONETIK, 2014

Bib PDF

@inproceedings{774962,
  author      = {Meena, Raveesh and Dabbaghchian, Saeed and Stefanov, Kalin},
  title       = {A Data-Driven Approach to Detection of Interruptions in Human-Human Conversations},
  booktitle   = {Proceedings of the FONETIK},
  year        = {2014},
  pages       = {29--32},
  pdf         = {774962.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Tivoli - Learning Signs Through Games and Interaction for Children with Communicative Disorders

Jonas Beskow, Simon Alexanderson, Kalin Stefanov, Britt Claesson, Sandra Derbring, Morgan Fredriksson, J. Starck, and E. Axelsson

In Proceedings of the Biennial Conference of the International Society for Augmentative and Alternative Communication, 2014

Bib

@inproceedings{beskow2014tivoli,
  author      = {Beskow, Jonas and Alexanderson, Simon and Stefanov, Kalin and Claesson, Britt and Derbring, Sandra and Fredriksson, Morgan and Starck, J. and Axelsson, E.},
  title       = {Tivoli - Learning Signs Through Games and Interaction for Children with Communicative Disorders},
  booktitle   = {Proceedings of the Biennial Conference of the International Society for Augmentative and Alternative Communication},
  year        = {2014},
  bibtex_show = {true},
  preview     = {paper.png}
}

2013

A Kinect Corpus of Swedish Sign Language Signs

Kalin Stefanov and Jonas Beskow

In Proceedings of the Workshop on Multimodal Corpora: Beyond Audio and Video, 2013

Bib PDF

@inproceedings{e2e46e0abff94d4d944bec6ab0553087,
  author      = {Stefanov, Kalin and Beskow, Jonas},
  title       = {A Kinect Corpus of Swedish Sign Language Signs},
  booktitle   = {Proceedings of the Workshop on Multimodal Corpora: Beyond Audio and Video},
  year        = {2013},
  pdf         = {e2e46e0abff94d4d944bec6ab0553087.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

The Tivoli System–A Sign-driven Game for Children with Communicative Disorders

Jonas Beskow, Simon Alexanderson, Kalin Stefanov, Britt Claesson, Sandra Derbring, and Morgan Fredriksson

In Proceedings of the Symposium on Multimodal Communication, 2013

Bib

@inproceedings{beskow2013tivoli,
  author      = {Beskow, Jonas and Alexanderson, Simon and Stefanov, Kalin and Claesson, Britt and Derbring, Sandra and Fredriksson, Morgan},
  title       = {The Tivoli System--A Sign-driven Game for Children with Communicative Disorders},
  booktitle   = {Proceedings of the Symposium on Multimodal Communication},
  year        = {2013},
  bibtex_show = {true},
  preview     = {paper.png}
}

Web-Enabled 3D Talking Avatars Based on WebGL and HTML5

Jonas Beskow and Kalin Stefanov

In Proceedings of the International Conference on Intelligent Virtual Agents, 2013

Bib PDF

@comment{The volume field is the number within the book series named in the series field, not a journal volume.}
@inproceedings{3903,
  author      = {Beskow, Jonas and Stefanov, Kalin},
  title       = {Web-Enabled 3D Talking Avatars Based on WebGL and HTML5},
  booktitle   = {Proceedings of the International Conference on Intelligent Virtual Agents},
  series      = {Lecture Notes in Computer Science},
  volume      = {8108},
  pages       = {486},
  year        = {2013},
  pdf         = {3903.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

2012

Multimodal Multiparty Social Interaction with the Furhat Head

Samer Al Moubayed, Gabriel Skantze, Jonas Beskow, Kalin Stefanov, and Joakim Gustafson

In Proceedings of the ACM International Conference on Multimodal Interaction, 2012

Bib PDF

@inproceedings{10.1145/2388676.2388736,
  author      = {Al Moubayed, Samer and Skantze, Gabriel and Beskow, Jonas and Stefanov, Kalin and Gustafson, Joakim},
  title       = {Multimodal Multiparty Social Interaction with the Furhat Head},
  booktitle   = {Proceedings of the ACM International Conference on Multimodal Interaction},
  year        = {2012},
  pages       = {293--294},
  doi         = {10.1145/2388676.2388736},
  pdf         = {2388676.2388736.pdf},
  bibtex_show = {true},
  preview     = {paper.png}
}

Socially Aware Many-to-Machine Communication

Florian Eyben, Emer Gilmartin, Cyril Joder, Erik Marchi, Christian Munier, Kalin Stefanov, Felix Weninger, and Björn Schuller

In Proceedings of the International Summer Workshop on Multimodal Interfaces, 2012

Bib

@comment{Accented letters use braced LaTeX escapes, as in the other entries of this file, so classic BibTeX treats each as one letter.}
@inproceedings{eyben2012socially,
  author      = {Eyben, Florian and Gilmartin, Emer and Joder, Cyril and Marchi, Erik and Munier, Christian and Stefanov, Kalin and Weninger, Felix and Schuller, Bj{\"o}rn},
  title       = {Socially Aware Many-to-Machine Communication},
  booktitle   = {Proceedings of the International Summer Workshop on Multimodal Interfaces},
  year        = {2012},
  bibtex_show = {true},
  preview     = {paper.png}
}

2011

D2.4.3 Spreading Activation Components v3 - LarKC Project Deliverable

Maurice Grinberg, Hristo Stefanov, Kalin Stefanov, and Ivan Peikov

2011

Bib

@comment{Project deliverable with no journal, so it is typed as misc rather than article. NOTE(review): switch to techreport once the issuing institution is confirmed.}
@misc{grinberg2011d2,
  author      = {Grinberg, Maurice and Stefanov, Hristo and Stefanov, Kalin and Peikov, Ivan},
  title       = {D2.4.3 Spreading Activation Components v3 - LarKC Project Deliverable},
  year        = {2011},
  bibtex_show = {true},
  preview     = {paper.png}
}

2010

Webcam-based Eye Gaze Tracking under Natural Head Movement

Kalin Stefanov

University of Amsterdam, 2010

Bib PDF

@mastersthesis{1803.11088,
  author        = {Stefanov, Kalin},
  title         = {Webcam-based Eye Gaze Tracking under Natural Head Movement},
  school        = {University of Amsterdam},
  year          = {2010},
  archiveprefix = {arXiv},
  eprint        = {1803.11088},
  pdf           = {1803.11088.pdf},
  bibtex_show   = {true},
  preview       = {paper.png}
}