In this research, we will focus on the following three tasks. Task 1: Simultaneous interpretation methods: simultaneous interpretation using paralinguistic speech translation, video, and prior and external knowledge; interpretation output optimization; and progressive advancement of voice interpretation methods. Task 2: Evaluation methods and real-time evaluation techniques for interpretation quality: interpretation process analysis; interpreter support techniques; methods common to human interpreters and automatic interpretation systems; the establishment of an objective automatic evaluation method for interpretation quality using sensing, including brain activity; and the establishment of a real-time interpretation quality evaluation system. Task 3: Corpus construction and systems: corpus alignment, quality annotation, and corpus augmentation; construction of operational systems; construction of an ecosystem for data collection and improvement; and establishment of active learning and life-long learning methods.
@article{xin24access_jvnv-corpus,
  author  = {Xin, Detai and Jiang, Junfeng and Takamichi, Shinnosuke and Saito, Yuki and Aizawa, Akiko and Saruwatari, Hiroshi},
  title   = {{JVNV}: A Corpus of Japanese Emotional Speech with Verbal Content and Nonverbal Expressions},
  journal = {IEEE Access},
  year    = {2024},
}
@article{xin24specom_jnv-corpus,
  author  = {Xin, Detai and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title   = {{JNV} Corpus: A Corpus of Japanese Nonverbal Vocalizations with Diverse Phrases and Emotions},
  journal = {Speech Communication},
  year    = {2024},
}
@comment{NOTE(review): journal expanded from the truncated "APSIPA Transactions" to the
journal's full title, matching this file's convention of full journal names (cf. the
TASLP entry). Volume/pages/doi are still missing -- confirm against the published record.}
@article{luo24apsipa-trans_emotion-synthesis,
  author  = {Luo, Xuan and Takamichi, Shinnosuke and Saito, Yuki and Koriyama, Tomoki and Saruwatari, Hiroshi},
  title   = {Emotion-controllable Speech Synthesis using Emotion Soft Label, Utterance-level Prosodic Factors, and Word-level Prominence},
  journal = {APSIPA Transactions on Signal and Information Processing},
  year    = {2024},
}
@article{saeki24taslp_text-inductive-tts,
  author  = {Saeki, Takaaki and Maiti, Soumi and Li, Xinjian and Watanabe, Shinji and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title   = {Text-Inductive Graphone-Based Language Adaptation for Low-Resource Speech Synthesis},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year    = {2024},
}
@inproceedings{nakano23slt_visual-text-to-speech,
  author         = {Nakano, Yoshifumi and Saeki, Takaaki and Takamichi, Shinnosuke and Sudoh, Katsuhito and Saruwatari, Hiroshi},
  title          = {{vTTS}: visual-text to speech},
  booktitle      = {Proceedings of IEEE Spoken Language Technology Workshop (SLT)},
  abbr_publisher = {Proceedings of IEEE Spoken Language Technology Workshop (SLT)},
  year           = {2023},
}
@inproceedings{saeki23ijcai_learning-to-speak,
  author         = {Saeki, Takaaki and Maiti, Soumi and Li, Xinjian and Watanabe, Shinji and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title          = {Learning to Speak from Text: Zero-Shot Multilingual Text-to-Speech with Unsupervised Text Pretraining},
  booktitle      = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  abbr_publisher = {Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI)},
  year           = {2023},
}