2022
|
Quamer, W.; Das, A.; Levis, J.; Chukharev-Hudilainen, E.; Gutierrez-Osuna, R. Zero-Shot Foreign Accent Conversion without a Native Reference Proceedings Article Forthcoming In: Proc. Interspeech, Forthcoming.
@inproceedings{waris2022interspeech,
  title     = {Zero-Shot Foreign Accent Conversion without a Native Reference},
  author    = {Quamer, W. and Das, A. and Levis, J. and Chukharev-Hudilainen, E. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2023/02/quamer2022interspeech.pdf},
  year      = {2022},
  date      = {2022-09-18},
  urldate   = {2022-09-18},
  booktitle = {Proc. Interspeech},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
Yang, M.; Dave, D.; Erraguntla, M.; Cote, G.; Gutierrez-Osuna, R. Joint hypoglycemia prediction and glucose forecasting via deep multi-task learning Proceedings Article In: Proc. ICASSP, 2022.
@inproceedings{mu2022icassp,
  title     = {Joint hypoglycemia prediction and glucose forecasting via deep multi-task learning},
  author    = {Yang, M. and Dave, D. and Erraguntla, M. and Cote, G. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2023/07/yang2022icassp_hypoglycemia.pdf
https://ieeexplore.ieee.org/document/9746129},
  year      = {2022},
  date      = {2022-05-22},
  urldate   = {2022-05-22},
  booktitle = {Proc. ICASSP},
  keywords  = {Continuous glucose monitors, Deep learning, Health, Wearable sensors},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2021
|
Ding, S.; Zhao, G.; Gutierrez-Osuna, R. Accentron: Foreign accent conversion to arbitrary non-native speakers using zero-shot learning Journal Article In: Computer Speech & Language, 2021.
@inproceedings{shaojin2021accentron-fixme-see-below,
  internal-note = {placeholder to keep commentary out of the entry; see real entry below}
}
@article{shaojin2021accentron,
  title     = {{Accentron}: Foreign accent conversion to arbitrary non-native speakers using zero-shot learning},
  author    = {Ding, S. and Zhao, G. and Gutierrez-Osuna, R.},
  url       = {https://www.sciencedirect.com/science/article/pii/S0885230821001029
https://psi.engr.tamu.edu/wp-content/uploads/2021/10/1-s2.0-S0885230821001029-main.pdf},
  year      = {2021},
  date      = {2021-10-14},
  urldate   = {2021-10-14},
  journal   = {Computer Speech \& Language},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {article}
}
|
Yang, M.; Paromita, P.; Chaspari, T.; Das, A.; Sajjadi, S.; Mortazavi, B. J.; Gutierrez-Osuna, R. A Metric Learning Approach for Personalized Meal Macronutrient Estimation from Postprandial Glucose Response Signals Proceedings Article In: Proc. IEEE/EMBS Intl. Conf. Biomedical And Health Informatics (BHI 2021)., 2021.
@inproceedings{theodora2021bhi,
  title     = {A Metric Learning Approach for Personalized Meal Macronutrient Estimation from Postprandial Glucose Response Signals},
  author    = {Yang, M. and Paromita, P. and Chaspari, T. and Das, A. and Sajjadi, S. and Mortazavi, B. J. and Gutierrez-Osuna, R.},
  url       = {http://ceur-ws.org/Vol-2903/IUI21WS-HEALTHI-10.pdf
https://psi.engr.tamu.edu/wp-content/uploads/2022/01/IUI21WS-HEALTHI-10.pdf},
  year      = {2021},
  date      = {2021-07-27},
  urldate   = {2021-07-27},
  booktitle = {Proc. IEEE/EMBS Intl. Conf. Biomedical and Health Informatics (BHI 2021)},
  keywords  = {Chemical sensors, Continuous glucose monitors, Deep learning, Health, Wearable sensors},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Zhao, G.; Ding, S.; Gutierrez-Osuna, R. Converting Foreign Accent Speech Without a Reference Journal Article In: IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 29, pp. 2367, 2021.
@article{guanlong2021reference-free,
  title     = {Converting Foreign Accent Speech Without a Reference},
  author    = {Zhao, G. and Ding, S. and Gutierrez-Osuna, R.},
  url       = {https://ieeexplore.ieee.org/abstract/document/9477581
https://psi.engr.tamu.edu/wp-content/uploads/2021/08/zhao2021reference.pdf},
  year      = {2021},
  date      = {2021-07-01},
  urldate   = {2021-07-01},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {29},
  pages     = {2367},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {article}
}
|
Paromita, P.; Chaspari, T.; Sajjadi, S.; Das, A.; Mortazavi, B. J.; Gutierrez-Osuna, R. Personalized Meal Classification Using Continuous Glucose Monitors Proceedings Article In: In Proc. IUI HEALTHI Workshop, 2021.
@inproceedings{psyche2021healthi,
  title     = {Personalized Meal Classification Using Continuous Glucose Monitors},
  author    = {Paromita, P. and Chaspari, T. and Sajjadi, S. and Das, A. and Mortazavi, B. J. and Gutierrez-Osuna, R.},
  year      = {2021},
  date      = {2021-04-13},
  urldate   = {2021-04-13},
  booktitle = {Proc. IUI HEALTHI Workshop},
  keywords  = {Chemical sensors, Continuous glucose monitors, Deep learning, Health, Wearable sensors},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2020
|
Ding, S.; Zhao, G.; Gutierrez-Osuna, R. Improving the Speaker Identity of Non-Parallel Many-to-Many Voice Conversion with Adversarial Speaker Recognition Proceedings Article In: Proc. Interspeech, 2020.
@inproceedings{shaojin-2020-interspeech,
  title     = {Improving the Speaker Identity of Non-Parallel Many-to-Many Voice Conversion with Adversarial Speaker Recognition},
  author    = {Ding, S. and Zhao, G. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2020/08/IS2020_shaojin_Adversarial_speaker_classifier_camera_ready.pdf},
  year      = {2020},
  date      = {2020-10-25},
  urldate   = {2020-10-25},
  booktitle = {Proc. Interspeech},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Das, A.; Zhao, G.; Levis, J.; Chukharev-Hudilainen, E.; Gutierrez-Osuna, R. Understanding the Effect of Voice Quality and Accent on Talker Similarity Proceedings Article In: Proc. Interspeech, 2020.
@inproceedings{anurag-2020-interspeech,
  title     = {Understanding the Effect of Voice Quality and Accent on Talker Similarity},
  author    = {Das, A. and Zhao, G. and Levis, J. and Chukharev-Hudilainen, E. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2023/02/das2020interspeech.pdf},
  year      = {2020},
  date      = {2020-10-24},
  urldate   = {2020-10-24},
  booktitle = {Proc. Interspeech},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2019
|
Zhao, G; Ding, S.; Gutierrez-Osuna, Ricardo Foreign Accent Conversion by Synthesizing Speech from Phonetic Posteriorgrams Proceedings Article In: Proc. Interspeech, 2019.
@inproceedings{guanlong2019-interspeech,
  title     = {Foreign Accent Conversion by Synthesizing Speech from Phonetic Posteriorgrams},
  author    = {Zhao, G. and Ding, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2019/07/zhao2019interspeech.pdf},
  year      = {2019},
  date      = {2019-09-15},
  urldate   = {2019-09-15},
  booktitle = {Proc. Interspeech},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Zhao, G; Gutierrez-Osuna, R Using Phonetic Posteriorgram Based Frame Pairing for Segmental Accent Conversion Journal Article In: IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 27, no. 10, pp. 1649-1660, 2019, ISSN: 2329-9290.
@article{zhao-2019-taslp,
  title     = {Using Phonetic Posteriorgram Based Frame Pairing for Segmental Accent Conversion},
  author    = {Zhao, G. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2020/04/zhao2019taslp.pdf},
  doi       = {10.1109/TASLP.2019.2926754},
  issn      = {2329-9290},
  year      = {2019},
  date      = {2019-07-04},
  urldate   = {2019-07-04},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {27},
  number    = {10},
  pages     = {1649--1660},
  abstract  = {Accent conversion (AC) aims to transform non-native utterances to sound as if the speaker had a native accent. This can be achieved by mapping source speech spectra from a native speaker into the acoustic space of the target non-native speaker. In prior work, we proposed an AC approach that matches frames between the two speakers based on their acoustic similarity after compensating for differences in vocal tract length. In this paper, we propose a new approach that matches frames between the two speakers based on their phonetic (rather than acoustic) similarity. Namely, we map frames from the two speakers into a phonetic posteriorgram using speaker-independent acoustic models trained on native speech. We thoroughly evaluate the approach on a speech corpus containing multiple native and non-native speakers. The proposed algorithm outperforms the prior approach, improving ratings of acoustic quality (22% increase in mean opinion score) and native accent (69% preference) while retaining the voice quality of the non-native speaker. Furthermore, we show that the approach can be used in the reverse conversion direction, i.e., generating speech with a native speaker's voice quality and a non-native accent. Finally, we show that this approach can be applied to non-parallel training data, achieving the same accent conversion performance.},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {article}
}
Accent conversion (AC) aims to transform non-native utterances to sound as if the speaker had a native accent. This can be achieved by mapping source speech spectra from a native speaker into the acoustic space of the target non-native speaker. In prior work, we proposed an AC approach that matches frames between the two speakers based on their acoustic similarity after compensating for differences in vocal tract length. In this paper, we propose a new approach that matches frames between the two speakers based on their phonetic (rather than acoustic) similarity. Namely, we map frames from the two speakers into a phonetic posteriorgram using speaker-independent acoustic models trained on native speech. We thoroughly evaluate the approach on a speech corpus containing multiple native and non-native speakers. The proposed algorithm outperforms the prior approach, improving ratings of acoustic quality (22% increase in mean opinion score) and native accent (69% preference) while retaining the voice quality of the non-native speaker. Furthermore, we show that the approach can be used in the reverse conversion direction, i.e., generating speech with a native speaker's voice quality and a non-native accent. Finally, we show that this approach can be applied to non-parallel training data, achieving the same accent conversion performance. |
2018
|
Zhao, G; Sonsaat, S; Levis, J; Chukharev-Hudilainen, E; Gutierrez-Osuna, R Accent conversion using phonetic posteriorgrams Proceedings Article In: Proc. ICASSP, 2018.
@inproceedings{zhao2018icassp,
  title     = {Accent conversion using phonetic posteriorgrams},
  author    = {Zhao, G. and Sonsaat, S. and Levis, J. and Chukharev-Hudilainen, E. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/03/zhao2018icassp.pdf
http://people.tamu.edu/~guanlong.zhao/icassp18_demo.html
https://psi.engr.tamu.edu/l2-arctic-corpus/
https://github.com/guanlongzhao/ppg-gmm},
  year      = {2018},
  date      = {2018-04-15},
  urldate   = {2018-04-15},
  booktitle = {Proc. ICASSP},
  keywords  = {Accent conversion, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2016
|
Aryal, S; Gutierrez-Osuna, R Data driven articulatory synthesis with deep neural networks Journal Article In: Computer Speech and Language, vol. 36, pp. 260-273, 2016.
@article{aryal-2015-cls,
  title     = {Data driven articulatory synthesis with deep neural networks},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2016csl.pdf},
  year      = {2016},
  date      = {2016-03-01},
  urldate   = {2016-03-01},
  journal   = {Computer Speech and Language},
  volume    = {36},
  pages     = {260--273},
  keywords  = {Accent conversion, Articulatory synthesis, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {article}
}
|
2015
|
Aryal, S; Gutierrez-Osuna, R Articulatory-based conversion of foreign accents with deep neural networks Proceedings Article In: Proc. Interspeech, pp. 3385-3389, 2015.
@inproceedings{aryal2015interspeech,
  title     = {Articulatory-based conversion of foreign accents with deep neural networks},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2015interspeech.pdf},
  year      = {2015},
  date      = {2015-09-06},
  urldate   = {2015-09-06},
  booktitle = {Proc. Interspeech},
  pages     = {3385--3389},
  keywords  = {Accent conversion, Articulatory synthesis, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|