2016
|
Aryal, S; Gutierrez-Osuna, R Comparing Articulatory and Acoustic Strategies for Reducing Non-Native Accents Proceedings Article In: Proc. Interspeech, 2016.
@inproceedings{aryal-2016-interspeech,
  title     = {Comparing Articulatory and Acoustic Strategies for Reducing Non-Native Accents},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2016interspeech.pdf},
  year      = {2016},
  date      = {2016-09-08},
  booktitle = {Proc. Interspeech},
  keywords  = {Accent conversion, Articulatory synthesis, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Liberatore, C; Gutierrez-Osuna, R Generating Gestural Scores from Acoustics Through a Sparse Anchor-Based Representation of Speech Proceedings Article In: Proc. Interspeech, 2016.
@inproceedings{liberatore2016interspeech,
  title     = {Generating Gestural Scores from Acoustics Through a Sparse Anchor-Based Representation of Speech},
  author    = {Liberatore, C. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/liberatore2016interspeech.pdf},
  year      = {2016},
  date      = {2016-09-08},
  booktitle = {Proc. Interspeech},
  keywords  = {Articulatory synthesis, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Aryal, S; Gutierrez-Osuna, R Data driven articulatory synthesis with deep neural networks Journal Article In: Computer Speech and Language, vol. 36, pp. 260-273, 2016.
@article{aryal-2015-cls,
  title     = {Data driven articulatory synthesis with deep neural networks},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2016csl.pdf},
  year      = {2016},
  date      = {2016-03-01},
  urldate   = {2016-03-01},
  journal   = {Computer Speech and Language},
  volume    = {36},
  pages     = {260--273},
  keywords  = {Accent conversion, Articulatory synthesis, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {article}
}
|
2015
|
Aryal, S; Gutierrez-Osuna, R Articulatory-based conversion of foreign accents with deep neural networks Proceedings Article In: Proc. Interspeech, pp. 3385-3389, 2015.
@inproceedings{aryal2015interspeech,
  title     = {Articulatory-based conversion of foreign accents with deep neural networks},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2015interspeech.pdf},
  year      = {2015},
  date      = {2015-09-06},
  urldate   = {2015-09-06},
  booktitle = {Proc. Interspeech},
  pages     = {3385--3389},
  keywords  = {Accent conversion, Articulatory synthesis, Deep learning, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Liberatore, C; Aryal, S; Wang, Z; Polsley, S; Gutierrez-Osuna, R SABR: Sparse, Anchor-Based Representation of the Speech Signal Proceedings Article In: Proc. Interspeech 2015, pp. 608-612, 2015.
@inproceedings{liberatore2015interspeech,
  title     = {{SABR}: Sparse, Anchor-Based Representation of the Speech Signal},
  author    = {Liberatore, C. and Aryal, S. and Wang, Z. and Polsley, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/liberatore2015interspeech.pdf},
  year      = {2015},
  date      = {2015-09-06},
  booktitle = {Proc. Interspeech 2015},
  pages     = {608--612},
  abstract  = {We present SABR (Sparse, Anchor-Based Representation), an analysis technique to decompose the speech signal into
               speaker-dependent and speaker-independent components. Given a collection of utterances for a particular
               speaker, SABR uses the centroid for each phoneme as an acoustic ``anchor,'' then applies Lasso regularization to
               the method on a speaker-independent phoneme recognition task and a voice conversion task. Using a linear classifier,
               SABR weights achieve significantly higher phoneme recognition rates than Mel frequency Cepstral coefficients. SABR weights can also be used directly to perform accent conversion without the need to train a speaker-to-speaker regression model.},
  keywords  = {Accent conversion, Articulatory synthesis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
We present SABR (Sparse, Anchor-Based Representation), an analysis technique to decompose the speech signal into
speaker-dependent and speaker-independent components. Given a collection of utterances for a particular
speaker, SABR uses the centroid for each phoneme as an acoustic “anchor,” then applies Lasso regularization to
the method on a speaker-independent phoneme recognition task and a voice conversion task. Using a linear classifier,
SABR weights achieve significantly higher phoneme recognition rates than Mel frequency Cepstral coefficients. SABR weights can also be used directly to perform accent conversion without the need to train a speaker-to-speaker regression model. |
Liberatore, C; Gutierrez-Osuna, R Joint Optimization of Anatomical and Gestural Parameters in a Physical Vocal Tract Model Proceedings Article In: ICASSP, IEEE 2015.
@inproceedings{liberatore2015icassp,
  title        = {Joint Optimization of Anatomical and Gestural Parameters in a Physical Vocal Tract Model},
  author       = {Liberatore, C. and Gutierrez-Osuna, R.},
  url          = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/liberatore2015icassp.pdf},
  year         = {2015},
  date         = {2015-04-19},
  booktitle    = {{ICASSP}},
  organization = {IEEE},
  keywords     = {Accent conversion, Articulatory inversion, Articulatory synthesis},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
|
Aryal, S; Gutierrez-Osuna, R Reduction of non-native accents through statistical parametric articulatory synthesis Journal Article In: Journal of the Acoustical Society of America, vol. 137, no. 1, pp. 433-446, 2015.
@article{aryal2015jasa,
  title     = {Reduction of non-native accents through statistical parametric articulatory synthesis},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2015jasa.pdf},
  year      = {2015},
  date      = {2015-01-23},
  journal   = {Journal of the Acoustical Society of America},
  volume    = {137},
  number    = {1},
  pages     = {433--446},
  keywords  = {Accent conversion, Articulatory synthesis, Speech},
  pubstate  = {published},
  tppubtype = {article}
}
|
2014
|
Aryal, S; Gutierrez-Osuna, R Accent conversion through cross-speaker articulatory synthesis Proceedings Article In: Proc. 39th International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 7744-7748, 2014.
@inproceedings{sandesh2014icassp,
  title     = {Accent conversion through cross-speaker articulatory synthesis},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/sandesh2014icassp.pdf},
  year      = {2014},
  date      = {2014-05-09},
  booktitle = {Proc. 39th International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  pages     = {7744--7748},
  keywords  = {Accent conversion, Articulatory synthesis, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Felps, D; Aryal, S; Gutierrez-Osuna, R Normalization of articulatory data through Procrustes transformations and analysis-by-synthesis Proceedings Article In: Proc. 39th International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 3051-3055, 2014.
@inproceedings{danielprocrustes2014icassp,
  title     = {Normalization of articulatory data through {Procrustes} transformations and analysis-by-synthesis},
  author    = {Felps, D. and Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/danielprocrustes2014icassp.pdf},
  year      = {2014},
  date      = {2014-05-09},
  booktitle = {Proc. 39th International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  pages     = {3051--3055},
  keywords  = {Accent conversion, Articulatory synthesis, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2013
|
Aryal, S; Gutierrez-Osuna, R Articulatory inversion and synthesis: towards articulatory-based modification of speech Proceedings Article In: 38th International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 7952-7956, 2013.
@inproceedings{aryal2013icassp,
  title     = {Articulatory inversion and synthesis: towards articulatory-based modification of speech},
  author    = {Aryal, S. and Gutierrez-Osuna, R.},
  url       = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2013icassp.pdf},
  year      = {2013},
  date      = {2013-02-28},
  booktitle = {38th International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  pages     = {7952--7956},
  keywords  = {Articulatory inversion, Articulatory synthesis, Speech},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2012
|
Aryal, S; Gutierrez-Osuna, R Articulatory Inversion and Synthesis: Towards Articulatory-Based Modification of Speech Technical Report 2012.
@techreport{aryal2012techreport,
  title       = {Articulatory Inversion and Synthesis: Towards Articulatory-Based Modification of Speech},
  author      = {Aryal, S. and Gutierrez-Osuna, R.},
  url         = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/aryal2012techreport.pdf},
  year        = {2012},
  date        = {2012-12-04},
  institution = {Texas A\&M University},
  abstract    = {Certain speech modifications, such as changes in foreign/regional accents or articulatory styles, are performed more effectively in the articulatory domain than in the acoustic domain. Though measuring articulators is cumbersome, articulatory parameters may be estimated from acoustics through inversion. In this paper, we study the impact on synthesis quality when articulators predicted from acoustics are used in articulatory synthesis. For this purpose, we trained a GMM articulatory synthesizer and drove it with articulators predicted with an RBF-based inversion model. Using inverted instead of measured articulators degraded synthesis quality, as measured through Mel cepstral distortion and subjective tests. However, retraining the synthesizer with predicted articulators not only reversed the effect of errors introduced during inversion but also improved synthesis quality relative to using measured articulators. These results suggest that inverted articulators do not compromise synthesis quality, and open up the possibility of performing speech modification in the articulatory domain through inversion.},
  keywords    = {Articulatory inversion, Articulatory synthesis, Speech},
  pubstate    = {published},
  tppubtype   = {techreport}
}
Certain speech modifications, such as changes in foreign/regional accents or articulatory styles, are performed more effectively in the articulatory domain than in the acoustic domain. Though measuring articulators is cumbersome, articulatory parameters may be estimated from acoustics through inversion. In this paper, we study the impact on synthesis quality when articulators predicted from acoustics are used in articulatory synthesis. For this purpose, we trained a GMM articulatory synthesizer and drove it with articulators predicted with an RBF-based inversion model. Using inverted instead of measured articulators degraded synthesis quality, as measured through Mel cepstral distortion and subjective tests. However, retraining the synthesizer with predicted articulators not only reversed the effect of errors introduced during inversion but also improved synthesis quality relative to using measured articulators. These results suggest that inverted articulators do not compromise synthesis quality, and open up the possibility of performing speech modification in the articulatory domain through inversion. |
2010
|
Felps, D; Gutierrez-Osuna, R Normalization of Articulatory Data through Procrustes Transformations and Analysis-by-synthesis Technical Report 2010.
@techreport{felps2010techreport,
  title       = {Normalization of Articulatory Data through {Procrustes} Transformations and Analysis-by-synthesis},
  author      = {Felps, D. and Gutierrez-Osuna, R.},
  url         = {https://psi.engr.tamu.edu/wp-content/uploads/2018/01/felps2010techreport.pdf},
  year        = {2010},
  date        = {2010-05-05},
  institution = {Texas A\&M University},
  abstract    = {We describe and compare three methods that can be used to normalize articulatory data across speakers. The methods seek to explain systematic anatomical differences between a source and target speaker without modifying the articulatory velocities of the source speaker. The first method is the classical Procrustes transform, which allows for a global translation, rotation, and scaling of articulator positions. An extension to the Procrustes transform is presented that allows independent translations of each articulator. The additional parameters provide a 35\% increase in articulatory similarity between two speakers when compared to classical Procrustes. The proposed extension is also coupled with a data-driven articulatory synthesizer to select model parameters that best explain the predicted acoustic (rather than articulatory) differences.},
  keywords    = {Articulatory synthesis, Speech},
  pubstate    = {published},
  tppubtype   = {techreport}
}
We describe and compare three methods that can be used to normalize articulatory data across speakers. The methods seek to explain systematic anatomical differences between a source and target speaker without modifying the articulatory velocities of the source speaker. The first method is the classical Procrustes transform, which allows for a global translation, rotation, and scaling of articulator positions. An extension to the Procrustes transform is presented that allows independent translations of each articulator. The additional parameters provide a 35% increase in articulatory similarity between two speakers when compared to classical Procrustes. The proposed extension is also coupled with a data-driven articulatory synthesizer to select model parameters that best explain the predicted acoustic (rather than articulatory) differences. |