{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T02:28:46Z","timestamp":1760149726251,"version":"build-2065373602"},"reference-count":97,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2023,8,4]],"date-time":"2023-08-04T00:00:00Z","timestamp":1691107200000},"content-version":"am","delay-in-days":215,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/http\/www.elsevier.com\/open-access\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100010665","name":"H2020 Marie Sk\u0142odowska-Curie Actions","doi-asserted-by":"publisher","award":["101007666"],"award-info":[{"award-number":["101007666"]}],"id":[{"id":"10.13039\/100010665","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100005715","name":"Le Mans Universit\u00e9","doi-asserted-by":"publisher","award":["ANR-19-CE23-0001-01","ARN-17-CHR2-0004-01"],"award-info":[{"award-number":["ANR-19-CE23-0001-01","ARN-17-CHR2-0004-01"]}],"id":[{"id":"10.13039\/501100005715","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Speech &amp; Language"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1016\/j.csl.2022.101437","type":"journal-article","created":{"date-parts":[[2022,7,27]],"date-time":"2022-07-27T11:34:37Z","timestamp":1658921677000},"page":"101437","update-policy":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Towards lifelong human assisted speaker diarization"],"prefix":"10.1016","volume":"77","author":[{"ORCID":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/orcid.org\/0000-0002-4104-9826","authenticated-orcid":false,"given":"Meysam","family":"Shamsi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/orcid.org\/0000-0003-4398-0224","authenticated-orcid":false,"given":"Anthony","family":"Larcher","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/orcid.org\/0000-0002-0634-6147","authenticated-orcid":false,"given":"Loic","family":"Barrault","sequence":"additional","affiliation":[]},{"given":"Sylvain","family":"Meignier","sequence":"additional","affiliation":[]},{"given":"Yevheni","family":"Prokopalo","sequence":"additional","affiliation":[]},{"given":"Marie","family":"Tahon","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/orcid.org\/0000-0003-4240-9915","authenticated-orcid":false,"given":"Ambuj","family":"Mehrish","sequence":"additional","affiliation":[]},{"given":"Simon","family":"Petitrenaud","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Galibert","sequence":"additional","affiliation":[]},{"given":"Samuel","family":"Gaist","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/orcid.org\/0000-0001-7248-4014","authenticated-orcid":false,"given":"Andr\u00e9","family":"Anjos","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/orcid.org\/0000-0002-2497-9140","authenticated-orcid":false,"given":"Sebastien","family":"Marcel","sequence":"additional","affiliation":[]},{"given":"Marta R.","family":"Costa-juss\u00e0","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.csl.2022.101437_b1","series-title":"2019 8th International Conference on Affective Computing and Intelligent Interaction (ACII)","first-page":"1","article-title":"Active learning for speech emotion recognition using deep neural network","author":"Abdelwahab","year":"2019"},{"issue":"2","key":"10.1016\/j.csl.2022.101437_b2","doi-asserted-by":"crossref","first-page":"356","DOI":"10.1109\/TASL.2011.2125954","article-title":"Speaker diarization: A review of recent research","volume":"20","author":"Anguera","year":"2012","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"key":"10.1016\/j.csl.2022.101437_b3","series-title":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4002","article-title":"Inter dataset variability compensation for speaker recognition","author":"Aronowitz","year":"2014"},{"year":"2020","series-title":"Efficient active learning for automatic speech recognition via augmented consistency regularization","author":"Bang","key":"10.1016\/j.csl.2022.101437_b4"},{"key":"10.1016\/j.csl.2022.101437_b5","doi-asserted-by":"crossref","unstructured":"Barker, J., Watanabe, S., Vincent, E., Trmal, J., 2018. The fifth CHiME speech separation and recognition challenge: dataset, task and baselines. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 1561\u20131565.","DOI":"10.21437\/Interspeech.2018-1768"},{"issue":"5","key":"10.1016\/j.csl.2022.101437_b6","doi-asserted-by":"crossref","first-page":"1505","DOI":"10.1109\/TASL.2006.878261","article-title":"Multistage speaker diarization of broadcast news","volume":"14","author":"Barras","year":"2006","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"key":"10.1016\/j.csl.2022.101437_b7","series-title":"2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)","first-page":"687","article-title":"The MGB challenge: Evaluating multi-genre broadcast media recognition","author":"Bell","year":"2015"},{"key":"10.1016\/j.csl.2022.101437_b8","series-title":"InterSpeech","article-title":"On robustness of unsupervised domain adaptation for speaker recognition","author":"Bousquet","year":"2019"},{"key":"10.1016\/j.csl.2022.101437_b9","doi-asserted-by":"crossref","unstructured":"Bredin,\u00a0H., 2017. pyannote.metrics: a toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems. In: 18th Annual Conference of the International Speech Communication Association. Stockholm, Sweden.","DOI":"10.21437\/Interspeech.2017-411"},{"key":"10.1016\/j.csl.2022.101437_b10","series-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"7124","article-title":"Pyannote. audio: neural building blocks for speaker diarization","author":"Bredin","year":"2020"},{"key":"10.1016\/j.csl.2022.101437_b11","doi-asserted-by":"crossref","unstructured":"Broux, P.-A., Desnous, F., Larcher, A., Petitrenaud, S., Carrive, J., Meignier, S., 2018. S4D: Speaker diarization toolkit in python. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 1368\u20131372.","DOI":"10.21437\/Interspeech.2018-1232"},{"key":"10.1016\/j.csl.2022.101437_b12","unstructured":"Broux, P.-A., Doukhan, D., Petitrenaud, S., Meignier, S., Carrive, J., 2018. Computer-assisted speaker diarization: How to evaluate human corrections. In: LREC 2018, Eleventh International Conference on Language Resources and Evaluation."},{"key":"10.1016\/j.csl.2022.101437_b13","article-title":"CALLHOME American english speech LDC97s42","author":"Canavan","year":"1997","journal-title":"Linguistic Data Consortium"},{"year":"2021","series-title":"Towards lifelong learning of end-to-end ASR","author":"Chang","key":"10.1016\/j.csl.2022.101437_b14"},{"article-title":"What makes a speaker recognizable in TV broadcast? Going beyond speaker identification error rate","year":"2015","author":"Charlet","key":"10.1016\/j.csl.2022.101437_b15"},{"issue":"3","key":"10.1016\/j.csl.2022.101437_b16","first-page":"1","article-title":"Lifelong machine learning","volume":"12","author":"Chen","year":"2018","journal-title":"Synth. Lectures Artif. Intell. Mach. Learn."},{"key":"10.1016\/j.csl.2022.101437_b17","series-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"7284","article-title":"Continuous speech separation: dataset and analysis","author":"Chen","year":"2020"},{"key":"10.1016\/j.csl.2022.101437_b18","doi-asserted-by":"crossref","unstructured":"Chung, J.S., Huh, J., Nagrani, A., Afouras, T., Zisserman, A., 2020. Spot the conversation: speaker diarisation in the wild. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 299\u2013303.","DOI":"10.21437\/Interspeech.2020-2337"},{"year":"2021","series-title":"Ecapa-TDNN embeddings for speaker diarization","author":"Dawalatabad","key":"10.1016\/j.csl.2022.101437_b19"},{"key":"10.1016\/j.csl.2022.101437_b20","doi-asserted-by":"crossref","unstructured":"Dehak, N., Dehak, R., Kenny, P., Brummer, N., Ouellet, P., Dumouchel, P., 2009. Support vector machines versus fast scoring in the low-dimensional total variability space for speaker verification. In: Tenth Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 1559\u20131562.","DOI":"10.21437\/Interspeech.2009-385"},{"key":"10.1016\/j.csl.2022.101437_b21","series-title":"The Speaker and Language Recognition Workshop (Odyssey","first-page":"147","article-title":"Speaker diarization based on Bayesian HMM with eigenvoice priors","author":"Diez","year":"2018"},{"key":"10.1016\/j.csl.2022.101437_b22","doi-asserted-by":"crossref","unstructured":"Dimitriadis, D., Fousek, P., 2017. Developing on-line speaker diarization system. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 2739\u20132743.","DOI":"10.21437\/Interspeech.2017-166"},{"key":"10.1016\/j.csl.2022.101437_b23","doi-asserted-by":"crossref","unstructured":"Dupuy, G., Meignier, S., Esteve, Y., 2014. Is incremental cross-show speaker diarization efficient for processing large volumes of data? In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 587\u2013591.","DOI":"10.21437\/Interspeech.2014-142"},{"key":"10.1016\/j.csl.2022.101437_b24","doi-asserted-by":"crossref","unstructured":"El\u00a0Shafey, L., Soltau, H., Shafran, I., 2019. Joint speech recognition and speaker diarization via sequence transduction. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 396\u2013400.","DOI":"10.21437\/Interspeech.2019-1943"},{"year":"2021","series-title":"Aishell-4: An open source dataset for speech enhancement, separation, recognition and speaker diarization in conference scenario","author":"Fu","key":"10.1016\/j.csl.2022.101437_b25"},{"key":"10.1016\/j.csl.2022.101437_b26","series-title":"IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","first-page":"296","article-title":"End-to-end neural speaker diarization with self-attention","author":"Fujita","year":"2019"},{"key":"10.1016\/j.csl.2022.101437_b27","series-title":"INTERSPEECH","first-page":"1131","article-title":"Methodologies for the evaluation of speaker diarization and automatic speech recognition in the presence of overlapping speech","author":"Galibert","year":"2013"},{"key":"10.1016\/j.csl.2022.101437_b28","doi-asserted-by":"crossref","unstructured":"Galliano, S., Geoffrois, E., Mostefa, D., Choukri, K., Bonastre, J.-F., Gravier, G., 2005. The ESTER phase II evaluation campaign for the rich transcription of French broadcast news. In: Ninth European Conference on Speech Communication and Technology.","DOI":"10.21437\/Interspeech.2005-441"},{"key":"10.1016\/j.csl.2022.101437_b29","series-title":"Proc. Odyssey 2020 the Speaker and Language Recognition Workshop","first-page":"1","article-title":"MagNetO: X-vector magnitude estimation network plus offset for improved speaker recognition","author":"Garcia-Romero","year":"2020"},{"key":"10.1016\/j.csl.2022.101437_b30","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4930","article-title":"Speaker diarization using deep neural network embeddings","author":"Garcia-Romero","year":"2017"},{"key":"10.1016\/j.csl.2022.101437_b31","doi-asserted-by":"crossref","unstructured":"Gauvain, J.-L., Lamel, L.F., Adda, G., 1998. Partitioning and transcription of broadcast news data. In: Fifth International Conference on Spoken Language Processing.","DOI":"10.21437\/ICSLP.1998-618"},{"key":"10.1016\/j.csl.2022.101437_b32","doi-asserted-by":"crossref","unstructured":"Gelly, G., Gauvain, J.-L., 2015. Minimum word error training of RNN-based voice activity detection. In: Sixteenth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2015-565"},{"key":"10.1016\/j.csl.2022.101437_b33","unstructured":"Giraudel, A., Carr\u00e9, M., Mapelli, V., Kahn, J., Galibert, O., Quintard, L., 2012. The REPERE Corpus: a multimodal corpus for person recognition. In: International Conference on Language Resources and Evaluation (LREC), pp. 1102\u20131107."},{"key":"10.1016\/j.csl.2022.101437_b34","unstructured":"Gravier, G., Adda, G., Paulson, N., Carr\u00e9, M., Giraudel, A., Galibert, O., 2012. The ETAPE corpus for the evaluation of speech-based TV content processing in the French language. In: International Conference on Language Resources, Evaluation and Corpora."},{"key":"10.1016\/j.csl.2022.101437_b35","unstructured":"Gravier, G., Bonastre, J.-F., Geoffrois, E., Galliano, S., McTait, K., Choukri, K., 2004. The ESTER evaluation campaign for the rich transcription of french broadcast news. In: International Conference on Language Resources and Evaluation (LREC)."},{"key":"10.1016\/j.csl.2022.101437_b36","doi-asserted-by":"crossref","unstructured":"Hansen, J.H., Sangwan, A., Joglekar, A., Bulut, A.E., Kaushik, L., Yu, C., 2018. Fearless steps: Apollo-11 corpus advancements for speech technologies from earth to the moon. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 2758\u20132762.","DOI":"10.21437\/Interspeech.2018-1942"},{"year":"2021","series-title":"The hitachi-jhu dihard iii system: Competitive end-to-end neural diarization and x-vector clustering systems combined by dover-lap","author":"Horiguchi","key":"10.1016\/j.csl.2022.101437_b37"},{"key":"10.1016\/j.csl.2022.101437_b38","series-title":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4945","article-title":"Convolutional neural network for speaker change detection in telephone speaker diarization system","author":"Hr\u00faz","year":"2017"},{"key":"10.1016\/j.csl.2022.101437_b39","doi-asserted-by":"crossref","unstructured":"Huijbregts, M., Wooters, C., 2007. The blame game: Performance analysis of speaker diarization system components. In: Eighth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2007-517"},{"key":"10.1016\/j.csl.2022.101437_b40","unstructured":"Jiaji,\u00a0H., Rewon,\u00a0C., Vinay,\u00a0R., Hairong,\u00a0L., Sanjeev,\u00a0S., Adam,\u00a0C., 2016. Active Learning for Speech Recognition: The Power of Gradients. In: The 30th Conference on Neural Information Processing Systems, NIPS. Barcelona, Spain, pp. 1\u20135."},{"key":"10.1016\/j.csl.2022.101437_b41","doi-asserted-by":"crossref","unstructured":"Jung, J.-w., Kim, S.-b., Shim, H.-j., Kim, J.-h., Yu, H.-J., 2020. Improved RawNet with feature map scaling for text-independent speaker verification using raw waveforms. In: Proc. Interspeech 2020, pp. 1496\u20131500.","DOI":"10.21437\/Interspeech.2020-1011"},{"key":"10.1016\/j.csl.2022.101437_b42","series-title":"INTERSPEECH","first-page":"3678","article-title":"Model adaptation and active learning in the BBN speech activity detection system for the DARPA RATS program","author":"Karakos","year":"2016"},{"issue":"8","key":"10.1016\/j.csl.2022.101437_b43","doi-asserted-by":"crossref","first-page":"1181","DOI":"10.1109\/LSP.2018.2811740","article-title":"Voice activity detection using an adaptive context attention model","volume":"25","author":"Kim","year":"2018","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.csl.2022.101437_b44","doi-asserted-by":"crossref","DOI":"10.1016\/j.csl.2021.101254","article-title":"Bayesian HMM clustering of x-vector sequences (VBx) in speaker diarization: Theory, implementation and analysis on standard tasks","volume":"71","author":"Landini","year":"2022","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2022.101437_b45","series-title":"2013 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"7673","article-title":"Phonetically-constrained PLDA modeling for text-dependent speaker verification with multiple short utterances","author":"Larcher","year":"2013"},{"key":"10.1016\/j.csl.2022.101437_b46","series-title":"2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","article-title":"Speaker embeddings for diarization of broadcast data in the allies challenge","author":"Larcher","year":"2021"},{"key":"10.1016\/j.csl.2022.101437_b47","series-title":"Interspeech 2020","article-title":"End-to-end domain-adversarial voice activity detection","author":"Lavechin","year":"2020"},{"key":"10.1016\/j.csl.2022.101437_b48","series-title":"Interspeech 2016","first-page":"2175","article-title":"Iterative PLDA adaptation for speaker diarization","author":"Le\u00a0Lan","year":"2016"},{"issue":"10","key":"10.1016\/j.csl.2022.101437_b49","doi-asserted-by":"crossref","first-page":"1821","DOI":"10.1109\/TASLP.2018.2844025","article-title":"An adaptive method for cross-recording speaker diarization","volume":"26","author":"Le\u00a0Lan","year":"2018","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"issue":"3","key":"10.1016\/j.csl.2022.101437_b50","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1007\/s11704-016-6903-6","article-title":"Lifelong machine learning: a paradigm for continuous learning","volume":"11","author":"Liu","year":"2017","journal-title":"Front. Comput. Sci."},{"issue":"24","key":"10.1016\/j.csl.2022.101437_b51","doi-asserted-by":"crossref","first-page":"5412","DOI":"10.3390\/app9245412","article-title":"Albayzin 2018 evaluation: the iberspeech-RTVE challenge on speech technologies for spanish broadcast media","volume":"9","author":"Lleida","year":"2019","journal-title":"Appl. Sci."},{"key":"10.1016\/j.csl.2022.101437_b52","series-title":"The Speaker and Language Recognition Workshop (Odyssey)","article-title":"On the use of agglomerative and spectral clustering in speaker diarization of meetings","author":"Luque","year":"2012"},{"year":"2021","series-title":"End-to-end diarization for variable number of speakers with local-global networks and discriminative speaker embeddings","author":"Maiti","key":"10.1016\/j.csl.2022.101437_b53"},{"key":"10.1016\/j.csl.2022.101437_b54","doi-asserted-by":"crossref","unstructured":"Mao, H.H., Li, S., McAuley, J., Cottrell, G.W., 2020. Speech recognition and multi-speaker diarization of long conversations. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 691\u2013695.","DOI":"10.21437\/Interspeech.2020-3039"},{"key":"10.1016\/j.csl.2022.101437_b55","series-title":"Workshop on Speech, Language and Audio in Multimedia","article-title":"Active selection with label propagation for minimizing human effort in speaker annotation of tv shows","author":"Mateusz","year":"2014"},{"key":"10.1016\/j.csl.2022.101437_b56","doi-asserted-by":"crossref","unstructured":"McLaren, M., Ferrer, L., Castan, D., Lawson, A., 2016. The speakers in the wild (SITW) speaker recognition database. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 818\u2013822.","DOI":"10.21437\/Interspeech.2016-1129"},{"key":"10.1016\/j.csl.2022.101437_b57","series-title":"CMU SPUD Workshop","article-title":"Lium SpkDiarization: an open source toolkit for diarization","author":"Meignier","year":"2010"},{"key":"10.1016\/j.csl.2022.101437_b58","series-title":"2006 IEEE International Conference on Acoustics Speech and Signal Processing Proceedings","first-page":"I","article-title":"Nuts and flakes: A study of data characteristics in speaker diarization","volume":"Vol. 1","author":"Mirghafori","year":"2006"},{"issue":"1","key":"10.1016\/j.csl.2022.101437_b59","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1016\/j.patcog.2011.06.019","article-title":"A unifying view on dataset shift in classification","volume":"45","author":"Moreno-Torres","year":"2012","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.csl.2022.101437_b60","doi-asserted-by":"crossref","DOI":"10.1016\/j.csl.2019.101027","article-title":"Voxceleb: Large-scale speaker verification in the wild","volume":"60","author":"Nagrani","year":"2020","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2022.101437_b61","doi-asserted-by":"crossref","unstructured":"Nagrani, A., Chung, J.S., Zisserman, A., 2017. VoxCeleb: A large-scale speaker identification dataset. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 2616\u20132620.","DOI":"10.21437\/Interspeech.2017-950"},{"key":"10.1016\/j.csl.2022.101437_b62","series-title":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"5455","article-title":"Speaker segmentation using i-vector in meetings domain","author":"Neri","year":"2017"},{"key":"10.1016\/j.csl.2022.101437_b63","doi-asserted-by":"crossref","unstructured":"Ng, T., Zhang, B., Nguyen, L., Matsoukas, S., Zhou, X., Mesgarani, N., Vesel\u1ef3, K., Mat\u011bjka, P., 2012. Developing a speech activity detection system for the DARPA RATS program. In: Thirteenth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2012-527"},{"key":"10.1016\/j.csl.2022.101437_b64","doi-asserted-by":"crossref","DOI":"10.1016\/j.csl.2020.101137","article-title":"A neural network approach for speech activity detection for Apollo corpus","volume":"65","author":"Pannala","year":"2021","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2022.101437_b65","series-title":"IEEE Workshop on Automatic Speech Recognition and Understanding, 2001. ASRU\u201901","first-page":"107","article-title":"Multispeaker speech activity detection for the ICSI meeting recorder","author":"Pfau","year":"2001"},{"key":"10.1016\/j.csl.2022.101437_b66","unstructured":"Prokopalo, Y., Meignier, S., Galibert, O., Barrault, L., Larcher, A., 2020. Evaluation of lifelong learning systems. In: International Conference on Language Resources and Evaluation."},{"key":"10.1016\/j.csl.2022.101437_b67","doi-asserted-by":"crossref","unstructured":"Prokopalo, Y., Shamsi, M., Barrault, L., Meignier, S., Larcher, A., 2021. Active correction for speaker diarization with human in the loop. In: Proc. IberSPEECH 2021, pp. 260\u2013264.","DOI":"10.21437\/IberSPEECH.2021-55"},{"key":"10.1016\/j.csl.2022.101437_b68","series-title":"2018 IEEE Spoken Language Technology Workshop (SLT)","first-page":"1021","article-title":"Speaker recognition from raw waveform with sincnet","author":"Ravanelli","year":"2018"},{"key":"10.1016\/j.csl.2022.101437_b69","series-title":"2007 IEEE Workshop on Automatic Speech Recognition & Understanding (ASRU)","first-page":"238","article-title":"Recognition and understanding of meetings the AMI and AMIDA projects","author":"Renals","year":"2007"},{"key":"10.1016\/j.csl.2022.101437_b70","series-title":"International Workshop on Machine Learning for Multimodal Interaction","first-page":"385","article-title":"The 2006 athens information technology speech activity detection and speaker diarization systems","author":"Rentzeperis","year":"2006"},{"key":"10.1016\/j.csl.2022.101437_b71","series-title":"Proceedings.(ICASSP\u201905). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005","first-page":"v","article-title":"Approaches and applications of audio diarization","volume":"Vol. 5","author":"Reynolds","year":"2005"},{"issue":"4","key":"10.1016\/j.csl.2022.101437_b72","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1109\/TSA.2005.848882","article-title":"Active learning: Theory and applications to automatic speech recognition","volume":"13","author":"Riccardi","year":"2005","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"10.1016\/j.csl.2022.101437_b73","unstructured":"Ryant, N., Church, K., Cieri, C., Cristia, A., Du, J., Ganapathy, S., Liberman, M., 2018. The first DIHARD speech diarization challenge. In: Annual Conference of the International Speech Communication Association (INTERSPEECH)."},{"key":"10.1016\/j.csl.2022.101437_b74","doi-asserted-by":"crossref","unstructured":"Ryant, N., Church, K., Cieri, C., Cristia, A., Du, J., Ganapathy, S., Liberman, M., 2019. The second DIHARD diarization challenge: dataset, task, and baselines. In: Proc. Interspeech 2019, pp. 978\u2013982.","DOI":"10.21437\/Interspeech.2019-1268"},{"year":"2020","series-title":"Third DIHARD challenge evaluation plan","author":"Ryant","key":"10.1016\/j.csl.2022.101437_b75"},{"key":"10.1016\/j.csl.2022.101437_b76","series-title":"INTERSPEECH","first-page":"728","article-title":"Speech activity detection on youtube using deep neural networks","author":"Ryant","year":"2013"},{"year":"2020","series-title":"The third DIHARD diarization challenge","author":"Ryant","key":"10.1016\/j.csl.2022.101437_b77"},{"key":"10.1016\/j.csl.2022.101437_b78","series-title":"The Speaker and Language Recognition Workshop (Odyssey","first-page":"266","article-title":"The 2019 NIST speaker recognition evaluation CTS challenge","author":"Sadjadi","year":"2020"},{"issue":"2","key":"10.1016\/j.csl.2022.101437_b79","doi-asserted-by":"crossref","first-page":"461","DOI":"10.1214\/aos\/1176344136","article-title":"Estimating the dimension of a model","volume":"6","author":"Schwarz","year":"1978","journal-title":"Ann. Statist."},{"key":"10.1016\/j.csl.2022.101437_b80","doi-asserted-by":"crossref","unstructured":"Sell, G., Snyder, D., McCree, A., Garcia-Romero, D., Villalba, J., Maciejewski, M., Manohar, V., Dehak, N., Povey, D., Watanabe, S., et al., 2018. Diarization is hard: some experiences and lessons learned for the JHU team in the inaugural DIHARD challenge. In: Annual Conference of the International Speech Communication Association (INTERSPEECH), pp. 2808\u20132812.","DOI":"10.21437\/Interspeech.2018-1893"},{"key":"10.1016\/j.csl.2022.101437_b81","series-title":"2017 Iranian Conference on Electrical Engineering (ICEE)","first-page":"1564","article-title":"Speech activity detection using deep neural networks","author":"Shahsavari","year":"2017"},{"issue":"10","key":"10.1016\/j.csl.2022.101437_b82","doi-asserted-by":"crossref","first-page":"2015","DOI":"10.1109\/TASL.2013.2264673","article-title":"Unsupervised methods for speaker diarization: An integrated and iterative approach","volume":"21","author":"Shum","year":"2013","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"key":"10.1016\/j.csl.2022.101437_b83","doi-asserted-by":"crossref","unstructured":"Shum, S.H., Dehak, N., Glass, J.R., 2014. Limited labels for unlimited data: Active learning for speaker recognition. In: Fifteenth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2014-89"},{"key":"10.1016\/j.csl.2022.101437_b84","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"5329","article-title":"X-vectors: Robust dnn embeddings for speaker recognition","author":"Snyder","year":"2018"},{"key":"10.1016\/j.csl.2022.101437_b85","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.csl.2019.06.005","article-title":"rVAD: An unsupervised segment-based robust voice activity detection method","volume":"59","author":"Tan","year":"2020","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2022.101437_b86","doi-asserted-by":"crossref","unstructured":"Tran, V.-A., Le, V., Barras, C., Lamel, L., 2011. Comparing multi-stage approaches for cross-show speaker diarization. In: Annual Conference of the International Speech Communication Association (INTERSPEECH).","DOI":"10.21437\/Interspeech.2011-392"},{"issue":"5","key":"10.1016\/j.csl.2022.101437_b87","doi-asserted-by":"crossref","first-page":"1557","DOI":"10.1109\/TASL.2006.878256","article-title":"An overview of automatic speaker diarization systems","volume":"14","author":"Tranter","year":"2006","journal-title":"IEEE Trans. Audio, Speech, Lang. Process."},{"key":"10.1016\/j.csl.2022.101437_b88","series-title":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"4052","article-title":"Deep neural networks for small footprint text-dependent speaker verification","author":"Variani","year":"2014"},{"key":"10.1016\/j.csl.2022.101437_b89","series-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"5239","article-title":"Speaker diarization with lstm","author":"Wang","year":"2018"},{"year":"2021","series-title":"USTC-NELSLIP system description for DIHARD-III challenge","author":"Wang","key":"10.1016\/j.csl.2022.101437_b90"},{"issue":"2","key":"10.1016\/j.csl.2022.101437_b91","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1899412.1899414","article-title":"Active learning in multimedia annotation and retrieval: A survey","volume":"2","author":"Wang","year":"2011","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"},{"key":"10.1016\/j.csl.2022.101437_b92","series-title":"Proceedings-Institute of Acoustics","first-page":"315","article-title":"Windmill-the use of a parsing algorithm to produce predictions for disabled persons","volume":"Vol. 18","author":"Wood","year":"1996"},{"key":"10.1016\/j.csl.2022.101437_b93","doi-asserted-by":"crossref","unstructured":"Yang, Q., Jin, Q., Schultz, T., 2011. Investigation of cross-show speaker diarization. In: Twelfth Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2011-732"},{"key":"10.1016\/j.csl.2022.101437_b94","series-title":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","first-page":"91","article-title":"Language diarization for semi-supervised bilingual acoustic model training","author":"Yilmaz","year":"2017"},{"key":"10.1016\/j.csl.2022.101437_b95","doi-asserted-by":"crossref","unstructured":"Yin, R., Bredin, H., Barras, C., 2018. Neural speech turn segmentation and affinity propagation for speaker diarization. In: Annual Conference of the International Speech Communication Association.","DOI":"10.21437\/Interspeech.2018-1750"},{"issue":"11","key":"10.1016\/j.csl.2022.101437_b96","doi-asserted-by":"crossref","first-page":"2188","DOI":"10.1109\/TASLP.2017.2747097","article-title":"Active learning based constrained clustering for speaker diarization","volume":"25","author":"Yu","year":"2017","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"key":"10.1016\/j.csl.2022.101437_b97","series-title":"ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"6301","article-title":"Fully supervised speaker diarization","author":"Zhang","year":"2019"}],"container-title":["Computer Speech &amp; Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/api.elsevier.com\/content\/article\/PII:S0885230822000638?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/api.elsevier.com\/content\/article\/PII:S0885230822000638?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T20:45:48Z","timestamp":1760129148000},"score":1,"resource":{"primary":{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230822000638"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1]]},"references-count":97,"alternative-id":["S0885230822000638"],"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.1016\/j.csl.2022.101437","relation":{},"ISSN":["0885-2308"],"issn-type":[{"type":"print","value":"0885-2308"}],"subject":[],"published":{"date-parts":[[2023,1]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Towards lifelong human assisted speaker diarization","name":"articletitle","label":"Article Title"},{"value":"Computer Speech & Language","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.1016\/j.csl.2022.101437","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2022 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"101437"}}