{"id":"https://openalex.org/W4415277372","doi":"https://doi.org/10.48550/arxiv.2510.13747","title":"InteractiveOmni: A Unified Omni-modal Model for Audio-Visual Multi-turn Dialogue","display_name":"InteractiveOmni: A Unified Omni-modal Model for Audio-Visual Multi-turn Dialogue","publication_year":2025,"publication_date":"2025-10-15","ids":{"openalex":"https://openalex.org/W4415277372","doi":"https://doi.org/10.48550/arxiv.2510.13747"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2510.13747","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.13747","pdf_url":"https://arxiv.org/pdf/2510.13747","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.13747","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035679060","display_name":"Wenwen Tong","orcid":"https://orcid.org/0000-0003-0176-9834"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tong, Wenwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Guo, Hewei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Hewei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120034785","display_name":"Dongchuan Ran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ran, Dongchuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004702455","display_name":"Jiangnan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiangnan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050357261","display_name":"Jiefan Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Jiefan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023407762","display_name":"Kaibin Wang","orcid":"https://orcid.org/0000-0001-5967-5692"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Kaibin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031855986","display_name":"Keqiang Li","orcid":"https://orcid.org/0000-0002-9333-7416"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Keqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064668113","display_name":"Xiaoxu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xiaoxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060389661","display_name":"Jiakui Li","orcid":"https://orcid.org/0000-0002-6065-6648"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiakui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377518","display_name":"Kun Li","orcid":"https://orcid.org/0000-0001-9100-0302"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Kehan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110996721","display_name":"Xueheng Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xueheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044397668","display_name":"Linjie Li","orcid":"https://orcid.org/0000-0002-8317-6735"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Lumin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Guo, Chenxu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Chenxu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhou, Jiasheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jiasheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101498585","display_name":"Jiandong Chen","orcid":"https://orcid.org/0000-0002-9625-7008"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiandong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021691628","display_name":"Xiaoxin Wu","orcid":"https://orcid.org/0000-0001-9785-8916"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Xianye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100329219","display_name":"Jiahao Wang","orcid":"https://orcid.org/0000-0002-8768-4913"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiahao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111094357","display_name":"Silei Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Silei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100333453","display_name":"Lei Chen","orcid":"https://orcid.org/0000-0002-1859-384X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Lei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077375709","display_name":"Hanming Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Hanming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021851960","display_name":"Yuxuan Song","orcid":"https://orcid.org/0000-0002-5302-7835"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002904368","display_name":"Dinghao Zhou","orcid":"https://orcid.org/0009-0000-8519-4630"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Dinghao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028715445","display_name":"Guang Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Guiping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109480444","display_name":"Ken Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Ken","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083463839","display_name":"Shiyin Kang","orcid":"https://orcid.org/0000-0001-8304-5260"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Shiyin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5109786494","display_name":"Lewei Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Lewei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":26,"corresponding_author_ids":["https://openalex.org/A5035679060"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11148","display_name":"Language, Metaphor, and Cognition","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.9660999774932861,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.7037000060081482},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.6977999806404114},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5774000287055969},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5504000186920166},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.536899983882904},{"id":"https://openalex.org/keywords/unified-model","display_name":"Unified Model","score":0.3862000107765198},{"id":"https://openalex.org/keywords/memory-model","display_name":"Memory model","score":0.3431999981403351}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8101000189781189},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.7037000060081482},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.6977999806404114},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5774000287055969},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5504000186920166},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.536899983882904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4634000062942505},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.3862000107765198},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3855000138282776},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.3431999981403351},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.30309998989105225},{"id":"https://openalex.org/C83195618","wikidata":"https://www.wikidata.org/wiki/Q590951","display_name":"Cued speech","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.28299999237060547},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2750999927520752},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2703999876976013},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.2687999904155731},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.25}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.13747","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.13747","pdf_url":"https://arxiv.org/pdf/2510.13747","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.13747","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.13747","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.13747","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.13747","pdf_url":"https://arxiv.org/pdf/2510.13747","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415277372.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,63],"introduce":[1],"InteractiveOmni,":[2],"a":[3,55,65,97,146],"unified":[4,56],"and":[5,34,51,60,85,109,118,129,144,172,203],"open-source":[6,142,211],"omni-modal":[7,32,77],"large":[8,48],"language":[9,49],"model":[10,57,166,189],"for":[11,58,76,213],"audio-visual":[12,86,150],"multi-turn":[13,98,110,116,126,131,149],"interaction,":[14],"ranging":[15],"from":[16],"4B":[17],"to":[18,22,69,106,162],"8B":[19],"parameters,":[20],"designed":[21],"lead":[23],"the":[24,43,103,115,124,130,163,178,181,188],"field":[25],"of":[26,177,180,187],"lightweight":[27],"models":[28,143,197],"by":[29,80],"offering":[30],"comprehensive":[31],"understanding":[33,59],"speech":[35,52,83,119,132,204],"generation":[36,61,205],"capabilities.":[37,157],"To":[38,88,112],"achieve":[39],"this,":[40],"we":[41,94,122],"integrate":[42],"vision":[44],"encoder,":[45,47],"audio":[46],"model,":[50],"decoder":[53],"into":[54],"tasks.":[62],"design":[64],"multi-stage":[66],"training":[67,99],"strategy":[68],"ensure":[70],"robust":[71],"cross-modal":[72],"capabilities,":[73,121],"including":[74],"pre-training":[75],"understanding,":[78,202],"followed":[79],"post-training":[81],"with":[82],"conversation":[84],"interaction.":[87],"enable":[89],"human-like":[90],"long-term":[91,155],"conversational":[92],"ability,":[93],"meticulously":[95],"curate":[96],"dataset":[100],"that":[101,137],"enhances":[102],"model's":[104],"ability":[105],"handle":[107],"complex":[108],"interactions.":[111],"effectively":[113],"evaluate":[114],"memory":[117,127,156],"interaction":[120,133],"construct":[123],"multi-modal":[125],"benchmark":[128],"benchmark.":[134],"Experiments":[135],"demonstrate":[136],"InteractiveOmni":[138,207],"significantly":[139],"outperforms":[140],"leading":[141],"provides":[145],"more":[147],"intelligent":[148,215],"experience,":[151],"particularly":[152],"in":[153],"its":[154],"Notably,":[158],"InteractiveOmni-4B":[159],"is":[160,208],"comparable":[161],"much":[164],"larger":[165],"like":[167],"Qwen2.5-Omni-7B":[168],"on":[169],"general":[170],"benchmarks,":[171],"it":[173],"can":[174],"retain":[175],"97%":[176],"performance":[179],"InteractiveOmni-8B":[182],"while":[183],"utilizing":[184],"only":[185],"50%":[186],"size.":[190],"Achieving":[191],"state-of-the-art":[192],"results":[193],"against":[194],"similarly":[195],"sized":[196],"across":[198],"image,":[199],"audio,":[200],"video":[201],"tasks,":[206],"an":[209],"accessible,":[210],"foundation":[212],"next-generation":[214],"interactive":[216],"systems.":[217]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-17T00:00:00"}
