{"id":"https://openalex.org/W4416203724","doi":"https://doi.org/10.1145/3712285.3759859","title":"RingX: Scalable Parallel Attention for Long-Context Learning on HPC","display_name":"RingX: Scalable Parallel Attention for Long-Context Learning on HPC","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416203724","doi":"https://doi.org/10.1145/3712285.3759859"},"language":null,"primary_location":{"id":"doi:10.1145/3712285.3759859","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712285.3759859","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3712285.3759859","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051882926","display_name":"Junqi Yin","orcid":"https://orcid.org/0000-0003-3843-5520"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junqi Yin","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0003-3843-5520","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041845799","display_name":"Mijanur Palash","orcid":"https://orcid.org/0000-0002-9751-1370"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mijanur Palash","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-9751-1370","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035099577","display_name":"Mallikarjun Shankar","orcid":"https://orcid.org/0000-0001-5289-7460"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mallikarjun Shankar","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0001-5289-7460","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101916963","display_name":"Feiyi Wang","orcid":"https://orcid.org/0000-0002-0099-1559"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Feiyi Wang","raw_affiliation_strings":["Oak Ridge National Laboratory (ORNL), Oak Ridge, USA"],"raw_orcid":"https://orcid.org/0000-0002-0099-1559","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory (ORNL), Oak Ridge, USA","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.2914453,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1395","last_page":"1408"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7777000069618225,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7777000069618225,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0560000017285347,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.029500000178813934,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8371000289916992},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7498000264167786},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5968000292778015},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.47870001196861267},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4652000069618225},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.43230000138282776},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41999998688697815},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.37529999017715454}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8371000289916992},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8245999813079834},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7498000264167786},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5968000292778015},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4869000017642975},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.47870001196861267},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4652000069618225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44110000133514404},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41999998688697815},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.384799987077713},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38100001215934753},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.37529999017715454},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.31839999556541443},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2973000109195709},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.27549999952316284},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3712285.3759859","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712285.3759859","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3712285.3759859","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3712285.3759859","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1974991834","https://openalex.org/W2122861381","https://openalex.org/W2131613942","https://openalex.org/W3025949386","https://openalex.org/W3204998121","https://openalex.org/W4388697414","https://openalex.org/W4389518760","https://openalex.org/W4399452094","https://openalex.org/W4405387414","https://openalex.org/W4406157736"],"related_works":[],"abstract_inverted_index":{"The":[0],"attention":[1,50,91,120,131],"mechanism":[2],"has":[3],"become":[4],"foundational":[5],"for":[6,19,59,95,126,182,202],"remarkable":[7],"AI":[8],"breakthroughs":[9],"since":[10],"the":[11,14,17,73,122,175,197],"introduction":[12],"of":[13,75,89,140,165,196],"Transformer,":[15],"driving":[16],"demand":[18],"increasingly":[20],"longer":[21],"context":[22],"to":[23,70,112,117],"power":[24],"frontier":[25],"models":[26,32],"such":[27,52],"as":[28,53],"large-scale":[29],"reasoning":[30],"language":[31],"and":[33,41,105,129,149],"high-resolution":[34],"image/video":[35],"generators.":[36],"However,":[37],"its":[38,135],"quadratic":[39],"computational":[40],"memory":[42],"complexities":[43],"present":[44],"substantial":[45],"challenges.":[46],"Current":[47],"state-of-the-art":[48],"parallel":[49,90],"methods,":[51],"ring":[54,119],"attention,":[55],"are":[56],"widely":[57],"adopted":[58],"long-context":[60,203],"training":[61,138,163,183,199],"but":[62],"utilize":[63],"a":[64,86,141,146,150,187],"point-to-point":[65],"communication":[66,103],"strategy":[67],"that":[68],"fails":[69],"fully":[71],"exploit":[72],"capabilities":[74],"modern":[76],"HPC":[77,96,206],"network":[78],"architectures.":[79],"In":[80],"this":[81],"work,":[82],"we":[83],"propose":[84],"ringX,":[85],"scalable":[87],"family":[88],"methods":[92],"optimized":[93],"explicitly":[94],"systems.":[97,207],"By":[98],"enhancing":[99],"workload":[100],"partitioning,":[101],"refining":[102],"patterns,":[104],"improving":[106],"load":[107],"balancing,":[108],"ringX":[109,133],"achieves":[110],"up":[111],"3.4":[113],"\u00d7":[114,168],"speedup":[115,164],"compared":[116],"conventional":[118],"on":[121,145,191,205],"Frontier":[123],"supercomputer.":[124],"Optimized":[125],"both":[127,170],"bi-directional":[128],"causal":[130],"mechanisms,":[132],"demonstrates":[134],"effectiveness":[136],"through":[137],"benchmarks":[139],"Vision":[142],"Transformer":[143,153],"(ViT)":[144],"climate":[147],"dataset":[148],"Generative":[151],"Pre-Trained":[152],"(GPT)":[154],"model,":[155],"Llama3":[156,184],"8B.":[157],"Our":[158,208],"method":[159],"attains":[160],"an":[161],"end-to-end":[162],"approximately":[166],"1.5":[167],"in":[169],"scenarios.":[171],"To":[172],"our":[173],"knowledge,":[174],"achieved":[176],"38%":[177],"model":[178],"FLOPs":[179],"utilization":[180],"(MFU)":[181],"8B":[185],"with":[186],"1M-token":[188],"sequence":[189],"length":[190],"4,096":[192],"GPUs":[193],"represents":[194],"one":[195],"highest":[198],"efficiencies":[200],"reported":[201],"learning":[204],"code":[209],"implementation":[210],"is":[211],"available":[212],"at":[213],"https://github.com/jqyin/ringX-attention.":[214]},"counts_by_year":[],"updated_date":"2026-06-27T08:28:00.272161","created_date":"2025-11-12T00:00:00"}
