{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,18]],"date-time":"2026-06-18T15:42:30Z","timestamp":1781797350605,"version":"3.54.5"},"reference-count":58,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,10]]},"DOI":"10.1109\/iccv.2017.317","type":"proceedings-article","created":{"date-parts":[[2017,12,25]],"date-time":"2017-12-25T21:51:45Z","timestamp":1514238705000},"page":"2933-2942","source":"Crossref","is-referenced-by-count":701,"title":["Temporal Action Detection with Structured Segment Networks"],"prefix":"10.1109","author":[{"given":"Yue","family":"Zhao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuanjun","family":"Xiong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Limin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhirong","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaoou","family":"Tang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dahua","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"NIPS"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.89"},{"key":"ref33","author":"pont-tuset","year":"2015","journal-title":"Multiscale Combinatorial Grouping for Image Segmentation and Object Proposal Generation"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.85"},{"key":"ref31","article-title":"Multi-region two-stream r-cnn for action detection","author":"peng","year":"2016","journal-title":"ECCV"},{"key":"ref30","article-title":"The lear submission at thumos 2014","author":"oneata","year":"2014","journal-title":"THUMOS Action Recognition challenge"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.119"},{"key":"ref36","doi-asserted-by":"crossref","first-page":"187","DOI":"10.3233\/FI-2000-411207","article-title":"The watershed transform: Definitions, algorithms and parallelization strategies","volume":"41","author":"roerdink","year":"2000","journal-title":"Fundamenta Informaticae"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.341"},{"key":"ref34","first-page":"91","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"NIPS"},{"key":"ref28","first-page":"392","article-title":"Modeling temporal structure of decomposable motion segments for activity classification","author":"niebles","year":"2010","journal-title":"ECCV"},{"key":"ref27","first-page":"4694","article-title":"Beyond short snippets: Deep networks for video classification","author":"ng","year":"2015","journal-title":"CVPR"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.228"},{"key":"ref2","first-page":"269","article-title":"Online action detection","author":"de geest","year":"2016","journal-title":"ECCV"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206754"},{"key":"ref20","first-page":"282","article-title":"Conditional random fields: Probabilistic models for segmenting and labeling sequence data","volume":"1","author":"lafferty","year":"2001","journal-title":"ICML"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.68"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-005-1838-7"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"427","DOI":"10.1145\/2671188.2749404","article-title":"Bag-of-fragments: Selecting and encoding video fragments for event detection and recounting","author":"mettes","year":"2015","journal-title":"ICMR"},{"key":"ref23","first-page":"379","article-title":"R-fcn: Object detection via region-based fully convolutional networks","author":"li","year":"2016","journal-title":"NIPS"},{"key":"ref26","article-title":"Temporal activity detection in untrimmed videos with recurrent neural networks","author":"montes","year":"2016","journal-title":"NIPS Workshop"},{"key":"ref25","first-page":"437","article-title":"Spot on: Action localization from pointly-supervised proposals","author":"mettes","year":"2016","journal-title":"ECCV"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.296"},{"key":"ref51","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"wang","year":"2016","journal-title":"ECCV"},{"key":"ref58","first-page":"391","article-title":"Edge boxes: Locating object proposals from edges","author":"zitnick","year":"2014","journal-title":"ECCV"},{"key":"ref57","first-page":"2718","article-title":"Realtime action recognition with enhanced motion vector CNNs","author":"zhang","year":"2016","journal-title":"CVPR"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.337"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.293"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.362"},{"key":"ref53","article-title":"UTS at activitynet 2016","author":"wang","year":"2016","journal-title":"AcitivityNet Large Scale Activity Recognition Challenge 2016"},{"key":"ref52","article-title":"Temporal pyramid pooling based convolutional neural network for action recognition","author":"wang","year":"2016","journal-title":"IEEE TCSVT"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.65"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.216"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"ref14","first-page":"346","article-title":"Spatial pyramid pooling in deep convolutional networks for visual recognition","author":"he","year":"2014","journal-title":"ECCV"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995470"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-008-0137-5"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.100"},{"key":"ref18","author":"jiang","year":"2014","journal-title":"THUMOS Challenge Action Recognition with A Large Number of Classes"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.211"},{"key":"ref5","first-page":"768","article-title":"Daps: Deep action proposals for action understanding","author":"escorcia","year":"2016","journal-title":"ECCV"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.167"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299176"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126456"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.214"},{"key":"ref47","article-title":"Action recognition and detection by combining motion and appearance features","author":"wang","year":"2014","journal-title":"THUMOS Action Recognition challenge"},{"key":"ref42","author":"soomro","year":"2012","journal-title":"Ucf101 A Dataset of 101 Human Actions Classes from Videos in the Wild"},{"key":"ref41","author":"singh","year":"2016","journal-title":"Untrimmed video classification for activity detection submission to activitynet challenge"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.335"}],"event":{"name":"2017 IEEE International Conference on Computer Vision (ICCV)","location":"Venice","start":{"date-parts":[[2017,10,22]]},"end":{"date-parts":[[2017,10,29]]}},"container-title":["2017 IEEE International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/http\/xplorestaging.ieee.org\/ielx7\/8234942\/8237262\/08237579.pdf?arnumber=8237579","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,25]],"date-time":"2022-01-25T21:18:53Z","timestamp":1643145533000},"score":1,"resource":{"primary":{"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/http\/ieeexplore.ieee.org\/document\/8237579\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10]]},"references-count":58,"URL":"https:\/\/summer-heart-0930.chufeiyun1688.workers.dev:443\/https\/doi.org\/10.1109\/iccv.2017.317","relation":{},"subject":[],"published":{"date-parts":[[2017,10]]}}}