| <div> <div id=3a29a7b6-5a6e-4e51-aabe-515dafefbd63 class=plotly-graph-div style="height:500px; width:850px;"></div> <script>window.PLOTLYENV=window.PLOTLYENV||{},document.getElementById("3a29a7b6-5a6e-4e51-aabe-515dafefbd63")&&Plotly.newPlot("3a29a7b6-5a6e-4e51-aabe-515dafefbd63",[{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(78, 165, 183)"},name:"parameters",showlegend:!0,visible:!0,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAEjnD0AAAAAASPcPQAAAAACkCxBAAAAAAKQrEEA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(227, 138, 66)"},name:"gradients",showlegend:!0,visible:!0,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAEjnD0AAAAAASPcPQAAAAACkCxBAAAAAAKQrEEA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(232, 137, 171)"},name:"optimizer",showlegend:!0,visible:!0,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAEjnH0AAAAAASPcfQAAAAACkCyBAAAAAAKQrIEA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(206, 192, 250)"},name:"activations",showlegend:!0,visible:!0,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAACEDEAAAAAAAEIoQAAAAAAAIUZAAAAAAIAQZUA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(78, 165, 183)"},name:"parameters",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAKWXKkAAAAAApZ0qQAAAAAClqSpAAAAAAKXBKkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(227, 138, 66)"},name:"gradients",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAKWXKkAAAAAApZ0qQAAAAAClqSpAAAAAAKXBKkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(232, 137, 171)"},name:"optimizer",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAKWXOkAAAAAApZ06QAAAAAClqTpAAAAAAKXBOkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(206, 192, 250)"},name:"activations",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAABLGEAAAAAAgLUyQAAAAACA1U9AAAAAAMAKbUA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(78, 165, 183)"},name:"parameters",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAKL6OUAAAAAAov45QAAAAACiBjpAAAAAAKIWOkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(227, 138, 66)"},name:"gradients",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAKL6OUAAAAAAov45QAAAAACiBjpAAAAAAKIWOkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(232, 137, 171)"},name:"optimizer",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAKL6SUAAAAAAov5JQAAAAACiBkpAAAAAAKIWSkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(206, 192, 250)"},name:"activations",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAACCIkAAAAAAAII8QAAAAAAAQVhAAAAAAIAgdkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(78, 165, 183)"},name:"parameters",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAgER/bkAAAACARIBuQAAAAIBEgm5AAAAAgESGbkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(227, 138, 66)"},name:"gradients",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAgER/bkAAAACARIBuQAAAAIBEgm5AAAAAgESGbkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(232, 137, 171)"},name:"optimizer",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAgER/fkAAAACARIB+QAAAAIBEgn5AAAAAgESGfkA="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(206, 192, 250)"},name:"activations",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAAAhR0AAAAAAgNBhQAAAAACAUH5AAAAAAECom0A="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(78, 165, 183)"},name:"parameters",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAgPa/l0AAAACANsCXQAAAAIC2wJdAAAAAgLbBl0A="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(227, 138, 66)"},name:"gradients",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAgPa/l0AAAACANsCXQAAAAIC2wJdAAAAAgLbBl0A="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(232, 137, 171)"},name:"optimizer",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAgPa/p0AAAACANsCnQAAAAIC2wKdAAAAAgLbBp0A="},type:"bar"},{hovertemplate:"Seq len=%{x}<br>Mem=%{y:.1f}GB<br>%{data.name}<extra></extra>",marker:{color:"rgb(206, 192, 250)"},name:"activations",showlegend:!0,visible:!1,x:["1024","2048","4096","8192"],y:{dtype:"f8",bdata:"AAAAAIA2YkAAAAAAgA58QAAAAABA35dAAAAAAKDHtUA="},type:"bar"}],{template:{data:{barpolar:[{marker:{line:{color:"white",width:.5},pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"barpolar"}],bar:[{error_x:{color:"#2a3f5f"},error_y:{color:"#2a3f5f"},marker:{line:{color:"white",width:.5},pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"bar"}],carpet:[{aaxis:{endlinecolor:"#2a3f5f",gridcolor:"#C8D4E3",linecolor:"#C8D4E3",minorgridcolor:"#C8D4E3",startlinecolor:"#2a3f5f"},baxis:{endlinecolor:"#2a3f5f",gridcolor:"#C8D4E3",linecolor:"#C8D4E3",minorgridcolor:"#C8D4E3",startlinecolor:"#2a3f5f"},type:"carpet"}],choropleth:[{colorbar:{outlinewidth:0,ticks:""},type:"choropleth"}],contourcarpet:[{colorbar:{outlinewidth:0,ticks:""},type:"contourcarpet"}],contour:[{colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],type:"contour"}],heatmap:[{colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],type:"heatmap"}],histogram2dcontour:[{colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],type:"histogram2dcontour"}],histogram2d:[{colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],type:"histogram2d"}],histogram:[{marker:{pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"histogram"}],mesh3d:[{colorbar:{outlinewidth:0,ticks:""},type:"mesh3d"}],parcoords:[{line:{colorbar:{outlinewidth:0,ticks:""}},type:"parcoords"}],pie:[{automargin:!0,type:"pie"}],scatter3d:[{line:{colorbar:{outlinewidth:0,ticks:""}},marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scatter3d"}],scattercarpet:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scattercarpet"}],scattergeo:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scattergeo"}],scattergl:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scattergl"}],scattermapbox:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scattermapbox"}],scattermap:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scattermap"}],scatterpolargl:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scatterpolargl"}],scatterpolar:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scatterpolar"}],scatter:[{fillpattern:{fillmode:"overlay",size:10,solidity:.2},type:"scatter"}],scatterternary:[{marker:{colorbar:{outlinewidth:0,ticks:""}},type:"scatterternary"}],surface:[{colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],type:"surface"}],table:[{cells:{fill:{color:"#EBF0F8"},line:{color:"white"}},header:{fill:{color:"#C8D4E3"},line:{color:"white"}},type:"table"}]},layout:{annotationdefaults:{arrowcolor:"#2a3f5f",arrowhead:0,arrowwidth:1},autotypenumbers:"strict",coloraxis:{colorbar:{outlinewidth:0,ticks:""}},colorscale:{diverging:[[0,"#8e0152"],[.1,"#c51b7d"],[.2,"#de77ae"],[.3,"#f1b6da"],[.4,"#fde0ef"],[.5,"#f7f7f7"],[.6,"#e6f5d0"],[.7,"#b8e186"],[.8,"#7fbc41"],[.9,"#4d9221"],[1,"#276419"]],sequential:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],sequentialminus:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]},colorway:["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],font:{color:"#2a3f5f"},geo:{bgcolor:"white",lakecolor:"white",landcolor:"white",showlakes:!0,showland:!0,subunitcolor:"#C8D4E3"},hoverlabel:{align:"left"},hovermode:"closest",mapbox:{style:"light"},paper_bgcolor:"white",plot_bgcolor:"white",polar:{angularaxis:{gridcolor:"#EBF0F8",linecolor:"#EBF0F8",ticks:""},bgcolor:"white",radialaxis:{gridcolor:"#EBF0F8",linecolor:"#EBF0F8",ticks:""}},scene:{xaxis:{backgroundcolor:"white",gridcolor:"#DFE8F3",gridwidth:2,linecolor:"#EBF0F8",showbackground:!0,ticks:"",zerolinecolor:"#EBF0F8"},yaxis:{backgroundcolor:"white",gridcolor:"#DFE8F3",gridwidth:2,linecolor:"#EBF0F8",showbackground:!0,ticks:"",zerolinecolor:"#EBF0F8"},zaxis:{backgroundcolor:"white",gridcolor:"#DFE8F3",gridwidth:2,linecolor:"#EBF0F8",showbackground:!0,ticks:"",zerolinecolor:"#EBF0F8"}},shapedefaults:{line:{color:"#2a3f5f"}},ternary:{aaxis:{gridcolor:"#DFE8F3",linecolor:"#A2B1C6",ticks:""},baxis:{gridcolor:"#DFE8F3",linecolor:"#A2B1C6",ticks:""},bgcolor:"white",caxis:{gridcolor:"#DFE8F3",linecolor:"#A2B1C6",ticks:""}},title:{x:.05},xaxis:{automargin:!0,gridcolor:"#EBF0F8",linecolor:"#EBF0F8",ticks:"",title:{standoff:15},zerolinecolor:"#EBF0F8",zerolinewidth:2},yaxis:{automargin:!0,gridcolor:"#EBF0F8",linecolor:"#EBF0F8",ticks:"",title:{standoff:15},zerolinecolor:"#EBF0F8",zerolinewidth:2}}},title:{text:"Memory Usage with Recomputation",x:.5,y:.95},margin:{r:80},legend:{y:.95,x:1.02,xanchor:"left",yanchor:"top"},updatemenus:[{active:0,buttons:[{args:[{visible:[!0,!0,!0,!0,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1]},{"yaxis.range":[0,203.1547058105469]}],label:"1B",method:"update"},{args:[{visible:[!1,!1,!1,!1,!0,!0,!0,!0,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1]},{"yaxis.range":[0,314.4336639404297]}],label:"3B",method:"update"},{args:[{visible:[!1,!1,!1,!1,!1,!1,!1,!1,!0,!0,!0,!0,!1,!1,!1,!1,!1,!1,!1,!1]},{"yaxis.range":[0,504.2233764648438]}],label:"8B",method:"update"},{args:[{visible:[!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!0,!0,!0,!0,!1,!1,!1,!1]},{"yaxis.range":[0,3021.530541992188]}],label:"70B",method:"update"},{args:[{visible:[!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!1,!0,!0,!0,!0]},{"yaxis.range":[0,12823.0716796875]}],label:"405B",method:"update"}],direction:"down",showactive:!0,x:1.035,xanchor:"left",y:.6,yanchor:"top"},{active:0,buttons:[{args:[{y:[[3.9879302978515625,3.9957427978515625,4.0113677978515625,4.0426177978515625],[3.9879302978515625,3.9957427978515625,4.0113677978515625,4.0426177978515625],[7.975860595703125,7.991485595703125,8.022735595703125,8.085235595703125],[3.564453125,12.12890625,44.2578125,168.515625],[13.296180725097656,13.307899475097656,13.331336975097656,13.378211975097656],[13.296180725097656,13.307899475097656,13.331336975097656,13.378211975097656],[26.592361450195313,26.615798950195313,26.662673950195313,26.756423950195313],[6.0732421875,18.708984375,63.66796875,232.3359375],[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125],[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125],[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625],[9.25390625,28.5078125,97.015625,354.03125],[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625],[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625],[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125],[46.2578125,142.515625,485.03125,1770.0625],[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625],[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625],[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125],[145.703125,448.90625,1527.8125,5575.625]]}],label:"None",method:"restyle"},{args:[{y:[[3.9879302978515625,3.9957427978515625,4.0113677978515625,4.0426177978515625],[3.9879302978515625,3.9957427978515625,4.0113677978515625,4.0426177978515625],[7.975860595703125,7.991485595703125,8.022735595703125,8.085235595703125],[1.064453125,2.12890625,4.2578125,8.515625],[13.296180725097656,13.307899475097656,13.331336975097656,13.378211975097656],[13.296180725097656,13.307899475097656,13.331336975097656,13.378211975097656],[26.592361450195313,26.615798950195313,26.662673950195313,26.756423950195313],[2.7919921875,5.583984375,11.16796875,22.3359375],[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125],[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125],[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625],[4.25390625,8.5078125,17.015625,34.03125],[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625],[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625],[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125],[21.2578125,42.515625,85.03125,170.0625],[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625],[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625],[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125],[66.953125,133.90625,267.8125,535.625]]}],label:"selective",method:"restyle"},{args:[{y:[[3.9879302978515625,3.9957427978515625,4.0113677978515625,4.0426177978515625],[3.9879302978515625,3.9957427978515625,4.0113677978515625,4.0426177978515625],[7.975860595703125,7.991485595703125,8.022735595703125,8.085235595703125],[.064453125,.12890625,.2578125,.515625],[13.296180725097656,13.307899475097656,13.331336975097656,13.378211975097656],[13.296180725097656,13.307899475097656,13.331336975097656,13.378211975097656],[26.592361450195313,26.615798950195313,26.662673950195313,26.756423950195313],[.1669921875,.333984375,.66796875,1.3359375],[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125],[25.979034423828125,25.994659423828125,26.025909423828125,26.088409423828125],[51.95806884765625,51.98931884765625,52.05181884765625,52.17681884765625],[.25390625,.5078125,1.015625,2.03125],[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625],[243.97711181640625,244.00836181640625,244.07086181640625,244.19586181640625],[487.9542236328125,488.0167236328125,488.1417236328125,488.3917236328125],[1.2578125,2.515625,5.03125,10.0625],[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625],[1519.99072265625,1520.05322265625,1520.17822265625,1520.42822265625],[3039.9814453125,3040.1064453125,3040.3564453125,3040.8564453125],[3.953125,7.90625,15.8125,31.625]]}],label:"full",method:"restyle"}],direction:"down",showactive:!0,x:1.035,xanchor:"left",y:.4,yanchor:"top"}],barmode:"stack",xaxis:{title:{text:"Sequence Length"}},yaxis:{title:{text:"Memory (GB)"},range:[0,203.1547058105469]},width:850,height:500,annotations:[{showarrow:!1,text:"Model Size:",x:1.035,xanchor:"left",xref:"paper",y:.6,yanchor:"bottom",yref:"paper"},{showarrow:!1,text:"Recomputation:",x:1.035,xanchor:"left",xref:"paper",y:.4,yanchor:"bottom",yref:"paper"}]},{responsive:!0,scrollZoom:!1})</script> </div> | |

