File size: 9,093 Bytes
			
			| 60aea95 | 1 | <div><!-- Two-panel Plotly figure: left = throughput (tokens/sec/GPU) per data-parallel size, with narrow "Performance Drop" bars spanning the loss from the previous size; right = memory usage (GB) per data-parallel size. -->                            <div id=f6b00dd8-6230-46cf-9b38-f7fd425a1dd3 class=plotly-graph-div style="height:400px; width:1000px;"></div>            <script>window.PLOTLYENV=window.PLOTLYENV||{},document.getElementById("f6b00dd8-6230-46cf-9b38-f7fd425a1dd3")&&Plotly.newPlot("f6b00dd8-6230-46cf-9b38-f7fd425a1dd3",[{marker:{color:"#4ea5b7"},name:"Throughput (tokens/sec/GPU)",width:.7,x:["8","16","32","64","128","256"],y:[40149.94,37609.69,35367.61,31112.23,26446.44,15700.38],type:"bar",xaxis:"x",yaxis:"y"},{base:[37609.69],marker:{color:"#e889ab"},name:"Performance Drop",showlegend:!0,width:.0875,x:["16"],y:[2540.25],type:"bar",xaxis:"x",yaxis:"y"},{base:[35367.61],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["32"],y:[2242.0800000000017],type:"bar",xaxis:"x",yaxis:"y"},{base:[31112.23],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["64"],y:[4255.380000000001],type:"bar",xaxis:"x",yaxis:"y"},{base:[26446.44],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["128"],y:[4665.790000000001],type:"bar",xaxis:"x",yaxis:"y"},{base:[15700.38],marker:{color:"#e889ab"},showlegend:!1,width:.0875,x:["256"],y:[10746.06],type:"bar",xaxis:"x",yaxis:"y"},{line:{color:"#e889ab"},marker:{color:"#e889ab"},mode:"lines+markers",name:"Memory Usage (GB)",x:["8","16","32","64","128","256"],y:[36.66,36.66,36.66,36.66,36.66,36.66],type:"scatter",xaxis:"x2",yaxis:"y2"}],{template:{data:{histogram2dcontour:[{type:"histogram2dcontour",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],choropleth:[{type:"choropleth",colorbar:{outlinewidth:0,ticks:""}}],histogram2d:[{type:"histogram2d",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],heatmap:[{type:"heatmap",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],heatmapgl:[{type:"heatmapgl",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],contourcarpet:[{type:"contourcarpet",colorbar:{outlinewidth:0,ticks:""}}],contour:[{type:"contour",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],surface:[{type:"surface",colorbar:{outlinewidth:0,ticks:""},colorscale:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]]}],mesh3d:[{type:"mesh3d",colorbar:{outlinewidth:0,ticks:""}}],scatter:[{fillpattern:{fillmode:"overlay",size:10,solidity:.2},type:"scatter"}],parcoords:[{type:"parcoords",line:{colorbar:{outlinewidth:0,ticks:""}}}],scatterpolargl:[{type:"scatterpolargl",marker:{colorbar:{outlinewidth:0,ticks:""}}}],bar:[{error_x:{color:"#2a3f5f"},error_y:{color:"#2a3f5f"},marker:{line:{color:"#E5ECF6",width:.5},pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"bar"}],scattergeo:[{type:"scattergeo",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scatterpolar:[{type:"scatterpolar",marker:{colorbar:{outlinewidth:0,ticks:""}}}],histogram:[{marker:{pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"histogram"}],scattergl:[{type:"scattergl",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scatter3d:[{type:"scatter3d",line:{colorbar:{outlinewidth:0,ticks:""}},marker:{colorbar:{outlinewidth:0,ticks:""}}}],scattermapbox:[{type:"scattermapbox",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scatterternary:[{type:"scatterternary",marker:{colorbar:{outlinewidth:0,ticks:""}}}],scattercarpet:[{type:"scattercarpet",marker:{colorbar:{outlinewidth:0,ticks:""}}}],carpet:[{aaxis:{endlinecolor:"#2a3f5f",gridcolor:"white",linecolor:"white",minorgridcolor:"white",startlinecolor:"#2a3f5f"},baxis:{endlinecolor:"#2a3f5f",gridcolor:"white",linecolor:"white",minorgridcolor:"white",startlinecolor:"#2a3f5f"},type:"carpet"}],table:[{cells:{fill:{color:"#EBF0F8"},line:{color:"white"}},header:{fill:{color:"#C8D4E3"},line:{color:"white"}},type:"table"}],barpolar:[{marker:{line:{color:"#E5ECF6",width:.5},pattern:{fillmode:"overlay",size:10,solidity:.2}},type:"barpolar"}],pie:[{automargin:!0,type:"pie"}]},layout:{autotypenumbers:"strict",colorway:["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],font:{color:"#2a3f5f"},hovermode:"closest",hoverlabel:{align:"left"},paper_bgcolor:"white",plot_bgcolor:"#E5ECF6",polar:{bgcolor:"#E5ECF6",angularaxis:{gridcolor:"white",linecolor:"white",ticks:""},radialaxis:{gridcolor:"white",linecolor:"white",ticks:""}},ternary:{bgcolor:"#E5ECF6",aaxis:{gridcolor:"white",linecolor:"white",ticks:""},baxis:{gridcolor:"white",linecolor:"white",ticks:""},caxis:{gridcolor:"white",linecolor:"white",ticks:""}},coloraxis:{colorbar:{outlinewidth:0,ticks:""}},colorscale:{sequential:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],sequentialminus:[[0,"#0d0887"],[.1111111111111111,"#46039f"],[.2222222222222222,"#7201a8"],[.3333333333333333,"#9c179e"],[.4444444444444444,"#bd3786"],[.5555555555555556,"#d8576b"],[.6666666666666666,"#ed7953"],[.7777777777777778,"#fb9f3a"],[.8888888888888888,"#fdca26"],[1,"#f0f921"]],diverging:[[0,"#8e0152"],[.1,"#c51b7d"],[.2,"#de77ae"],[.3,"#f1b6da"],[.4,"#fde0ef"],[.5,"#f7f7f7"],[.6,"#e6f5d0"],[.7,"#b8e186"],[.8,"#7fbc41"],[.9,"#4d9221"],[1,"#276419"]]},xaxis:{gridcolor:"white",linecolor:"white",ticks:"",title:{standoff:15},zerolinecolor:"white",automargin:!0,zerolinewidth:2},yaxis:{gridcolor:"white",linecolor:"white",ticks:"",title:{standoff:15},zerolinecolor:"white",automargin:!0,zerolinewidth:2},scene:{xaxis:{backgroundcolor:"#E5ECF6",gridcolor:"white",linecolor:"white",showbackground:!0,ticks:"",zerolinecolor:"white",gridwidth:2},yaxis:{backgroundcolor:"#E5ECF6",gridcolor:"white",linecolor:"white",showbackground:!0,ticks:"",zerolinecolor:"white",gridwidth:2},zaxis:{backgroundcolor:"#E5ECF6",gridcolor:"white",linecolor:"white",showbackground:!0,ticks:"",zerolinecolor:"white",gridwidth:2}},shapedefaults:{line:{color:"#2a3f5f"}},annotationdefaults:{arrowcolor:"#2a3f5f",arrowhead:0,arrowwidth:1},geo:{bgcolor:"white",landcolor:"#E5ECF6",subunitcolor:"white",showland:!0,showlakes:!0,lakecolor:"white"},title:{x:.05},mapbox:{style:"light"}}},xaxis:{anchor:"y",domain:[0,.45],title:{text:"Data Parallelism (DP)"},showgrid:!0,gridcolor:"LightGray"},yaxis:{anchor:"x",domain:[0,1],title:{text:"Throughput (tokens/sec/GPU)"},showgrid:!0,gridcolor:"LightGray"},xaxis2:{anchor:"y2",domain:[.55,1],title:{text:"Data Parallelism (DP)"},showgrid:!0,gridcolor:"LightGray"},yaxis2:{anchor:"x2",domain:[0,1],title:{text:"Memory Usage (GB)"},showgrid:!0,gridcolor:"LightGray"},annotations:[{font:{size:16},showarrow:!1,text:"Throughput Scaling with Data Parallelism",x:.225,xanchor:"center",xref:"paper",y:1,yanchor:"bottom",yref:"paper"},{font:{size:16},showarrow:!1,text:"Memory Usage Scaling with Data Parallelism",x:.775,xanchor:"center",xref:"paper",y:1,yanchor:"bottom",yref:"paper"},{font:{color:"#e889ab"},showarrow:!1,text:"-6.3%",x:1,xanchor:"center",xref:"x",xshift:30,y:38879.815,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-6.0%",x:2,xanchor:"center",xref:"x",xshift:30,y:36488.65,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-12.0%",x:3,xanchor:"center",xref:"x",xshift:30,y:33239.92,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-15.0%",x:4,xanchor:"center",xref:"x",xshift:30,y:28779.335,yanchor:"middle",yref:"y"},{font:{color:"#e889ab"},showarrow:!1,text:"-40.6%",x:5,xanchor:"center",xref:"x",xshift:30,y:21073.41,yanchor:"middle",yref:"y"}],legend:{x:.55,y:1},width:1e3,height:400,barmode:"stack"},{responsive:!0})</script>        </div> | 
