VMS/0__Writings/kshalle

changeset 65:8bbaf1544726

perf-tuning -- added levels-of-UCC figure
author Sean Halle <seanhalle@yahoo.com>
date Mon, 09 Jul 2012 04:36:18 -0700
parents 06073dc28f72
children 56a95617a079
files 0__Papers/Holistic_Model/Perf_Tune/figures/UCC_Levels_2.pdf 0__Papers/Holistic_Model/Perf_Tune/figures/UCC_Levels_2.svg 0__Papers/Holistic_Model/Perf_Tune/latex/Holistic_Perf_Tuning.tex 0__Papers/VMS/VMS__Foundation_Paper/VMS__Full_conference_version/latex/VMS__Full_conf_paper.tex
diffstat 4 files changed, 574 insertions(+), 117 deletions(-) [+]
line diff
     1.1 Binary file 0__Papers/Holistic_Model/Perf_Tune/figures/UCC_Levels_2.pdf has changed
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/0__Papers/Holistic_Model/Perf_Tune/figures/UCC_Levels_2.svg	Mon Jul 09 04:36:18 2012 -0700
     2.3 @@ -0,0 +1,490 @@
     2.4 +<?xml version="1.0" encoding="UTF-8" standalone="no"?>
     2.5 +<!-- Created with Inkscape (http://www.inkscape.org/) -->
     2.6 +
     2.7 +<svg
     2.8 +   xmlns:dc="http://purl.org/dc/elements/1.1/"
     2.9 +   xmlns:cc="http://creativecommons.org/ns#"
    2.10 +   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    2.11 +   xmlns:svg="http://www.w3.org/2000/svg"
    2.12 +   xmlns="http://www.w3.org/2000/svg"
    2.13 +   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
    2.14 +   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
    2.15 +   width="744.09448819"
    2.16 +   height="1052.3622047"
    2.17 +   id="svg2"
    2.18 +   version="1.1"
    2.19 +   inkscape:version="0.48.1 "
    2.20 +   sodipodi:docname="UCC_Levels_2.svg">
    2.21 +  <defs
    2.22 +     id="defs4">
    2.23 +    <inkscape:path-effect
    2.24 +       effect="spiro"
    2.25 +       id="path-effect4859"
    2.26 +       is_visible="true" />
    2.27 +    <marker
    2.28 +       inkscape:stockid="Arrow2Mend"
    2.29 +       orient="auto"
    2.30 +       refY="0.0"
    2.31 +       refX="0.0"
    2.32 +       id="Arrow2Mend"
    2.33 +       style="overflow:visible;">
    2.34 +      <path
    2.35 +         id="path17998"
    2.36 +         style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
    2.37 +         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
    2.38 +         transform="scale(0.6) rotate(180) translate(0,0)" />
    2.39 +    </marker>
    2.40 +    <marker
    2.41 +       inkscape:stockid="Arrow2Mstart"
    2.42 +       orient="auto"
    2.43 +       refY="0.0"
    2.44 +       refX="0.0"
    2.45 +       id="Arrow2Mstart"
    2.46 +       style="overflow:visible">
    2.47 +      <path
    2.48 +         id="path17995"
    2.49 +         style="font-size:12.0;fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round"
    2.50 +         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
    2.51 +         transform="scale(0.6) translate(0,0)" />
    2.52 +    </marker>
    2.53 +    <path
    2.54 +       style="fill:none;stroke:none"
    2.55 +       d="m 29.5,198.86218 c 25.5,-0.5 34.26,1.5 34.26,1.5 6.111967,2.74943 6.270559,39.8642 2.74,43 -2.636563,4.50583 -9.877722,4.341 -37,4.5"
    2.56 +       id="path17945"
    2.57 +       inkscape:connector-curvature="0"
    2.58 +       sodipodi:nodetypes="cccc" />
    2.59 +    <marker
    2.60 +       inkscape:stockid="Arrow2Mstart"
    2.61 +       orient="auto"
    2.62 +       refY="0"
    2.63 +       refX="0"
    2.64 +       id="Arrow2Mstart-9"
    2.65 +       style="overflow:visible">
    2.66 +      <path
    2.67 +         inkscape:connector-curvature="0"
    2.68 +         id="path3788"
    2.69 +         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
    2.70 +         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
    2.71 +         transform="scale(0.6,0.6)" />
    2.72 +    </marker>
    2.73 +    <marker
    2.74 +       inkscape:stockid="Arrow2Mend"
    2.75 +       orient="auto"
    2.76 +       refY="0"
    2.77 +       refX="0"
    2.78 +       id="Arrow2Mend-1"
    2.79 +       style="overflow:visible">
    2.80 +      <path
    2.81 +         inkscape:connector-curvature="0"
    2.82 +         id="path3791"
    2.83 +         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
    2.84 +         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
    2.85 +         transform="scale(-0.6,-0.6)" />
    2.86 +    </marker>
    2.87 +    <marker
    2.88 +       inkscape:stockid="Arrow2Mstart"
    2.89 +       orient="auto"
    2.90 +       refY="0"
    2.91 +       refX="0"
    2.92 +       id="marker5592"
    2.93 +       style="overflow:visible">
    2.94 +      <path
    2.95 +         inkscape:connector-curvature="0"
    2.96 +         id="path5594"
    2.97 +         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
    2.98 +         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
    2.99 +         transform="scale(0.6,0.6)" />
   2.100 +    </marker>
   2.101 +    <marker
   2.102 +       inkscape:stockid="Arrow2Mend"
   2.103 +       orient="auto"
   2.104 +       refY="0"
   2.105 +       refX="0"
   2.106 +       id="marker5596"
   2.107 +       style="overflow:visible">
   2.108 +      <path
   2.109 +         inkscape:connector-curvature="0"
   2.110 +         id="path5598"
   2.111 +         style="font-size:12px;fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
   2.112 +         d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
   2.113 +         transform="scale(-0.6,-0.6)" />
   2.114 +    </marker>
   2.115 +  </defs>
   2.116 +  <sodipodi:namedview
   2.117 +     id="base"
   2.118 +     pagecolor="#ffffff"
   2.119 +     bordercolor="#666666"
   2.120 +     borderopacity="1.0"
   2.121 +     inkscape:pageopacity="0.0"
   2.122 +     inkscape:pageshadow="2"
   2.123 +     inkscape:zoom="1.33"
   2.124 +     inkscape:cx="497.19723"
   2.125 +     inkscape:cy="578.53934"
   2.126 +     inkscape:document-units="px"
   2.127 +     inkscape:current-layer="layer1"
   2.128 +     showgrid="false"
   2.129 +     inkscape:window-width="1600"
   2.130 +     inkscape:window-height="848"
   2.131 +     inkscape:window-x="-8"
   2.132 +     inkscape:window-y="-8"
   2.133 +     inkscape:window-maximized="1" />
   2.134 +  <metadata
   2.135 +     id="metadata7">
   2.136 +    <rdf:RDF>
   2.137 +      <cc:Work
   2.138 +         rdf:about="">
   2.139 +        <dc:format>image/svg+xml</dc:format>
   2.140 +        <dc:type
   2.141 +           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
   2.142 +        <dc:title></dc:title>
   2.143 +      </cc:Work>
   2.144 +    </rdf:RDF>
   2.145 +  </metadata>
   2.146 +  <g
   2.147 +     inkscape:label="Layer 1"
   2.148 +     inkscape:groupmode="layer"
   2.149 +     id="layer1">
   2.150 +    <g
   2.151 +       id="g15927">
   2.152 +      <g
   2.153 +         transform="matrix(0.93969262,-0.34202014,1.2652501,0.28372618,-323.47751,514.75128)"
   2.154 +         id="g15874">
   2.155 +        <rect
   2.156 +           transform="matrix(0.99714719,-0.07548172,-0.08765568,0.99615083,0,0)"
   2.157 +           style="fill:none;stroke:#000000;stroke-width:1.2038151"
   2.158 +           id="rect15876"
   2.159 +           width="276.67776"
   2.160 +           height="140.14508"
   2.161 +           x="103.80169"
   2.162 +           y="344.93823" />
   2.163 +      </g>
   2.164 +      <text
   2.165 +         transform="matrix(0.77410869,0.20583095,-0.80109956,1.0788006,0,0)"
   2.166 +         xml:space="preserve"
   2.167 +         style="font-size:15.35861111px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
   2.168 +         x="866.8421"
   2.169 +         y="303.21371"
   2.170 +         id="text15878"
   2.171 +         sodipodi:linespacing="125%"><tspan
   2.172 +           style="font-size:13.16452408px;text-align:start;text-anchor:start"
   2.173 +           id="tspan15880"
   2.174 +           sodipodi:role="line"
   2.175 +           x="866.8421"
   2.176 +           y="303.21371">Board-runtime-level Units</tspan><tspan
   2.177 +           style="font-size:13.16452408px;text-align:center;text-anchor:middle"
   2.178 +           sodipodi:role="line"
   2.179 +           x="866.8421"
   2.180 +           y="319.66937"
   2.181 +           id="tspan15882" /></text>
   2.182 +      <path
   2.183 +         sodipodi:type="arc"
   2.184 +         style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5.7047378, 17.1142134;stroke-dashoffset:0"
   2.185 +         id="path15884"
   2.186 +         sodipodi:cx="189.43661"
   2.187 +         sodipodi:cy="304.47485"
   2.188 +         sodipodi:rx="114.08451"
   2.189 +         sodipodi:ry="24.647888"
   2.190 +         d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.191 +         transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,264.46655,470.36122)" />
   2.192 +      <path
   2.193 +         transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,336.46655,434.36122)"
   2.194 +         d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.195 +         sodipodi:ry="24.647888"
   2.196 +         sodipodi:rx="114.08451"
   2.197 +         sodipodi:cy="304.47485"
   2.198 +         sodipodi:cx="189.43661"
   2.199 +         id="path15886"
   2.200 +         style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5.7047378, 17.1142134;stroke-dashoffset:0"
   2.201 +         sodipodi:type="arc" />
   2.202 +      <path
   2.203 +         transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,218.46655,448.36122)"
   2.204 +         d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.205 +         sodipodi:ry="24.647888"
   2.206 +         sodipodi:rx="114.08451"
   2.207 +         sodipodi:cy="304.47485"
   2.208 +         sodipodi:cx="189.43661"
   2.209 +         id="path15888"
   2.210 +         style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5.7047378, 17.1142134;stroke-dashoffset:0"
   2.211 +         sodipodi:type="arc" />
   2.212 +      <path
   2.213 +         sodipodi:type="arc"
   2.214 +         style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:5.7047378, 17.1142134;stroke-dashoffset:0"
   2.215 +         id="path15890"
   2.216 +         sodipodi:cx="189.43661"
   2.217 +         sodipodi:cy="304.47485"
   2.218 +         sodipodi:rx="114.08451"
   2.219 +         sodipodi:ry="24.647888"
   2.220 +         d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.221 +         transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,284.46655,414.36122)" />
   2.222 +      <g
   2.223 +         id="g15898">
   2.224 +        <path
   2.225 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,327.5472,544.84749)"
   2.226 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.227 +           sodipodi:ry="24.647888"
   2.228 +           sodipodi:rx="114.08451"
   2.229 +           sodipodi:cy="304.47485"
   2.230 +           sodipodi:cx="189.43661"
   2.231 +           id="path15892"
   2.232 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.233 +           sodipodi:type="arc" />
   2.234 +        <path
   2.235 +           sodipodi:type="arc"
   2.236 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.237 +           id="path15894"
   2.238 +           sodipodi:cx="189.43661"
   2.239 +           sodipodi:cy="304.47485"
   2.240 +           sodipodi:rx="114.08451"
   2.241 +           sodipodi:ry="24.647888"
   2.242 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.243 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,307.5472,552.84749)" />
   2.244 +        <path
   2.245 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,333.5472,552.84749)"
   2.246 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.247 +           sodipodi:ry="24.647888"
   2.248 +           sodipodi:rx="114.08451"
   2.249 +           sodipodi:cy="304.47485"
   2.250 +           sodipodi:cx="189.43661"
   2.251 +           id="path15896"
   2.252 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.253 +           sodipodi:type="arc" />
   2.254 +      </g>
   2.255 +      <g
   2.256 +         transform="translate(-42,-24)"
   2.257 +         id="g15903">
   2.258 +        <path
   2.259 +           sodipodi:type="arc"
   2.260 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.261 +           id="path15905"
   2.262 +           sodipodi:cx="189.43661"
   2.263 +           sodipodi:cy="304.47485"
   2.264 +           sodipodi:rx="114.08451"
   2.265 +           sodipodi:ry="24.647888"
   2.266 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.267 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,327.5472,544.84749)" />
   2.268 +        <path
   2.269 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,307.5472,552.84749)"
   2.270 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.271 +           sodipodi:ry="24.647888"
   2.272 +           sodipodi:rx="114.08451"
   2.273 +           sodipodi:cy="304.47485"
   2.274 +           sodipodi:cx="189.43661"
   2.275 +           id="path15907"
   2.276 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.277 +           sodipodi:type="arc" />
   2.278 +        <path
   2.279 +           sodipodi:type="arc"
   2.280 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.281 +           id="path15909"
   2.282 +           sodipodi:cx="189.43661"
   2.283 +           sodipodi:cy="304.47485"
   2.284 +           sodipodi:rx="114.08451"
   2.285 +           sodipodi:ry="24.647888"
   2.286 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.287 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,333.5472,552.84749)" />
   2.288 +      </g>
   2.289 +      <g
   2.290 +         id="g15911"
   2.291 +         transform="translate(24,-60)">
   2.292 +        <path
   2.293 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,327.5472,544.84749)"
   2.294 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.295 +           sodipodi:ry="24.647888"
   2.296 +           sodipodi:rx="114.08451"
   2.297 +           sodipodi:cy="304.47485"
   2.298 +           sodipodi:cx="189.43661"
   2.299 +           id="path15913"
   2.300 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.301 +           sodipodi:type="arc" />
   2.302 +        <path
   2.303 +           sodipodi:type="arc"
   2.304 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.305 +           id="path15915"
   2.306 +           sodipodi:cx="189.43661"
   2.307 +           sodipodi:cy="304.47485"
   2.308 +           sodipodi:rx="114.08451"
   2.309 +           sodipodi:ry="24.647888"
   2.310 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.311 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,307.5472,552.84749)" />
   2.312 +        <path
   2.313 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,333.5472,552.84749)"
   2.314 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.315 +           sodipodi:ry="24.647888"
   2.316 +           sodipodi:rx="114.08451"
   2.317 +           sodipodi:cy="304.47485"
   2.318 +           sodipodi:cx="189.43661"
   2.319 +           id="path15917"
   2.320 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.321 +           sodipodi:type="arc" />
   2.322 +      </g>
   2.323 +      <g
   2.324 +         transform="translate(74,-38)"
   2.325 +         id="g15919">
   2.326 +        <path
   2.327 +           sodipodi:type="arc"
   2.328 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.329 +           id="path15921"
   2.330 +           sodipodi:cx="189.43661"
   2.331 +           sodipodi:cy="304.47485"
   2.332 +           sodipodi:rx="114.08451"
   2.333 +           sodipodi:ry="24.647888"
   2.334 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.335 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,327.5472,544.84749)" />
   2.336 +        <path
   2.337 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,307.5472,552.84749)"
   2.338 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.339 +           sodipodi:ry="24.647888"
   2.340 +           sodipodi:rx="114.08451"
   2.341 +           sodipodi:cy="304.47485"
   2.342 +           sodipodi:cx="189.43661"
   2.343 +           id="path15923"
   2.344 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.345 +           sodipodi:type="arc" />
   2.346 +        <path
   2.347 +           sodipodi:type="arc"
   2.348 +           style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:10.69198513;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.349 +           id="path15925"
   2.350 +           sodipodi:cx="189.43661"
   2.351 +           sodipodi:cy="304.47485"
   2.352 +           sodipodi:rx="114.08451"
   2.353 +           sodipodi:ry="24.647888"
   2.354 +           d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.355 +           transform="matrix(0.07884783,-0.02377253,0.04149978,0.13420785,333.5472,552.84749)" />
   2.356 +      </g>
   2.357 +    </g>
   2.358 +    <flowRoot
   2.359 +       xml:space="preserve"
   2.360 +       id="flowRoot5626"
   2.361 +       style="fill:black;stroke:none;stroke-opacity:1;stroke-width:1px;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:1;font-family:Sans;font-style:normal;font-weight:normal;font-size:14px;line-height:125%;letter-spacing:0px;word-spacing:0px"><flowRegion
   2.362 +         id="flowRegion5628"><rect
   2.363 +           id="rect5630"
   2.364 +           width="139.43661"
   2.365 +           height="30.985916"
   2.366 +           x="53.521126"
   2.367 +           y="332.64386" /></flowRegion><flowPara
   2.368 +         id="flowPara5632" /></flowRoot>    <path
   2.369 +       sodipodi:type="arc"
   2.370 +       style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:1.63552327999999990;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1.63552327999999990, 4.90656984000000040;stroke-dashoffset:0"
   2.371 +       id="path15864"
   2.372 +       sodipodi:cx="189.43661"
   2.373 +       sodipodi:cy="304.47485"
   2.374 +       sodipodi:rx="114.08451"
   2.375 +       sodipodi:ry="24.647888"
   2.376 +       d="m 303.52113,304.47485 a 114.08451,24.647888 0 1 1 -228.169026,0 114.08451,24.647888 0 1 1 228.169026,0 z"
   2.377 +       transform="matrix(0.86223914,-0.26671706,0.45382019,1.5057516,59.557893,51.735663)" />
   2.378 +    <path
   2.379 +       sodipodi:type="arc"
   2.380 +       style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.381 +       id="path15050"
   2.382 +       sodipodi:cx="189.43661"
   2.383 +       sodipodi:cy="304.47485"
   2.384 +       sodipodi:rx="114.08451"
   2.385 +       sodipodi:ry="24.647888"
   2.386 +       d="m 303.52113,304.47485 a 114.08451,24.647888 0 1 1 -228.169026,0 114.08451,24.647888 0 1 1 228.169026,0 z"
   2.387 +       transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,264.46655,370.36122)" />
   2.388 +    <path
   2.389 +       transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,336.46655,334.36122)"
   2.390 +       d="m 303.52113,304.47485 a 114.08451,24.647888 0 1 1 -228.169026,0 114.08451,24.647888 0 1 1 228.169026,0 z"
   2.391 +       sodipodi:ry="24.647888"
   2.392 +       sodipodi:rx="114.08451"
   2.393 +       sodipodi:cy="304.47485"
   2.394 +       sodipodi:cx="189.43661"
   2.395 +       id="path15858"
   2.396 +       style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.397 +       sodipodi:type="arc" />
   2.398 +    <path
   2.399 +       transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,218.46655,348.36122)"
   2.400 +       d="m 303.52113,304.47485 a 114.08451,24.647888 0 1 1 -228.169026,0 114.08451,24.647888 0 1 1 228.169026,0 z"
   2.401 +       sodipodi:ry="24.647888"
   2.402 +       sodipodi:rx="114.08451"
   2.403 +       sodipodi:cy="304.47485"
   2.404 +       sodipodi:cx="189.43661"
   2.405 +       id="path15856"
   2.406 +       style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.407 +       sodipodi:type="arc" />
   2.408 +    <path
   2.409 +       sodipodi:type="arc"
   2.410 +       style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:5.70473766;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.411 +       id="path15860"
   2.412 +       sodipodi:cx="189.43661"
   2.413 +       sodipodi:cy="304.47485"
   2.414 +       sodipodi:rx="114.08451"
   2.415 +       sodipodi:ry="24.647888"
   2.416 +       d="m 303.52113,304.47485 a 114.08451,24.647888 0 1 1 -228.169026,0 114.08451,24.647888 0 1 1 228.169026,0 z"
   2.417 +       transform="matrix(0.24908935,-0.07600266,0.13110258,0.42907314,284.46655,314.36122)" />
   2.418 +    <g
   2.419 +       transform="matrix(0.93969262,-0.34202014,1.2652501,0.28372618,-323.47751,414.75128)"
   2.420 +       id="g15040">
   2.421 +      <rect
   2.422 +         transform="matrix(0.99714719,-0.07548172,-0.08765568,0.99615083,0,0)"
   2.423 +         style="fill:none;stroke:#000000;stroke-width:1.2038151"
   2.424 +         id="rect15042"
   2.425 +         width="276.67776"
   2.426 +         height="140.14508"
   2.427 +         x="103.80169"
   2.428 +         y="344.93823" />
   2.429 +    </g>
   2.430 +    <g
   2.431 +       id="g15866"
   2.432 +       transform="translate(0,-30)">
   2.433 +      <text
   2.434 +         transform="matrix(0.77410869,0.20583095,-0.80109956,1.0788006,0,0)"
   2.435 +         xml:space="preserve"
   2.436 +         style="font-size:15.35861111px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
   2.437 +         x="808.13654"
   2.438 +         y="179.07872"
   2.439 +         id="text4698"
   2.440 +         sodipodi:linespacing="125%"><tspan
   2.441 +           style="font-size:13.16452408px;text-align:start;text-anchor:start"
   2.442 +           id="tspan4700"
   2.443 +           sodipodi:role="line"
   2.444 +           x="808.13654"
   2.445 +           y="179.07872">OS-level Unit</tspan><tspan
   2.446 +           style="font-size:13.16452408px;text-align:center;text-anchor:middle"
   2.447 +           sodipodi:role="line"
   2.448 +           x="808.13654"
   2.449 +           y="195.53438"
   2.450 +           id="tspan4702" /></text>
   2.451 +      <g
   2.452 +         transform="matrix(0.93969262,-0.34202014,1.2652501,0.28372618,-323.47751,354.75128)"
   2.453 +         id="g19763">
   2.454 +        <rect
   2.455 +           transform="matrix(0.99714719,-0.07548172,-0.08765568,0.99615083,0,0)"
   2.456 +           style="fill:none;stroke:#000000;stroke-width:1.2038151"
   2.457 +           id="rect19761"
   2.458 +           width="276.67776"
   2.459 +           height="140.14508"
   2.460 +           x="103.80169"
   2.461 +           y="344.93823" />
   2.462 +      </g>
   2.463 +      <path
   2.464 +         sodipodi:type="arc"
   2.465 +         style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1.63552332;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
   2.466 +         id="path15020"
   2.467 +         sodipodi:cx="189.43661"
   2.468 +         sodipodi:cy="304.47485"
   2.469 +         sodipodi:rx="114.08451"
   2.470 +         sodipodi:ry="24.647888"
   2.471 +         d="m 303.52113,304.47485 c 0,13.61266 -51.07738,24.64789 -114.08452,24.64789 -63.00713,0 -114.084506,-11.03523 -114.084506,-24.64789 0,-13.61265 51.077376,-24.64788 114.084506,-24.64788 63.00714,0 114.08452,11.03523 114.08452,24.64788 z"
   2.472 +         transform="matrix(0.86223914,-0.26671706,0.45382019,1.5057516,59.557893,-8.2643372)" />
   2.473 +    </g>
   2.474 +    <text
   2.475 +       transform="matrix(0.77410869,0.20583095,-0.80109956,1.0788006,0,0)"
   2.476 +       xml:space="preserve"
   2.477 +       style="font-size:15.35861111px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans"
   2.478 +       x="788.88983"
   2.479 +       y="225.39107"
   2.480 +       id="text15044"
   2.481 +       sodipodi:linespacing="125%"><tspan
   2.482 +         style="font-size:13.16452408px;text-align:start;text-anchor:start"
   2.483 +         id="tspan15046"
   2.484 +         sodipodi:role="line"
   2.485 +         x="788.88983"
   2.486 +         y="225.39107">Rack-runtime-level Units</tspan><tspan
   2.487 +         style="font-size:13.16452408px;text-align:center;text-anchor:middle"
   2.488 +         sodipodi:role="line"
   2.489 +         x="788.88983"
   2.490 +         y="241.84673"
   2.491 +         id="tspan15048" /></text>
   2.492 +  </g>
   2.493 +</svg>
     3.1 --- a/0__Papers/Holistic_Model/Perf_Tune/latex/Holistic_Perf_Tuning.tex	Sat Jul 07 21:43:23 2012 -0700
     3.2 +++ b/0__Papers/Holistic_Model/Perf_Tune/latex/Holistic_Perf_Tuning.tex	Mon Jul 09 04:36:18 2012 -0700
     3.3 @@ -408,7 +408,7 @@
     3.4  
     3.5  \begin{figure}[ht]
     3.6    \centering
     3.7 -  \includegraphics[width = 2in, height = 1.8in]{../figures/UCC_levels.pdf}
     3.8 +  \includegraphics[width = 3.2in, height = 1.8in]{../figures/UCC_Levels_2.pdf}
     3.9    \caption{Representation of multiple levels of  UCC.}
    3.10    \label{fig:UCC_Levels}
    3.11  \end{figure}
     4.1 --- a/0__Papers/VMS/VMS__Foundation_Paper/VMS__Full_conference_version/latex/VMS__Full_conf_paper.tex	Sat Jul 07 21:43:23 2012 -0700
     4.2 +++ b/0__Papers/VMS/VMS__Foundation_Paper/VMS__Full_conference_version/latex/VMS__Full_conf_paper.tex	Mon Jul 09 04:36:18 2012 -0700
     4.3 @@ -85,10 +85,10 @@
     4.4  
     4.5  \begin{abstract}
     4.6  
     4.7 -Software has not been keeping up with new parallel hardware, which slows the economy and retards adoption of  new hardware. Many believe the productivity and portability challenges of parallel software can be solved with domain-specific languages. But adoption is hindered by practical problems due to the small user-base, which means the language development time must be small and porting it across machines must be low effort.
     4.8 +Software has not been keeping up with new parallel hardware, and many believe the productivity and portability challenges of parallel software can be solved with domain-specific languages. But adoption is hindered by practical problems due to the small user-base, which means the language development time must be small and porting it across machines must be low effort.
     4.9  
    4.10  
    4.11 -To address this,  we propose the proto-runtime, which is a full runtime, but with two key pieces  replaced with an interface. A new language is created just by providing: 1) the behavior of language constructs and 2) assignment of work onto hardware resources.  The pieces are simplified by keeping concurrency issues inside the proto-runtime, so the pieces are implemented using sequential reasoning. The high reuse of the proto-runtime allows intense hardware-specific tuning, which all languages inherit, keeping overhead low. 
    4.12 +We address this by introducing the proto-runtime, which is a full runtime, but with two key pieces  replaced with an interface, allowing a new domain-specific language to be created just by providing code for: 1) handling dependencies of language constructs and 2) assignment of ready work onto hardware resources.  The pieces are simplified by keeping concurrency issues inside the proto-runtime, so the pieces are implemented using sequential reasoning. The high reuse of the proto-runtime allows intense hardware-specific tuning inside it, which all languages inherit, keeping overhead low. 
    4.13  
    4.14  
    4.15  
    4.16 @@ -364,145 +364,112 @@
    4.17  
    4.18  
    4.19  
    4.20 +%%%%%%%%%%%%%%%%%%%%%
    4.21 +\section{Getting my head around this}
    4.22 +Problem: parallel programming hard and costly
    4.23 +Soln: domain specific
    4.24 +DS problem: practical -- small user base, so needs to be low dev effort and low porting effort
    4.25 +Soln: modularize runtime, to reduce part have to mess with, hide part that has low-level details, reuse low-level tuning effort, and reuse lang-spec parts.
    4.26 +Benefits: lang impl doesn't have to touch low-level details, inherit centralized services, can reuse code from other languages to add features.
    4.27 +
    4.28 +Demonstrate this:
    4.29 +All the languages have copied singleton, atomic, critical section and transaction. In VOMP, took the task code from VSs, in VSs, took the send and receive code from SSR..  for DKU, took the code almost verbatim from earlier incarnation of these ideas, and welded it into SSR, and took VSs tasks and put into SSR. Thus, circle completes.. VSs took from SSR, now SSR takes from VSs..  pieces and parts are being borrowed all over the place and welded in where they're needed.
    4.30 + 
    4.31 +Part of what makes this so easy is the dispatch pattern.. adding a construct reduces to adding into switch and writing handler..  borrow constructs by taking the handler from the other lang. 
    4.32 +
    4.33 +Another part is that code for the constructs is isolated from concurrency details, which are inside the proto-runtime. All the dynamic system issues, and best way to impl locks, and need for fences, and so on is isolated from the construct logic.  This isolation is also how porting effort is lowered (or in many cases eliminated), and is how runtime performance is kept high.
    4.34 +
    4.35 +Performance must be high, or the labor savings don't matter.  By isolating the low-level details inside the proto-runtime, they can be intensively tuned, then all the languages inherit the effort.  Compare that to current practices, where the runtime code is monolithic.. each language has to separately modify the runtime, understanding and dealing with the concurrency, and then on a new machine, each language has to re-tune the low-level details, worrying about the consistency model on that machine, how its particular fence and atomic instructions work, and so on.
    4.36 +We spent 2 months performance tuning the current version, but only 18 hours implementing VSs on top of it, and VSs inherited the benefit from all that effort.  So did VOMP, and SSR, and VCilk, and so on..  each time we improved the proto-runtime, all the languages improved, with no effort on the part of the language creator. 
    4.37 +
    4.38 +In addition to runtime performance, application level performance must be high.  The runtime's performance only affects overhead, and so is only a factor for small work-unit (task) sizes.  But data affinity affects performance for all work.  The proto-runtime approach partially addresses this by giving the language the opportunity to directly control placement of work.  This isn't possible when building on top of threads, because the scheduling is in a separate, lower-level, layer where assignment of work to core is made in isolation, blind to language constructs and other application features.
    4.39  
    4.40  %%%%%%%%%%%%%%%%%%%%%
    4.41 -\section{Context:  PStack}
    4.42 +\section{Conclusion}
    4.43  
    4.44 +We have shown how the proto-runtime approach modularizes runtime code, allowing the low-level hardware-intensive portion to be inherited by all languages built on top of it.  This modularization reduces the effort of implementing a new language, especially an embedded-style one, by simplifying the language-specific portion, reducing it, and  reusing it, as seen in practice. While reducing the effort, the proto-runtime approach also maintains high runtime performance, by separating out the tuning-intensive part, so the tuning effort is amortized over all the languages, which inherit the improved runtime performance without any effort by the language creators.
    4.45  
    4.46 +We have demonstrated the benefits of centralized services inherited by the languages, such as debugging facilities, automated verification, concurrency handling, hardware performance information gathering, and so on.  By genericizing these
    4.47  
    4.48 -As seen in Figure \ref{figPStack}, at the top, a standard set of information is defined, which must be gathered from the application.  Current languages don't capture all the required information. So PStack defines a set of constructs to be added to a language to fill its gaps. The added constructs are denoted ``+P'' appended to the language name.
    4.49 +We have also demonstrated how application features and language features gain access to the decision-maker for which core work is assigned to.  This provides the language with the opportunity to directly control placement of work, which isn't possible when building on top of a thread package.
    4.50  
    4.51 -In the middle, standard runtimes require too much effort to create, and discourage reusing schedulers across languages.  So PStack defines a hardware abstraction that removes as much as possible from the runtime, including concurrency in the runtime itself.  The abstraction makes the runtimes all have similar structure, which simplifies reuse of complex scheduler code among languages.
    4.52 +%%%%%%%%%%%%%%%%%%%%%
    4.53 +\section{Supporting Claims}
    4.54  
    4.55 -At the bottom, performance of the runtime itself requires intense low-level hand-tuning and debugging.  This is captured inside the implementation of the abstraction.  It is done once for each hardware target, then reused across the runtimes from all languages.  So the intense hand-tuning is taken out of the runtimes, in the middle layer, while it benefits all  languages and hence applications in the higher layers.
    4.56 +============================================================================
    4.57 +=
    4.58 +=      Claims section of Intro -- drives paper
    4.59 +=
    4.60 +============================================================================
    4.61  
    4.62 -\subsection{How VMS  influences the stack}
    4.63  
    4.64 -VMS was chosen as the abstraction in the bottom layer. However, VMS affects multiple interfaces and layers of the stack. At the top, it determines the way parallel constructs are embedded into base languages, and how custom-syntax languages generate their runtime-interactions.  Next, between the top and middle, VMS defines the interface for the language layer to talk to the runtime layer. Then within the middle layer, VMS defines two standard function prototypes, so that a runtime consists of implementations of just these two functions.  Between middle and bottom, VMS defines a number of services that runtimes in the middle can call, and also defines the interaction between the VMS-implementation and the two runtime functions.
    4.65 +Overall Claim: 1) speed up dev of D.S  2) take perf tuning out of app, put into lang  3) Low overhead
    4.66  
    4.67 +In detail, we claim the following features and benefits.
    4.68 +We claim our interface has the following features:
    4.69 +-] It modularizes runtimes, cleanly separating out the language-specific parts.
    4.70 +-] The language-specific parts *inherit* the performance effort put into the base proto-runtime as demonstrated in subsection X
    4.71 +-] Services are centralized in the proto-runtime, and so inherited by the new language with small or no extra effort as demonstrated in subsection X
    4.72 +-] The language directly controls hardware resources, as described in  subsection X. This enables assignment that uses construct-implied information to reduce movement of data, for high performance.
    4.73 +-] The language-specific portion can be treated as trusted code
    4.74 +-] Makes it practical to reuse behavior and assignment (scheduling) code as demonstrated in subsection X
    4.75 +-] domain-constructs co-designed w/resource assignment (not possible w/library, and higher perf due to control over comm pattern)
    4.76  
    4.77 -\begin{figure*}[ht]
    4.78 -\center
    4.79 - \includegraphics[width=6in]{Portability_stack_combined.pdf} 
    4.80 - \caption
    4.81 - {Depiction of PStack, with layers named on the left, and interfaces between layers named on the right. At the top are  toolchains plus specializers, in the middle are runtimes connecting languages to hardware, and below that are hardware abstractions that collect similar hardware below a single interface and simplify runtime implementation.
    4.82 -  }
    4.83 -\label{figPStack}
    4.84 +Two kinds of services:
    4.85 +-] Visible to application-writer, vs visible to language implementer
    4.86 +-- --] These include services visible to the application programmer such as debugging, verification, and performance tuning
    4.87 +-- --] These also include services for the runtime implementer such as generic hardware information related to performance, generic performance counters in the form relevant to assigner-writers, optimized versions of data structures commonly used inside construct behavior implementations.
    4.88  
    4.89 -\end{figure*}
    4.90 +We further claim that these features lead to the following benefits:
    4.91 +-] Good runtime overhead performance
    4.92 +-] Ultra low time to create a new language runtime
    4.93 +-] Consequent reduced time to port a language to new hardware (assuming the proto-runtime is available for that hardware)
    4.94 +-] Amortized effort of proto-runtime, across many languages 
    4.95 +-] Attractive to reuse language implementation of constructs and assignment (subsection X)
    4.96 +-] Improved overhead performance achieved from a fixed amount of impl effort
    4.97 +-] Improved application visible features achieved from a given effort
    4.98 +-] Enables high application performance
    4.99 +-] Reduces application-effort to achieve high app-perf (due to domain-constructs pulled out of app and into lang, where integrated w/resource assignment)
   4.100  
   4.101 + (due to lang trusted and controlling resources for low-comm placement) 
   4.102  
   4.103 +=========================================================================
   4.104 + 
   4.105 +What have to show to support Features Claims: 
   4.106 +-] details of *things in action* that contribute to simplification
   4.107 +-- --] interface details.. what's involved with creating a plugin.. example (modular reduces effort of learning and effort of creating..  freedom from details of internals reduces effort)
   4.108 +-- --] services avail to plugin writer, as helpers (helpers reduce effort)
   4.109 +-- --] example of reuse of assigner code (reuse reduces effort)
   4.110 +-- --] example of reuse of construct code (singleton, atomic, trans, SSR into VSs) (reuse reduces effort)
   4.111  
   4.112 -%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4.113 -\section{Requirements}
   4.114 +-] details of modularizing
   4.115 +-- --] interface details.. point out, in example of impl plugin, how the construct behavior is cleanly collected inside the handler, and the assignment behavior is cleanly collected inside the assigner.. more detail on assigner services avail to get hardware info
   4.116 +-- --] example of reusing SSR constructs inside VSs.. show how dispatch approach and separate handlers modularizes (also point out reuse here)
   4.117  
   4.118 +-] details of centralizing runtime perf tuning
   4.119 +-- --] In example, when going through code, point out that internal runtime communications are inside proto-runtime, and that these are what determine the overhead of runtime.
   4.120  
   4.121 +-] details of central services available.
   4.122 +-- --] app-services.. debugging phases, probes, perf tuning (companion paper), (planned) replay, (planned) verification (because interface provides simplifications and opportunities) 
   4.123 +-- --] plugin services.. send request to runtime, suspend VP, create VP, perf-counters for assigner use, migration of VP between cores
   4.124  
   4.125 -\subsection{Top Layer: Language Requirements}
   4.126 -The languages must be designed to capture all information required to specialize the source for high performance on any target hardware. A computation model, called The Holistic Model\cite{HolisticHome}, suggests that such a canonical set of information exists. 
   4.127 +-] Details of lang inside resource control.
   4.128 +-- --] when show assigner example, point out how lang is impl it, give example of constructs providing info the 
   4.129  
   4.130 -PStack proposes to develop the constructs that gather the canonical information set, where some constructs are in the form of specialization helpers such as task-resizers and layout modifiers. The application implements the specialization helpers, thereby encoding information about data structures and how to manipulate them.  The seeds of such an approach were laid with work on DKU\cite{DKUSourceForge}, which demonstrated the success of task-resizing constructs.
   4.131 +Measurements to support Benefits Claims: time-to-create for variety of languages, including at least one DSL from scratch.  Overhead in head-to-head comparisons.
   4.132  
   4.133 -PStack also calls for the use of the BLIS\cite{BLISHome} approach for managing multiple toolchains, where each toolchain specializes to a different target. The management covers the  install process, during which the correct toolchain output is paired to the installation target.  Further specialization can thus be naturally added during installation, when exact hardware details are known.  If required, runtime tuning and optimization also fit naturally  within the approach.
   4.134 +Done.
   4.135  
   4.136 -\subsection{Middle Layer: Runtime Requirements}
   4.137 +Creation simplification from: sequential plugin code -- show impl of at least two constructs (mutex and send)..  show equiv done with locks (?)
   4.138 +Simpl from: standard pattern
   4.139  
   4.140 -Below the top layer, a collection of runtime systems acts as a sort of cross-bar switch, connecting the languages above to the hardware abstractions below. Such a ``cross-bar'' switch made up of runtimes implies  a large number of runtimes. 
   4.141 +Hmmm.. actually have interactions, and VMS is a proposed compromise.. can do equally simple construct implementations using global lock..  but interface hides things like the random backoff had  to include for larger machines, in contrast, the simple CAS method grinds to a halt on the larger machine.
   4.142  
   4.143 -To be practical, the number of runtimes must be reduced; the effort of creating one must be reduced; and reuse of sophisticated runtime code must be encouraged. 
   4.144 +Okay -- claim THAT: VMS is a balance point btwn 
   4.145  
   4.146 -\subsection{Bottom Layer: Abstraction Requirements}
   4.147  
   4.148 -The primary purpose of the bottom abstraction is to reduce the effort of creating the runtime layer.
   4.149 -
   4.150 -\begin{itemize}
   4.151 -\item The abstraction must hide details, making multiple hardware targets present the same interface and use a common runtime.
   4.152 -\item The abstraction must hide low-level tuning of the runtime itself, like synchronization-related tuning.
   4.153 -\item The asbstraction must provide common services, such as handling internal synchronization of the runtime, creation of tasks, communication, etc.
   4.154 -\item The abstraction must create uniform patterns for runtime implementation, making reuse between runtimes more practical and reducing the effort of making multiple runtimes.
   4.155 -
   4.156 -\end{itemize}
   4.157 -
   4.158 -However, the abstraction must not hide \textit{application}-performance-critical information from the runtime, which holds the scheduler that decides when tasks become ready and where to execute them.  The scheduling choices need to know the communication paths and memory pools in the hardware, along with latency, bandwidth, capacity and computation rate.
   4.159 -
   4.160 -A single abstraction can't both hide details and expose those required by the runtimes to attain high \emph{application} performance.  Instead, PStack calls for a  family of abstractions, one for each major type of architecture, including a ``hierarchy'' abstraction used to glue together heterogeneous hardware.  In each, only the details critical to application performance are exposed to the scheduler in the runtime, thus keeping the number of abstractions needed manageably small, on the order of tens in total.
   4.161 -
   4.162 -
   4.163 -
   4.164 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4.165 -\section{Relating VMS Details to Requirements}
   4.166 -
   4.167 -Given the requirements, how does VMS meet them? We given more detail on VMS, at each place it affects the stack, and show how the details satisfy the requirements.
   4.168 -
   4.169 -
   4.170 -\subsection{Top-layer}
   4.171 -
   4.172 -With VMS, a language is implemented as either a collection of wrapper-library calls embedded into a base language, or as custom syntax. The  wrapper-lib functions call a primitive supplied by VMS that suspends the virtual-processor animating the call, and sends a request to the runtime. This same VMS primitive is also used to implement custom syntax, inside the compiler.  Thus, the VMS primitive is the means for the language layer to interact with the runtime layer. 
   4.173 -
   4.174 -
   4.175 -VMS is invisible to the application, only language constructs are visible -- either wrapper-library calls or custom syntax.  From the application-programmer point of view, even an embedded parallelism construct looks like a function call, albeit the data-structure of the virtual-processor animating the code has to be passed as a parameter to the wrapper-lib call.
   4.176 -
   4.177 -
   4.178 -\subsection{Interface from top to middle}
   4.179 -
   4.180 -The interface between application-executable and language-runtime is fixed, as the VMS-primitive that sends a request to the runtime. Even though PStack allows executables to be modified during installation or even runtime, via BLIS management of auto-tuners, multi-stage compilers, or binary re-writers, the VMS-primitive still must be used for the executable to interact with the runtime.  
   4.181 -
   4.182 -
   4.183 -Such a standard interaction mechanism serves not only to modularize the stack, cleanly separating runtime from toolchain, but also to decouple executable from VMS implementation. The VMS primitive is naturally a custom instruction, but can also be, a trap to the OS, a message sent on a port, or a function call -- given appropriate executable modification under BLIS.
   4.184 -
   4.185 -\subsection{Middle layer}
   4.186 -
   4.187 -
   4.188 -
   4.189 -VMS causes the middle-layer portion of a runtime to be implemented as two functions. The first is the request-handler, which is the part of a scheduler that handles constraints. It determines which work units (tasks) are ready to be animated (executed).
   4.190 - The other function, sched-assigner, assigns ready work to hardware. This provides uniform patterns for the runtimes.
   4.191 -
   4.192 -  
   4.193 -
   4.194 -When a request is ready for the runtime, VMS calls the request-handler function, and when  hardware is free for work, VMS calls the scheduler-assign function. Thus, the language portion of the runtime is passive. 
   4.195 -
   4.196 -By keeping control-flow inside VMS, the language-supplied portion of the runtime is simplified. Control flow includes any concurrency, and so is inside the VMS-implementation.  Hence, the language-supplied runtime functions are sequential code, even though they implement the \textit{semantics} of language-level synchronization constructs. This simplifies runtime implementation.
   4.197 -
   4.198 -This structure is also the reason VMS encourages reuse of scheduler code.  Scheduling is  sub-divided into distinct modules: constraint-management (IE enforcing dependencies); and assigning work to resources. The assignment module is especially straight-forward to share between languages.
   4.199 -
   4.200 -Because application performance is most strongly influenced by communication within the hardware, the assignment module is  critical. For high performance, it also tends to be complex.  Thus, simple reuse of it is a significant benefit.
   4.201 -
   4.202 -
   4.203 -\subsection{Interface from middle to bottom}
   4.204 -
   4.205 -VMS's plugin API is the interface between the runtime and the bottom abstraction-implementation. The API has calls to register  language-supplied runtime functions with the bottom abstraction, as well as support services. 
   4.206 -
   4.207 -
   4.208 -Reduction of the number of runtimes is accomplished this way. Hardware targets with similar structure present the same interface, requiring only one runtime. 
   4.209 -
   4.210 -Only structural elements that affect assignment choices are exposed in the API.  For example, memory hierarchy is exposed as a VMS-defined data-structure made available to the sched-assign function. The details in the data convey the connectivity, communication, and sizes, which the assigner may use to optimize choices.  
   4.211 -
   4.212 -\subsection{Bottom layer}
   4.213 -
   4.214 -The bottom layer consists of implementations of the VMS API and VMS primitives used in the upper levels, as well as the control-flow of the runtimes.  Each hardware platform has its own implementation, allowing low-level hand-tweaking. This effort is performed once per hardware target, so is amortized across applications. Pulling this tuning below the interface also simplifies the runtime-portion in the middle layer. 
   4.215 -
   4.216 -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4.217 -\section{Experimental Setup}
   4.218 -The experiments to measure VMS overhead were run on three machines: a one-socket 2 core 3GHz workstation (``1x2''), a one-socket 4 core SandyBridge 3.3GHz workstation (``1x4''), and a four-socket by 10 core each Westmere EX 2.4GHz server (``4x10'').
   4.219 -
   4.220 -The code consists of two loops: the innermost is a single task, while the outer repeats that task a number of times.  The inner does throw-away work entirely within registers, where the number of iterations sets the amount of work in the task.  After the inner completes, a synchronization is performed, which pairs each task to a sync operation.  The outer then repeats the sequence of task-then-sync a large number of times to gain statistical accuracy and dominate any other sources of overhead.
   4.221 -
   4.222 -Two versions of the code were written: one that used pthread, a second that used a VMS-implemented equivalent called Vthread.  Both have the same semantics, differing only in the implementation of scheduling triggered by the  construct.  Hence, any difference in execution time is due to the difference in scheduling overhead.   
   4.223 -
   4.224 -%%%%%%%%%%%%%%%%%%%%%%%
   4.225 -\section{Results}
   4.226 -The new experimental results given in this paper focus on the overhead of the runtime, with the goal of showing that a language based on VMS enjoys low overhead compared to standard pthreads.  We illustrate the amount of overhead by plotting a curve whose shape is determined by the overhead.
   4.227 -
   4.228 -The curve  compares total CPU time to just work time. The difference is the overhead of scheduling,  which consists of: switching from application to scheduler; updating the sync-construct state; choosing a new thread to schedule; and deciding on which core to re-animate it.
   4.229 -  
   4.230 -The ratio of total CPU time to work time gets larger as the overhead increases, raising overhead's percent of the total.  When the ratio is exactly 2, the work time exactly equals the overhead.  Larger ratio indicates overhead dominates, smaller indicates work dominates.
   4.231 -
   4.232 -Hence, to find the size of the overhead,  find the size of task where the work in the task exactly equals the overhead of scheduling the task.  To do this, we plot the ratio on the y axis and single-task-time on x axis.  When the ratio equals 2, the cycles of work in the task equals the overhead of scheduling the task.  So the overhead can be read off the graph, as the task-size at the y=2 point.
   4.233 -
   4.234 -
   4.235 -\subsection{Performance Results}
   4.236 -
   4.237 -We executed on each of three machines.  On a given machine, we first executed the pthread version, then the Vthread version, with a variety of numbers of threads. Varying the number of threads shows the effect on scheduling time. For a given machine, both sets of curves are plotted on the same graph, to make direct comparison easy.
   4.238 +%%%%%%%%%%%%%%%%%%%%%
   4.239 +\section{Measurements}
   4.240  
   4.241  \begin{figure}[ht!]
   4.242   \includegraphics[width=2.3in, angle = -90]{plots_exec_vs_task_size/cray1_pthread_vthread_8_32_128_512thds__o30000__perfCtrs.eps}