ARMini faster than Iyonix
Chris Hall (132) 3558 posts |
I have a programme that stitches together four rather large (approx 10000×10000) sprites and I have found that this runs much faster on the ARMini than on either the Iyonix or (with smaller sprites to keep within the 28Mbyte limit) Virtual Risc PC on a fast Windows machine. In particular stitching together two 1bpp sprites forming LH and RH portions of an image. Results on Iyonix and ARMini: cropping etc pre join: Iy: 9sec ARMini: 4sec
10REM >Stitch4
20REM This programme stitches together four sprites
30REM First it takes sprite TL and TR and joins
40REM using a common point (measured from top left)
50REM Then it takes BL and BR similarly
60REM Then it takes TL and BL similarly (but vertically)
62REM
63REM Global state shared by all procedures:
64REM  block%         user sprite area holding topl/topr/btml/btmr
65REM  w1%..w4%,h1%..h4%  tracked width/height of those four sprites
66REM  s1..s4 (x,y)   common point in each sprite for the horizontal joins
67REM  s5,s6 (x,y)    common point in top/bottom row for the vertical join
68REM  i%             index of the data set, used as the filename suffix
70:
80DIM block% 150000000
90DIM name% 20, fname% 20
100:
110REM data indicates tl,tr,tlx,tly,trx,try
120REM bl,br,blx,bly,brx,bry
130REM "","",tlx,tly,blx,bly
135REM A coordinate of -1 means "use the full dimension" (see PROCgetfile)
140DATA "E452R","E451R",-1,-1,0,-1
150DATA "E455R","E454R",-1,0,0,0
160DATA "","",0,-1,0,0
170DATA "E457R","E456R",-1,-1,0,-1
180DATA "E458R","E459R",-1,0,0,0
190DATA "","",0,-1,0,0
200DATA "E460R","E463R",-1,-1,0,-1
210DATA "E461R","E462R",-1,0,0,0
220DATA "","",0,-1,0,0
230DATA "E465R","E464R",-1,-1,0,-1
240DATA "E466R","E467R",-1,0,0,0
250DATA "","",0,-1,0,0
260DATA "E469R","E468R",-1,-1,0,-1
270DATA "E470R","E471R",-1,0,0,0
280DATA "","",0,-1,0,0
290DATA "End"
300:
305REM Main loop: one pass per group of three DATA lines, until "End"
310i%=0
320READ TL$
330REPEAT
340 PROCgetfile
350 *spool zdump/txt
360 *time
370 PROCprocess
380 PROCsave
390 *time
400 PRINT "Done!"
410 *spool
420 READ TL$
430 i%+=1
440UNTIL TL$="End"
450END
460:
465REM Print the tracked sizes next to the sizes read back from the sprite area
470DEFPROCdbg
480SYS "OS_SpriteOp",40+256,block%,"topl" TO ,,,ww1%,hh1%
490SYS "OS_SpriteOp",40+256,block%,"topr" TO ,,,ww2%,hh2%
500SYS "OS_SpriteOp",40+256,block%,"btml" TO ,,,ww3%,hh3%
510SYS "OS_SpriteOp",40+256,block%,"btmr" TO ,,,ww4%,hh4%
520PRINT "Four sprites sizes should be (";w1%;",";h1%;") (";w2%;",";h2%;") (";w3%;",";h3%;") (";w4%;",";h4%;")"
530PRINT "Four sprites sizes read back (";ww1%;",";hh1%;") (";ww2%;",";hh2%;") (";ww3%;",";hh3%;") (";ww4%;",";hh4%;")"
540ENDPROC
550:
552REM Crop the four sprites so that they can be joined at their common points,
553REM keeping w1%..w6% / h1%..h6% in step with each crop, then mark the joins.
560DEFPROCprocess
565LOCAL i%:REM keep the marking loop below from clobbering the global data-set counter
570PROCdbg
580REM Crop top rows on topl
590IF s1y%>s2y% THEN
600 REM crop top s1y%-s2y% rows - row0 at btm
610 PRINT "Crop top ";s1y%-s2y%;" rows on topl (no match on topr)"
620 REM rows R3 to R3-R4-1 are deleted, R4 negative
630 REM hence to delete top R rows R3=h-R R4=-R [rows h-1..h-R]
640 REM and to delete bottom R rows R3=0 R4=-R [rows R-1 .. 0]
650 REM and to delete LH C cols R3=0 R4=-C [cols 0 .. C-1]
660 REM and to delete RH C cols R3=w-C R4=-C [cols w-1..w-C]
670 SYS "OS_SpriteOp",57+256,block%,"topl",h1%+s2y%-s1y%,s2y%-s1y%
680 s5y%-=s1y%-s2y%
690 h1%-=s1y%-s2y%
700 s1y%=s2y%
710 PROCdbg
720ENDIF
730REM Crop btm rows on topl
740IF h1%-s1y%>h2%-s2y% THEN
750 REM crop bottom h1%+s2y%-s1y%-h2% rows
760 PRINT "Crop bottom ";h1%+s2y%-s1y%-h2%;" rows on topl (no match on topr)"
770 SYS "OS_SpriteOp",57+256,block%,"topl",0,h2%+s1y%-h1%-s2y%
780 h1%-=h1%+s2y%-s1y%-h2%
790 PROCdbg
800ENDIF
810REM Crop top rows on topr
820IF s2y%>s1y% THEN
830 REM crop top s2y%-s1y% rows
840 PRINT"Crop top ";s2y%-s1y%;" rows on topr (no match on topl)"
850 SYS "OS_SpriteOp",57+256,block%,"topr",h2%+s1y%-s2y%,s1y%-s2y%
860 h2%-=s2y%-s1y%
870 s2y%=s1y%
880 PROCdbg
890ENDIF
900REM Crop btm rows on topr
910IF h2%-s2y%>h1%-s1y% THEN
920 REM crop bottom h2%+s1y%-h1%-s2y% rows
930 PRINT "Crop bottom ";h2%+s1y%-h1%-s2y%;" rows on topr (no match on topl)"
935 REM R3 must be 0 to delete from the bottom (see REM at 640); R3=h-R would delete the top rows
940 SYS "OS_SpriteOp",57+256,block%,"topr",0,h1%+s2y%-h2%-s1y%
950 h2%-=h2%+s1y%-h1%-s2y%
960 PROCdbg
970ENDIF
980REM Crop RH w1%-s1x% cols in topl
990PRINT "Crop RH ";w1%-s1x%;" cols in topl (HH overlap)"
1000SYS "OS_SpriteOp",58+256,block%,"topl",w1%+s1x%-w1%,s1x%-w1%
1010w1%-=w1%-s1x%
1020PROCdbg
1030REM Crop LH cols on topl
1040IF s5x%>s6x% THEN
1050 REM crop LH s5x%-s6x% cols
1060 PRINT "Crop LH ";s5x%-s6x%;" cols on topl (no match on btml)"
1070 SYS "OS_SpriteOp",58+256,block%,"topl",0,s6x%-s5x%
1080 w1%-=s5x%-s6x%
1090 s5x%=s6x%
1100 PROCdbg
1110ENDIF
1120REM Crop LH s2x% cols in topr
1130PRINT "Crop LH ";s2x%;" cols in topr (HH overlap)"
1140SYS "OS_SpriteOp",58+256,block%,"topr",0,-s2x%
1150w2%-=s2x%
1160PROCdbg
1165REM w5%,h5% = size the joined top row (topl+topr) will have
1170w5%=w1%+w2%
1180h5%=h1%
1190REM crop btm h5%-s5y% rows on topl
1200PRINT "Crop btm ";h5%-s5y%;" rows on topl (V overlap)"
1210SYS "OS_SpriteOp",57+256,block%,"topl",0,s5y%-h5%
1220h1%-=h5%-s5y%
1230PROCdbg
1240REM crop btm h5%-s5y% rows on topr
1250PRINT "Crop btm ";h5%-s5y%;" rows on topr (V overlap)"
1260SYS "OS_SpriteOp",57+256,block%,"topr",0,s5y%-h5%
1270h2%-=h5%-s5y%
1280h5%-=h5%-s5y%
1290PROCdbg
1300:
1310REM Crop top rows on btml
1320IF s3y%>s4y% THEN
1330 REM crop top s3y%-s4y% rows
1340 PRINT "Crop top ";s3y%-s4y%;" rows on btml (no match on btmr)"
1350 SYS "OS_SpriteOp",57+256,block%,"btml",h3%+s4y%-s3y%,s4y%-s3y%
1360 s6y%-=s3y%-s4y%
1370 h3%-=s3y%-s4y%
1380 s3y%=s4y%
1390 PROCdbg
1400ENDIF
1410REM Crop btm rows on btml
1420IF h3%-s3y%>h4%-s4y% THEN
1430 REM crop bottom h3%+s4y%-s3y%-h4% rows
1440 PRINT "Crop bottom ";h3%+s4y%-s3y%-h4%;" rows on btml (no match on btmr)"
1450 SYS "OS_SpriteOp",57+256,block%,"btml",0,h4%+s3y%-s4y%-h3%
1460 h3%-=h3%+s4y%-s3y%-h4%
1470 PROCdbg
1480ENDIF
1490REM Crop top rows on btmr
1500IF s4y%>s3y% THEN
1510 REM crop top s4y%-s3y% rows
1520 PRINT "Crop top ";s4y%-s3y%;" rows on btmr (no match on btml)"
1530 SYS "OS_SpriteOp",57+256,block%,"btmr",h4%+s3y%-s4y%,s3y%-s4y%
1540 h4%-=s4y%-s3y%
1550 s4y%=s3y%
1560 PROCdbg
1570ENDIF
1580REM Crop btm rows on btmr
1590IF h4%-s4y%>h3%-s3y% THEN
1600 REM crop bottom h4%+s3y%-h3%-s4y% rows
1610 PRINT "Crop bottom ";h4%+s3y%-h3%-s4y%;" rows on btmr (no match on btml)"
1620 SYS "OS_SpriteOp",57+256,block%,"btmr",0,h3%+s4y%-h4%-s3y%
1630 h4%-=h4%+s3y%-h3%-s4y%
1640 PROCdbg
1650ENDIF
1660REM Crop RH w3%-s3x% cols in btml
1670PRINT "Crop RH ";w3%-s3x%;" cols in btml (HH overlap)"
1680SYS "OS_SpriteOp",58+256,block%,"btml",w3%+s3x%-w3%,s3x%-w3%
1690w3%-=w3%-s3x%
1700PROCdbg
1710REM Crop LH cols on btml
1720IF s6x%>s5x% THEN
1730 REM crop LH s6x%-s5x% cols
1740 PRINT "Crop LH ";s6x%-s5x%;" cols in btml (no match in topl)"
1750 SYS "OS_SpriteOp",58+256,block%,"btml",0,s5x%-s6x%
1760 REM w6% needs no adjustment here: it is computed from w3%+w4% at line 1860
1770 w3%-=s6x%-s5x%
1780 s6x%=s5x%
1790 PROCdbg
1800ENDIF
1810REM Crop LH s4x% cols in btmr
1820PRINT "Crop LH ";s4x%;" cols in btmr (HH overlap)"
1830SYS "OS_SpriteOp",58+256,block%,"btmr",0,-s4x%
1840w4%-=s4x%
1850PROCdbg
1855REM w6%,h6% = size the joined bottom row (btml+btmr) will have
1860w6%=w3%+w4%
1870h6%=h3%
1880REM Crop RH cols on btmr
1890IF w6%-s6x%>w5%-s5x% THEN
1900 REM crop RH w6%+s5x%-w5%-s6x% cols
1910 PRINT "Crop RH ";w6%+s5x%-w5%-s6x%;" cols in btmr (no match on topr)"
1920 SYS "OS_SpriteOp",58+256,block%,"btmr",w4%+w5%+s6x%-w6%-s5x%,w5%+s6x%-w6%-s5x%
1930 w4%-=w6%+s5x%-w5%-s6x%
1940 w6%-=w6%+s5x%-w5%-s6x%
1950 PROCdbg
1960ENDIF
1970REM Crop top s6y% rows on btml
1980PRINT "Crop top ";s6y%;" rows on btml (V overlap)"
1990SYS "OS_SpriteOp",57+256,block%,"btml",h6%-s6y%,-s6y%
2000h3%-=s6y%
2010PROCdbg
2020REM Crop top s6y% rows on btmr
2030PRINT "Crop top ";s6y%;" rows on btmr (V overlap)"
2040SYS "OS_SpriteOp",57+256,block%,"btmr",h6%-s6y%,-s6y%
2050h4%-=s6y%
2060h6%-=s6y%
2070PROCdbg
2080REM Crop RH cols on topr
2090IF w5%-s5x%>w6%-s6x% THEN
2100 REM crop RH w5%+s6x%-w6%-s5x% cols
2110 PRINT "Crop RH ";w5%+s6x%-w6%-s5x%;" cols on topr (no match on btmr)"
2120 SYS "OS_SpriteOp",58+256,block%,"topr",w2%+w6%+s5x%-w5%-s6x%,w6%+s5x%-w5%-s6x%
2130 w2%-=w5%+s6x%-w6%-s5x%
2140 w5%-=w5%+s6x%-w6%-s5x%
2150 PROCdbg
2160ENDIF
2170*time
2180PRINT "Marking joins"
2185REM Write 100 pixels of colour 1 along the edges of each sprite that meet at the join
2190FOR i%=1 TO 100
2200 SYS "OS_SpriteOp",42+256,block%,"topl",w1%-1,h1%-i%,1
2210 SYS "OS_SpriteOp",42+256,block%,"topl",i%-1,0,1
2220 SYS "OS_SpriteOp",42+256,block%,"topr",0,h2%-i%,1
2230 SYS "OS_SpriteOp",42+256,block%,"topr",w2%-i%,0,1
2240 SYS "OS_SpriteOp",42+256,block%,"btml",i%-1,h3%-1,1
2250 SYS "OS_SpriteOp",42+256,block%,"btml",w3%-1,i%-1,1
2260 SYS "OS_SpriteOp",42+256,block%,"btmr",w4%-i%,h4%-1,1
2270 SYS "OS_SpriteOp",42+256,block%,"btmr",0,i%-1,1
2280NEXT i%
2290ENDPROC
2300:
2302REM Initialise the sprite area, read the rest of this data set, load the four
2303REM sprite files (TL$ has already been read by the caller) and rename the
2304REM loaded sprites to topl/topr/btml/btmr. Then resolve -1 coordinates.
2310DEFPROCgetfile
2320!block%=149999997
2325REM word at +8 is the offset to the first sprite; it must be set before initialising
2330block%!8=16
2340SYS "OS_SpriteOp",9+256,block%:REM Initialise
2350READ TR$,s1x%,s1y%,s2x%,s2y%
2360READ BL$,BR$,s3x%,s3y%,s4x%,s4y%
2370READ A$,A$,s5x%,s5y%,s6x%,s6y%
2380:
2385REM Load TL$, fetch the name of sprite number 1 and rename it to "topl"
2390SYS "OS_SpriteOp",10+256,block%,TL$
2400SYS "OS_SpriteOp",13+256,block%,name%,18,1
2410$fname%="topl"+CHR$0
2420SYS "OS_SpriteOp",26+256,block%,name%,fname%
2430:
2435REM Merge each further file into the area and rename the sprite it adds
2440SYS "OS_SpriteOp",11+256,block%,TR$
2450SYS "OS_SpriteOp",13+256,block%,name%,18,2
2460$fname%="topr"+CHR$0
2470SYS "OS_SpriteOp",26+256,block%,name%,fname%
2480:
2490SYS "OS_SpriteOp",11+256,block%,BL$
2500SYS "OS_SpriteOp",13+256,block%,name%,18,3
2510$fname%="btml"+CHR$0
2520SYS "OS_SpriteOp",26+256,block%,name%,fname%
2530:
2540SYS "OS_SpriteOp",11+256,block%,BR$
2550SYS "OS_SpriteOp",13+256,block%,name%,18,4
2560$fname%="btmr"+CHR$0
2570SYS "OS_SpriteOp",26+256,block%,name%,fname%
2580:
2590SYS "OS_SpriteOp",40+256,block%,"topl" TO ,,,w1%,h1%
2600SYS "OS_SpriteOp",40+256,block%,"topr" TO ,,,w2%,h2%
2610SYS "OS_SpriteOp",40+256,block%,"btml" TO ,,,w3%,h3%
2620SYS "OS_SpriteOp",40+256,block%,"btmr" TO ,,,w4%,h4%
2630REM Treat -1 as a common point as max dimension:
2640IF s1x%=-1 THEN s1x%=w1%
2650IF s2x%=-1 THEN s2x%=w2%
2660IF s3x%=-1 THEN s3x%=w3%
2670IF s4x%=-1 THEN s4x%=w4%
2680IF s5x%=-1 THEN s5x%=w1%+w2%
2690IF s6x%=-1 THEN s6x%=w3%+w4%
2700IF s1y%=-1 THEN s1y%=h1%
2710IF s2y%=-1 THEN s2y%=h2%
2720IF s3y%=-1 THEN s3y%=h3%
2730IF s4y%=-1 THEN s4y%=h4%
2740IF s5y%=-1 THEN s5y%=h1%
2750IF s6y%=-1 THEN s6y%=h3%
2760ENDPROC
2770:
2775REM Save the cropped sprites, join them into one, and save the result.
2776REM Filenames carry the data-set index i% as a suffix.
2780DEFPROCsave
2790:
2800REM save inter result
2810SYS "OS_SpriteOp",12+256,block%,"topli"+STR$(i%)
2820:
2830*time
2840PRINT "Joining topl,topr"
2850REM join topl,topr horizontally
2860SYS "OS_SpriteOp",35+256,block%,"topl","topr",0
2870*time
2880PRINT "Joining btml,btmr"
2890REM join btml,btmr horizontally
2900SYS "OS_SpriteOp",35+256,block%,"btml","btmr",0
2910*time
2920PRINT "Joining top,btm"
2930REM join topl,btml vertically
2940SYS "OS_SpriteOp",35+256,block%,"topl","btml",1
2950w7%=w5%
2960h7%=h5%+h6%
2970:
2980*time
2990PRINT "Saving result"
3000REM save result
3010SYS "OS_SpriteOp",12+256,block%,"topl"+STR$(i%)
3020ENDPROC |
Rik Griffin (98) 264 posts |
The Iyonix’s memory to CPU bandwidth is rather poor, to be honest. I think about 60MB/s, from memory (haha). A quick Google suggests that the Beagleboard xM is capable of something like 600MB/s. I think the xM has a substantial L2 cache, whereas the Iyonix has only L1. Actually the Iyonix’s memory can manage those sorts of speeds, just not to the CPU. If you use the application accelerator, or DMA, you can approach the theoretical max of 600MB/s, but it’s often not practical to utilise this. |
Terje Slettebø (285) 275 posts |
Yeah, it’s pretty bad. Here’s what I found in my informal test: Memory speed Data cache: 2/2 GB/s (read/write) |
Andrew Rawnsley (492) 1445 posts |
As others have explained, the CPU enhancements, clock speed benefit and RAM bandwidth should generally make the ARMini 175-225% faster (based on experience) than an Iyonix. That will drop if disc operations come into the mix, however, so it does depend on what you’re doing, but generally speaking you should notice a significant boost. |