[PD-cvs] externals/gridflow/cpu mmx.rb,NONE,1.1
Mathieu Bouchard
matju at users.sourceforge.net
Tue Oct 4 04:02:16 CEST 2005
- Previous message: [PD-cvs] externals/gridflow/base bitpacking.c, NONE, 1.1 flow_objects.c, NONE, 1.1 flow_objects.rb, NONE, 1.1 flow_objects_for_image.c, NONE, 1.1 flow_objects_for_matrix.c, NONE, 1.1 grid.c, NONE, 1.1 grid.h, NONE, 1.1 main.c, NONE, 1.1 main.rb, NONE, 1.1 number.c, NONE, 1.1 source_filter.rb, NONE, 1.1 test.rb, NONE, 1.1
- Next message: [PD-cvs] externals/gridflow/bridge placebo.rb, NONE, 1.1 puredata.c, NONE, 1.1 puredata.rb, NONE, 1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/pure-data/externals/gridflow/cpu
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21117/cpu
Added Files:
mmx.rb
Log Message:
starting to commit gridflow 0.8.0 ...
if you know how to use "cvs import" please mail me and i'll use it for 0.8.1
--- NEW FILE: mmx.rb ---
=begin
$Id: mmx.rb,v 1.1 2005/10/04 02:02:14 matju Exp $
GridFlow
Copyright (c) 2001,2002,2003,2004 by Mathieu Bouchard
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
See file ../COPYING for further informations on licensing terms.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
=end
# Command line: the first argument names the file that receives everything
# written with a bare `puts` (the assembly text); the second names the file
# that receives the C declarations and installer written through $loader.
asm_path, loader_path = ARGV

# $count is incremented once per generated function and reported at the end.
# $lines is initialised here but not referenced again in the visible part of
# this file.
$count = 0
$lines = 0

# From here on STDOUT *is* the assembly output file.
STDOUT.reopen(asm_path, "w")
$loader = File.open(loader_path, "w")

puts "; generated by/for GridFlow 0.8.0"
$loader.puts "#include \"../base/grid.h.fcs\"\nextern \"C\" {"
# Helper that writes the text of one assembly function to standard output.
# AsmFunction.make emits the function frame; the instance it yields hands out
# unique loop labels (used by make_fun_map and make_fun_zip via make_until).
class AsmFunction
  # name:: label of the function; also the prefix of every generated
  #        loop label ("<name>_1", "<name>_2", ...).
  def initialize(name)
    @name = name
    @label_count = 1
  end

  # Emits "GLOBAL name", the label, and a prologue that saves ebp, esi and
  # edi; yields a fresh AsmFunction so the block can emit the body; then
  # emits the matching epilogue and "ret".
  def self.make(name)
    puts "", "GLOBAL #{name}", "#{name}:"
    puts "push ebp", "mov ebp,esp", "push esi", "push edi"
    yield AsmFunction.new(name)
    puts "pop edi", "pop esi", "leave", "ret", ""
  end

  # Emits a loop that runs until the exit test succeeds:
  #
  #   <name>_a:
  #     ops[0..-2]                 ; the test instructions
  #     ops[-1] <name>_b           ; conditional jump out of the loop
  #     ...block output...
  #     jmp <name>_a
  #   <name>_b:
  #
  # ops:: instruction strings; the last one must be a jump mnemonic, to
  #       which the exit label is appended.
  # Raises ArgumentError when no instruction is given.
  #
  # The strings passed in are never modified: the jump line is built in a
  # new string, so frozen or reused arguments are safe.
  def make_until(*ops)
    raise ArgumentError, "make_until needs at least one instruction" if ops.empty?
    top  = "#{@name}_#{@label_count}"
    done = "#{@name}_#{@label_count + 1}"
    @label_count += 2
    lines = ops[0...-1] << "#{ops[-1]} #{done}"
    puts "#{top}: ", *lines
    yield
    puts "jmp #{top}"
    puts "#{done}:"
  end
end
# Size in bytes of one element of each number type.
$sizeof = {
  :uint8   => 1, :int16   => 2,
  :int32   => 4, :int64   => 8,
  :float32 => 4, :float64 => 8,
}

# Accumulator register of matching width, used by the one-element-at-a-time
# tail loops. Types without an entry have no scalar register here.
$accum = { :uint8 => "al", :int16 => "ax", :int32 => "eax" }

# Operand-size keyword placed in front of memory operands.
$asm_type = {
  :uint8 => "byte", :int16 => "word",
  :int32 => "dword", :int64 => "qword",
}
# Operator table. Each row is indexed as:
#   [0] GridFlow operator symbol   [1] plain x86 opcode
#   [2] MMX opcode for uint8       [3] for int16
#   [4] for int32                  [5] for int64
# (the original header also reserved float32/float64 columns; no row fills them)
#
# An opcode written "_" means no such thing seems available.
# Also, >> for x86 ought to be shr in the uint8 case.
# btw, all of the MMX information comes from the NASM manual, Appendix B.
$opcodes = {
  :add => %w[+ add paddb paddw paddd paddq],
  :sub => %w[- sub psubb psubw psubd psubq],
  :and => %w[& and pand pand pand pand],
  :xor => %w[^ xor pxor pxor pxor pxor],
  :or  => %w[| or por por por por],
  # Rows kept for reference but disabled:
  # :max     => %w[ max   _   pmaxub  pmaxsw  _       _     ], # not plain MMX !!! (req.Katmai)
  # :min     => %w[ min   _   pminub  pminsw  _       _     ], # not plain MMX !!! (req.Katmai)
  # :eq      => %w[ ==    _   pcmpeqb pcmpeqw pcmpeqd _     ],
  # :gt      => %w[ >     _   pcmpgtb pcmpgtw pcmpgtd _     ],
  # :shl     => %w[ <<    shl _       psllw   pslld   psllq ], # noncommutative
  # :shr     => %w[ >>    sar _       psraw   psrad   _     ], # noncommutative
  # :clipadd => %w[ clip+ _   paddusb paddsw  _       _     ], # future use
  # :clipsub => %w[ clip- _   psubusb psubsw  _       _     ], # future use
  # :andnot  => %w[ ¬&    _   pandn   pandn   pandn   pandn ], # not planned
}

# Turn every "_" placeholder into nil (in place) so a missing opcode tests
# as false, and dump each row on STDERR.
$opcodes.each_value do |row|
  row.map! { |cell| cell == "_" ? nil : cell }
  STDERR.puts row.inspect
end
# Text accumulated for the loader file while functions are generated:
# $decls collects one C prototype per function, $install one assignment
# statement per function that stores it into the operator's dispatch slot.
$decls, $install = "", ""
# Emits the assembly routine "mmx_<type>_map_<op>", which applies <op> with
# one scalar operand to every element of an array in place, and registers
# it in the loader text.
#
# op::   key into $opcodes
# type:: key into $sizeof / $accum / $asm_type
#
# Does nothing and returns nil when $opcodes has no MMX opcode in the
# column selected for this type; otherwise appends to $decls and $install
# and returns the incremented $count.
def make_fun_map(op,type)
  row = $opcodes[op]
  size = $sizeof[type]
  # MMX column: 2 for 1-byte elements, 3 for 2-byte, 4 for 4-byte.
  mopcode = row[size + (size < 4 ? 1 : 0)]
  return unless mopcode

  s = "mmx_#{type}_map_#{op}"
  sym, opcode = row[0], row[1]
  accum = $accum[type]

  AsmFunction.make(s) do |fn|
    # Arguments, matching the C prototype below:
    # ecx = element count, esi = array pointer, eax = scalar operand.
    puts "mov ecx,[ebp+8]", "mov esi,[ebp+12]", "mov eax,[ebp+16]"
    # Replicate the scalar across eax: byte -> word, then word -> dword.
    if size == 1
      puts "mov dx,ax", "shl eax,8", "mov al,dl"
    end
    if size <= 2
      puts "mov edx,eax", "shl eax,16", "mov ax,dx"
    end
    # Two copies of eax on the stack make the 8 bytes loaded into mm7.
    puts "push eax", "push eax", "movq mm7,[esp]", "add esp,8"

    # First a loop handling four quadwords per pass, then one handling a
    # single quadword per pass.
    [4, 1].each do |n|
      per_pass = 8 / size * n   # elements consumed by one pass
      fn.make_until("cmp ecx,#{per_pass}", "jb near") do
        0.step(n, 4) do |base|
          # Byte offsets of the (at most four) quadwords in this group.
          offsets = (0...[n - base, 4].min).map { |r| 8 * (base + r) }
          offsets.each_with_index { |off, r| puts "movq mm#{r},[esi+#{off}]" }
          offsets.each_index { |r| puts "#{mopcode} mm#{r},mm7" }
          offsets.each_with_index { |off, r| puts "movq [esi+#{off}],mm#{r}" }
        end
        puts "lea esi,[esi+#{8 * n}]", "lea ecx,[ecx-#{per_pass}]"
      end
    end

    # Remaining elements, one at a time, with the plain x86 opcode.
    fn.make_until("test ecx,ecx", "jz") do
      puts "#{opcode} #{$asm_type[type]} [esi],#{accum}", "lea esi,[esi+#{size}]"
      puts "dec ecx"
    end
    puts "emms"
  end

  $decls << "void #{s}(int,#{type}*,#{type});\n"
  $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))" <<
              "->on_#{type}.op_map = #{s};\n"
  $count += 1
end
# Emits the assembly routine "mmx_<type>_zip_<op>", which combines two
# arrays element by element with <op>, storing the result over the first
# array, and registers it in the loader text.
#
# op::   key into $opcodes
# type:: key into $sizeof / $accum / $asm_type
#
# Does nothing and returns nil when $opcodes has no MMX opcode in the
# column selected for this type; otherwise appends to $decls and $install
# and returns the incremented $count.
#
# A three-pointer variant (separate destination in ebx, prototype
# "void f(int,T*,T*,T*)") was sketched in the original as commented-out
# lines and is not generated.
def make_fun_zip(op,type)
  row = $opcodes[op]
  size = $sizeof[type]
  # MMX column: 2 for 1-byte elements, 3 for 2-byte, 4 for 4-byte.
  mopcode = row[size + (size < 4 ? 1 : 0)]
  return unless mopcode

  s = "mmx_#{type}_zip_#{op}"
  sym, opcode = row[0], row[1]
  accum = $accum[type]

  AsmFunction.make(s) do |fn|
    # Arguments, matching the C prototype below:
    # ecx = element count, edi = first array (also written), esi = second array.
    puts "mov ecx,[ebp+8]", "mov edi,[ebp+12]", "mov esi,[ebp+16]"

    # First a loop handling four quadwords per pass, then one handling a
    # single quadword per pass.
    [4, 1].each do |n|
      per_pass = 8 / size * n   # elements consumed by one pass
      fn.make_until("cmp ecx,#{per_pass}", "jb near") do
        0.step(n, 4) do |base|
          # Byte offsets of the (at most four) quadwords in this group.
          offsets = (0...[n - base, 4].min).map { |r| 8 * (base + r) }
          # mm0..mm3 take the first array, mm4..mm7 the second.
          offsets.each_with_index { |off, r| puts "movq mm#{r},[edi+#{off}]" }
          offsets.each_with_index { |off, r| puts "movq mm#{r + 4},[esi+#{off}]" }
          offsets.each_index { |r| puts "#{mopcode} mm#{r},mm#{r + 4}" }
          offsets.each_with_index { |off, r| puts "movq [edi+#{off}],mm#{r}" }
        end
        puts "lea edi,[edi+#{8 * n}]",
             "lea esi,[esi+#{8 * n}]",
             "lea ecx,[ecx-#{per_pass}]"
      end
    end

    # Remaining elements, one at a time, with the plain x86 opcode.
    # Original author's note: requires commutativity ??? fails with shl, shr
    fn.make_until("test ecx,ecx", "jz") do
      puts "mov #{accum},[esi]",
           "#{opcode} #{$asm_type[type]} [edi],#{accum}",
           "lea edi,[edi+#{size}]",
           "lea esi,[esi+#{size}]",
           "dec ecx"
    end
    puts "emms"
  end

  $decls << "void #{s}(int,#{type}*,#{type}*);\n"
  $install << "FIX2PTR(Numop,rb_hash_aref(op_dict,SYM(#{sym})))" <<
              "->on_#{type}.op_zip = #{s};\n"
  $count += 1
end
# Generate one map routine and one zip routine for every operator in
# $opcodes and every enabled element type. :int32 is commented out of the
# type list in the original, so only :uint8 and :int16 are produced.
$opcodes.keys.each do |op|
  [:uint8, :int16].each do |type|
    make_fun_map(op, type)
    make_fun_zip(op, type)
  end
end

# Finish the loader file: the collected prototypes, the end of the
# extern "C" block, and a startup_mmx() whose body is the collected
# install statements (skipped at run time when NO_MMX is set in the
# environment).
$loader.puts $decls
$loader.puts %`
}; /* extern */
#include <stdlib.h>
void startup_mmx_loader () {/*bogus*/}
void startup_mmx () {
if (getenv("NO_MMX")) return;
if (EVAL(\"GridFlow.bridge_name\")!=Qnil) gfpost(\"startup_cpu: using MMX optimisations\");
#{$install}
}`

STDERR.puts "automatically generated #{$count} MMX asm functions"
=begin notes:
CPUID has a bit for detecting MMX
PACKSSDW PACKSSWB PACKUSWB = saturation-casting
PCMPxx: Compare Packed Integers
PMULHW, PMULLW: Multiply Packed _signed_ 16-bit Integers, and Store High/Low Word (the unsigned high-word form is PMULHUW, which is not plain MMX)
PUNPCKxxx: Unpack and Interleave Data
=end
- Previous message: [PD-cvs] externals/gridflow/base bitpacking.c, NONE, 1.1 flow_objects.c, NONE, 1.1 flow_objects.rb, NONE, 1.1 flow_objects_for_image.c, NONE, 1.1 flow_objects_for_matrix.c, NONE, 1.1 grid.c, NONE, 1.1 grid.h, NONE, 1.1 main.c, NONE, 1.1 main.rb, NONE, 1.1 number.c, NONE, 1.1 source_filter.rb, NONE, 1.1 test.rb, NONE, 1.1
- Next message: [PD-cvs] externals/gridflow/bridge placebo.rb, NONE, 1.1 puredata.c, NONE, 1.1 puredata.rb, NONE, 1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the Pd-cvs
mailing list