; Most code- and speed-efficient bit reversal:
; 4 cycles, 5 bytes, 2 ports
; (and the most hardware-wasteful)

; ffbr - reverse the bit order of the accumulator using two wired ports.
;
; In:    A  = byte to reverse
; Out:   A  = byte read back from P0
; Side:  the P1 latch is overwritten with the input byte
;
; The reversal is performed by the board, not by the CPU: the result is
; only bit-reversed when P1 is cross-wired to P0 in reverse bit order
; (P0.0 to P1.7, and so on).
; NOTE(review): presumably P0 must be left configured as an input (its
; latch bits set) so the read-back sees the levels driven by P1 - confirm
; against the target part's port documentation.
;
; Cost: 5 bytes, 4 cycles including the RET. Expanding the two MOVs
; inline through a macro makes more sense, since it avoids the RET.
ffbr:
        MOV     P1,A            ; drive the byte out on P1     (2 bytes, 1 cycle)
        MOV     A,P0            ; read it back through P0      (2 bytes, 1 cycle)
        RET                     ; back to the caller           (1 byte, 2 cycles)

; Wire P0.0 to P1.7, P0.1 to P1.6, and so on through P0.7 to P1.0.
