我有一个用 C 语言编写的应用程序,带有 Xilinx Microblaze 内核的内联汇编。我的内联汇编有一个延迟任务。函数“_delay_loop_X_x”在处理器的每个循环中恰好延迟 4 个周期。输入信号确定要进行的循环数。函数“_NOPx”是为了达到更高的精度。该功能工作正常,但在信号结束时,它会提供两倍的额外延迟。恐怕我错误地使用了寄存器。有人可以检查我的汇编代码吗?
对于 Microblaze,我使用以下文档:https ://www.xilinx.com/support/documentation/sw_manuals/mb_ref_guide.pdf
汇编代码:
static __inline__ void _delay_loop_1_x( uint8_t) __attribute__((always_inline));
static __inline__ void _NOP1 (void) {__asm__ volatile ("nop \n\t" ); } //1 cycle
static __inline__ void _NOP2 (void) {__asm__ volatile ("beqi r12, 1f \n\t""1:\n\t" ::: "r12", "cc" ); } //2 cycle
static __inline__ void _NOP3 (void) {__asm__ volatile ("brk r12, r0 \n\t" ::: "r12", "cc" ); } //3 cycle
static __inline__ void /* exactly 4 cycles */
_delay_loop_1_x( uint8_t __n )
{ /* cycles per loop */
__asm__ volatile (
" addik r11, r0, 1 \n\t" /* 1 */
"1: rsub %[input], r11, %[input] \n\t" /* 1 */
" beqi %[input], 2f \n\t" /* 1 */
"2: bnei %[input], 1b \n\t" /* 1 */
: /* ----- */
: [input]"r" (__n) /* ----- */
: "r11", "cc" /* 4 */
);
}
static __inline__ void /* exactly 4 cycles/loop */
_delay_loop_2_x( uint16_t __n )
{ /* cycles per loop */
__asm__ volatile ( /* __n..one */
" addik r11, r0, 1 \n\t" /* 1 */
"1: rsub %[loops], r11, %[loops] \n\t" /* 1 */
" beqi %[loops], 2f \n\t" /* 1 */
"2: bnei %[loops], 1b \n\t" /* 1 */
: /* ----- */
: [loops]"r" (__n) /* ----- */
: "r11", "cc" /* 4 */
);
}
static __inline__ void
_delay_cycles(const double __ticks_d)
{
uint32_t __ticks = (uint32_t)(__ticks_d);
uint32_t __padding;
uint32_t __loops;
if( __ticks <= 3 ) {
__padding = __ticks;
} else if( __ticks <= 0x400 ) {
__ticks -= 1;
__loops = __ticks / 4;
__padding = __ticks % 4;
if( __loops != 0 )
_delay_loop_1_x( (uint8_t)__loops );
} else if( __ticks <= 0x40001 ) {
__ticks -= 2;
__loops = __ticks / 4;
__padding = __ticks % 4;
if( __loops != 0 )
_delay_loop_2_x( (uint16_t)__loops );
}
if( __padding == 1 ) _NOP1();
if( __padding == 2 ) _NOP2();
if( __padding == 3 ) _NOP3();
}
C代码:
#define _delay_ns(__ns) _delay_cycles( (double)(F_CPU)*((double)__ns)/1.0e9 + 0.5 )
#define _delay_us(__us) _delay_cycles( (double)(F_CPU)*((double)__us)/1.0e6 + 0.5 )
#define _delay_ms(__ms) _delay_cycles( (double)(F_CPU)*((double)__ms)/1.0e3 + 0.5 )
#define BIT_DELAY_1 _delay_ns(2070)
#define BIT_DELAY_5 _delay_us(19)
#define BIT_DELAY_7 _delay_us(26)
#define RX_TX_DELAY _delay_us(78)
#define SHA204_SWI_FLAG_TX ((uint8_t) 0x88)
XGpio GpioPIN;
uint8_t swi_send_bytes(uint8_t count, uint8_t *buffer);
uint8_t swi_send_byte(uint8_t value);
int main()
{
init_platform();
XGpio_Initialize(&GpioPIN, GPIO_PIN_DEVICE_ID);
XGpio_SetDataDirection(&GpioPIN, PIN_CHANNEL, ~PIN);
(void) swi_send_byte(SHA204_SWI_FLAG_TX);
cleanup_platform();
return 0;
}
uint8_t swi_send_byte(uint8_t value)
{
return swi_send_bytes(1, &value);
}
uint8_t swi_send_bytes(uint8_t count, uint8_t *buffer)
{
uint8_t i, bit_mask;
RX_TX_DELAY;
for (i = 0; i < count; i++) {
for (bit_mask = 1; bit_mask > 0; bit_mask <<= 1) {
if (bit_mask & buffer[i]) {
XGpio_DiscreteClear(&GpioPIN, PIN_CHANNEL, PIN);
BIT_DELAY_1;
XGpio_DiscreteWrite(&GpioPIN, PIN_CHANNEL, PIN);
BIT_DELAY_7;
}
else {
XGpio_DiscreteClear(&GpioPIN, PIN_CHANNEL, PIN);
BIT_DELAY_1;
XGpio_DiscreteWrite(&GpioPIN, PIN_CHANNEL, PIN);
BIT_DELAY_1;
XGpio_DiscreteClear(&GpioPIN, PIN_CHANNEL, PIN);
BIT_DELAY_1;
XGpio_DiscreteWrite(&GpioPIN, PIN_CHANNEL, PIN);
BIT_DELAY_5;
}
}
}
return 0;
}