为什么C比汇编这么慢?

计算受标量积的启发。 C版本:

#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <time.h>


int main() {
    clock_t t0,t1;
    int64_t n = 10000000; // 10 million
    int64_t *m = (int64_t*) malloc ((20*n) * sizeof(int64_t));
    for (int64_t i = 0; i < n; i++) {
        m[i] = rand() % 1000;
    }
    int64_t sum = 0;
    int64_t k = 0;
    t0 = clock();
    for (int64_t i = 0; i < n; i++) {
        sum += m[k]*m[k+1] + m[k+2]*m[k+3] + m[k+4]*m[k+5] + m[k+6]*m[k+7] + m[k+8]*m[k+9] + m[k+10]*m[k+11] + m[k+12]*m[k+13] + m[k+14]*m[k+15] + m[k+16]*m[k+17] + m[k+18]*m[k+19];
        k += 20;
    }
    t1 = clock();
    printf("%" PRId64 "\n",sum);
    float diff = ((float) (t1 - t0) / 1000000.0F ) * 1000;
    printf("%f",(float) diff);
}

常用版本:

主文件:

format PE64 console
entry prog

include "win64ax.inc"


section '.idata' import data readable writeable

library kernel32,'kernel32.dll',msvcrt,'msvcrt.dll'

import kernel32,ExitProcess,'ExitProcess',GetTickCount,'GetTickCount'
import msvcrt,printf,'printf'


section '.text' code readable executable

macro now
{
    cinvoke GetTickCount
    cinvoke printf,<"%lld",13,10>,rax
}

prog:

mov r14,temp
mov r15,10000000 ; 10 million
now
lbl:
dec r15

include "temp_code.asm"
add r14,80

cmp r15,0
jne lbl
now

end_prog:
    invoke ExitProcess,0


section '.data' data readable writeable
    align 8
    temp dq 100000200 dup(0)

temp_code.asm:

mov rbx,0
mov rax,[r14 + 0 * 8]
mov rcx,[r14 + 1 * 8]
imul rax,rcx
add rbx,rax
mov rax,[r14 + 2 * 8]
mov rcx,[r14 + 3 * 8]
imul rax,[r14 + 4 * 8]
mov rcx,[r14 + 5 * 8]
imul rax,[r14 + 6 * 8]
mov rcx,[r14 + 7 * 8]
imul rax,[r14 + 8 * 8]
mov rcx,[r14 + 9 * 8]
imul rax,[r14 + 10 * 8]
mov rcx,[r14 + 11 * 8]
imul rax,[r14 + 12 * 8]
mov rcx,[r14 + 13 * 8]
imul rax,[r14 + 14 * 8]
mov rcx,[r14 + 15 * 8]
imul rax,[r14 + 16 * 8]
mov rcx,[r14 + 17 * 8]
imul rax,[r14 + 18 * 8]
mov rcx,[r14 + 19 * 8]
imul rax,rbx

fasm的最佳时间是93毫秒,带有编译选项“ gcc.exe -std = c99 -g 1.c -O3 -o 1.exe”的c的最佳时间是710毫秒。这慢了7.63倍。 我听说过数百次手写程序集无法击败c编译器,而现在……这是超级跑车的速度对赛跑者的速度。您的解释?

ccc287718265 回答:为什么C比汇编这么慢?

暂时没有好的解决方案,如果你有好的解决方案,请发邮件至:iooj@foxmail.com
本文链接:https://www.f2er.com/3157027.html

大家都在问