问题
I've been writing a bootloader in assembly (a game). The bootloader uses the BIOS WAIT function (int 0x15, ah 0x86) for delays between frames. I'm debugging with Bochs and everything works splendidly (timing is perfect). I also made a bootable ISO with the genisoimage tool and tested my bootloader in VirtualBox, and everything works as expected there too. So I think it's safe to say that the delays work in virtual environments. 
Now here's the peculiar part: when I write the bootloader to a USB drive and boot from it, the WAIT function waits FAR longer than on the virtual machines. I'd estimate it's at least 2-3 times longer. Such long delays between frames are absolutely unacceptable for a game.
What in the world could cause this behaviour? Is it possible that my buggy bios clock runs at a slower speed (>18 microseconds)? But surely, that would violate some IBM standard? It's inexplicable to me.
By the way, if I remove the WAIT interrupt, the bootloader runs as expected (just too fast for my purposes), so it's not my actual code that's running slow. Plus, I'm running on a recent ~2013 laptop, so it can't be because of low performance.
Any insight, appreciated!
EDIT: I had the idea to test it on another physical computer and IT ALSO RAN EXTREMELY SLOW! So maybe it's not a "shitty bios" :) The odds of it running slow on two computers are pretty low (I think), so that might mean something...
CODE:
; ---------------------------------------------------------------------------
; Build-time constants (NASM single-line macros, substituted textually)
; ---------------------------------------------------------------------------

; Screen geometry
%define xres            320     ; horizontal resolution in pixels
%define yres            200     ; vertical resolution in pixels

; Off-screen frame buffer. The code loads this value into GS, i.e. it is
; used as a real-mode segment, not as a linear address.
%define buffer_addr     0x1000

; Player sprite geometry (all values in pixels)
%define p_width         10      ; side length of the player
%define p_left_offset   20      ; gap between the left screen edge and the player
%define p_right_offset  290     ; xres - p_width - p_left_offset: distance from the
                                ; player's right edge to the next row's left edge
%define beak_width      5
[bits 16]
[org 0x7c00]

section .text

; ---------------------------------------------------------------------------
; Boot sector entry: a jump over the BIOS Parameter Block, padded with NOPs
; (0x90) so the BPB always begins at byte offset 3 whether the assembler
; emits a 2-byte or a 3-byte JMP.
; ---------------------------------------------------------------------------
boot:
    jmp     start
    times 3 - ($ - $$) db 0x90

; ---------------------------------------------------------------------------
; DOS 4.0 Extended BIOS Parameter Block describing a 1.44MB floppy
; (2880 sectors * 512 bytes per sector).
; ---------------------------------------------------------------------------
OEMname:            db  "mkfs.fat"      ; OEM name as written by mkdosfs
bytesPerSector:     dw  512
sectPerCluster:     db  1
reservedSectors:    dw  1
numFAT:             db  2
numRootDirEntries:  dw  224
numSectors:         dw  2880
mediaType:          db  0xf0
numFATsectors:      dw  9
sectorsPerTrack:    dw  18
numHeads:           dw  2
numHiddenSectors:   dd  0
numSectorsHuge:     dd  0
driveNum:           db  0
reserved:           db  0
signature:          db  0x29
volumeID:           dd  0x2d7e5a1a
volumeLabel:        db  "NO NAME    "   ; 11 bytes, space padded
fileSysType:        db  "FAT12   "      ; 8 bytes, space padded
; ---------------------------------------------------------------------------
; start / gameloop - program entry and main frame loop (never returns).
;
; Segment register roles once set up:
;   DS = 0              variables are addressed relative to org 0x7c00
;   SS:SP = 0:0x7c00    stack grows downward from just below the boot sector
;   ES = 0xA000         video memory segment (copy destination)
;   GS = buffer_addr    off-screen buffer segment
; ---------------------------------------------------------------------------
start:
    ; Establish our own stack before the first CALL/INT instead of relying on
    ; whatever SS:SP the firmware left behind.
    xor     ax, ax
    mov     ds, ax              ; DS = 0 so [y_offset] resolves correctly
    mov     ss, ax              ; interrupts are held off until after the next
    mov     sp, 0x7c00          ; instruction, so SS:SP is updated atomically

    mov     ax, 0x0013          ; AH = 0x00 (set video mode), AL = 0x13:
    int     0x10                ; 320x200 graphics, 256 colors, one byte per pixel

    mov     ax, 0xA000
    mov     es, ax              ; ES -> video memory
    mov     ax, buffer_addr
    mov     gs, ax              ; GS -> off-screen buffer

gameloop:
    call    clear_buffer        ; fill the off-screen buffer with the background
    call    draw_player         ; draw the player into the off-screen buffer
    call    switch_buffers      ; copy the off-screen buffer to video memory
    add     word [y_offset], 0x03   ; downward velocity: 3 rows per frame

    ; Sleep between frames with the BIOS WAIT service (int 0x15, AH = 0x86).
    ; CX:DX is the interval in microseconds:
    ;   0x0006FFFF = 6 * 65536 + 65535 = 458,751 us, about 0.46 s per frame.
    ; NOTE(review): this requests roughly two frames per second. If an
    ; emulator looks faster than real hardware, the emulator is probably not
    ; running this wait in real time - confirm, then lower CX:DX as needed.
    mov     ah, 0x86
    mov     cx, 0x0006          ; high word of the interval
    mov     dx, 0xffff          ; low word of the interval
    int     0x15
    jmp     gameloop            ; loop forever; nothing after this is reachable
; ---------------------------------------------------------------------------
; draw_player - draw the player as a p_width x p_width square in the
;               off-screen buffer.
;
; In:    DS:[y_offset] = row of the square's top edge
;        GS            = off-screen buffer segment
; Out:   nothing
; Clobb: ax, bx, cx, dx, si, flags
;
; Each row is written at byte offsets +1 .. +p_width past
; (row * xres + p_left_offset), because SI is incremented before each store.
; After a row is finished, adding p_right_offset
; (= xres - p_width - p_left_offset) moves SI to the start of the next
; scanline, so the same row code can simply run again.
; ---------------------------------------------------------------------------
draw_player:
    xor     bx, bx                  ; bl = number of rows started so far
    mov     ax, xres
    mul     word [y_offset]         ; dx:ax = xres * y_offset; only ax is used
    mov     si, ax                  ; si = offset of the square's first scanline
.next_row:
    inc     bl                      ; count this row
    add     si, p_left_offset       ; skip the gap left of the player
    mov     cx, si
    add     cx, p_width             ; cx = offset of the last pixel in this row
.next_pixel:
    inc     si                      ; advance first, then store (see header)
    mov     byte [gs:si], 0x0E      ; palette index 0x0E (yellow in the default
                                    ; VGA palette; white would be 0x0F)
    cmp     si, cx
    jne     .next_pixel             ; until the last pixel of the row is written
    add     si, p_right_offset      ; si -> start of the next scanline
    cmp     bl, p_width
    jne     .next_row               ; draw p_width rows in total
    ret
; ---------------------------------------------------------------------------
; clear_buffer - fill the whole off-screen buffer with the background color.
;
; In:    GS = off-screen buffer segment
; Out:   nothing
; Clobb: cx (0 on return), si (xres * yres on return), flags
; ---------------------------------------------------------------------------
clear_buffer:
    xor     si, si                      ; si = byte offset into the buffer
    mov     cx, (xres * yres) / 2       ; 32000 words: two pixels per store
.fill_word:
    mov     word [gs:si], 0x0101        ; two pixels of color 0x01 (blue background)
    add     si, 2
    loop    .fill_word                  ; cx -= 1, repeat while cx != 0
    ret
; ---------------------------------------------------------------------------
; switch_buffers - copy the off-screen buffer to video memory.
;
; In:    GS = off-screen buffer segment (source)
;        ES = video memory segment (destination)
; Out:   nothing; DS is preserved
; Clobb: cx (0 on return), si, di, flags (DF is cleared)
; ---------------------------------------------------------------------------
switch_buffers:
    push    ds                      ; the caller needs DS intact for its variables
    push    gs
    pop     ds                      ; DS = GS: MOVSW always reads from DS:SI
    cld                             ; string operations count upward
    mov     cx, 32000               ; xres * yres / 2 words
    xor     si, si                  ; source offset 0
    xor     di, di                  ; destination offset 0
    rep     movsw                   ; copy CX words from DS:SI to ES:DI
    pop     ds
    ret
; ---------------------------------------------------------------------------
; Game state. Stored inside the boot sector image and modified at run time.
; ---------------------------------------------------------------------------
y_offset:
    dw      22                      ; player's top row; gameloop adds 3 per frame

; ---------------------------------------------------------------------------
; Pad the image to 510 bytes, then append the two-byte boot signature
; (bytes 0x55, 0xAA) so the sector is exactly 512 bytes long.
; ---------------------------------------------------------------------------
times 510 - ($ - $$) db 0
db 0x55, 0xaa
来源:https://stackoverflow.com/questions/45765480/interrupt-0x15-function-0x86-bios-wait-runs-far-slower-on-real-hardware-than-o