Audio manipulation and delete some part of the audio

牧云@^-^@ 提交于 2019-12-11 02:09:24

问题


I'm new to audio coding. I have succeeded in recording from the microphone and saving each 10 seconds to a file with the SaveRecordtoFile function (this part works with no problem).

Now I want to delete, for example, 2 seconds from the recorded data, so that my output is 8 seconds long instead of 10. In the randomTime array, a '0' marks a second that I want to delete...

In a for-loop, I copy the data from waveHeader->lpData into a new buffer if (randomTime[i] == '1').

10 seconds:

Delete the data of seconds number 3 and 7, as in randomTime[10] = "110111011"

It seems this is a correct algorithm and should work, but the problem is the outputs: some of them are good (about 70% or more), but some of them are corrupted.

The output should be like this:

But some of the outputs (about 30% or less) are corrupted and look like this:

I think I have a mistake in the code, but I have been debugging it for some days and I still don't understand what the problem is.

This is my code:

#include <stdio.h>
#include <string.h>

#include <Windows.h>
#include <atlstr.h>
#pragma comment(lib,"Winmm.lib")

// Fill a WAVEFORMATEX structure from individual format parameters.
//
//   wf              - structure to fill; must not be NULL
//   wFormatTag      - format tag (e.g. WAVE_FORMAT_PCM)
//   nChannels       - number of interleaved channels
//   nSamplesPerSec  - sample frames per second
//   nBlockAlign     - size of one sample frame in bytes
//   wBitsPerSample  - bits per single-channel sample
//   cbSize          - number of extra format bytes following the structure
//
// nAvgBytesPerSec is derived as nChannels * nSamplesPerSec * wBitsPerSample / 8.
// Returns 0 on success, -1 if wf is NULL.
int SetWaveFormat(WAVEFORMATEX* wf,int wFormatTag,int nChannels,int nSamplesPerSec, int nBlockAlign, int wBitsPerSample, int cbSize)
{
    if (wf == NULL)
    {
        return -1;
    }

    // The structure fields are WORD/DWORD; make the narrowing from int explicit.
    wf->wFormatTag      = (WORD)wFormatTag;
    wf->nChannels       = (WORD)nChannels;
    wf->nSamplesPerSec  = (DWORD)nSamplesPerSec;
    wf->nBlockAlign     = (WORD)nBlockAlign;
    wf->wBitsPerSample  = (WORD)wBitsPerSample;
    wf->cbSize          = (WORD)cbSize;
    wf->nAvgBytesPerSec = (DWORD)(nChannels * nSamplesPerSec * wBitsPerSample / 8);
    return 0;
}

// Open the default wave input device (WAVE_MAPPER) for the given format.
//
//   hWaveIn - receives the opened device handle
//   wf      - requested capture format
//
// Returns 0 on success, -1 if no input device is present or the device
// cannot be opened. On an open failure the error code is sent to the
// debugger output.
int OpenWaveIn(HWAVEIN* hWaveIn, WAVEFORMATEX* wf)
{
    if (waveInGetNumDevs() == 0)
    {
        //_debug_print("Access WaveIn channel FAILED!",1);
        return -1;
    }

    // Open wave input channel.
    // No window handle or callback function is supplied, so request no
    // callback at all instead of CALLBACK_WINDOW with a NULL window.
    MMRESULT res = waveInOpen(hWaveIn, WAVE_MAPPER, wf, (DWORD_PTR)0, (DWORD_PTR)0, CALLBACK_NULL);
    if ( res != MMSYSERR_NOERROR )
    {
        char lpTemp[256];
        sprintf_s(lpTemp, sizeof(lpTemp), "Open wave input channel FAILED, Error_Code = 0x%x", res );
        OutputDebugStringA(lpTemp);
        return -1;
    }

    //_debug_print("Open wave input channel SUCCEED!");
    return 0;
}

// Prepare Wave In Header and allocate memory
//
// Allocates a zero-filled capture buffer of dataSize bytes, attaches it to
// waveHeader, prepares the header and queues it on the input device.
//
//   hWaveIn    - opened wave input device
//   waveHeader - header to initialise; any previous contents are overwritten
//   dataSize   - capture buffer size in bytes
//
// Returns 0 on success. Returns -1 on failure, in which case the buffer has
// been freed again and waveHeader->lpData is NULL.
int PrepareWaveIn(HWAVEIN* hWaveIn, WAVEHDR* waveHeader, DWORD dataSize)
{
    char lpTemp[256];

    HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE|GMEM_SHARE|GMEM_ZEROINIT, dataSize);
    if (hMem == NULL)
    {
        return -1;
    }

    char* data = (char *)GlobalLock(hMem);
    if (data == NULL)
    {
        GlobalFree(hMem);
        return -1;
    }

    // Clear every field (including lpNext/reserved) before handing the
    // header to the driver, then fill in the buffer description.
    ZeroMemory(waveHeader, sizeof(WAVEHDR));
    waveHeader->lpData = data;
    waveHeader->dwBufferLength = dataSize;

    // Prepare Header
    MMRESULT res = waveInPrepareHeader(*hWaveIn, waveHeader, sizeof(WAVEHDR) );
    if ( res != MMSYSERR_NOERROR)
    {
        sprintf_s(lpTemp, sizeof(lpTemp), "Cannot prepare wave in header, Error_Code = 0x%03X", res );
        OutputDebugStringA(lpTemp);
        GlobalUnlock(hMem);
        GlobalFree(hMem);
        waveHeader->lpData = NULL;
        return -1;
    }

    res = waveInAddBuffer( *hWaveIn, waveHeader, sizeof(WAVEHDR) );
    if ( res != MMSYSERR_NOERROR)
    {
        sprintf_s(lpTemp, sizeof(lpTemp), "Cannot add buffer for wave in, Error_Code = 0x%03X", res );
        OutputDebugStringA(lpTemp);
        // Undo the successful prepare before releasing the memory.
        waveInUnprepareHeader(*hWaveIn, waveHeader, sizeof(WAVEHDR));
        GlobalUnlock(hMem);
        GlobalFree(hMem);
        waveHeader->lpData = NULL;
        return -1;
    }

    return 0;
}

// Start recording speech
//
// Starts input on the given wave input device.
// Returns 0 when waveInStart reports MMSYSERR_NOERROR, -1 otherwise.
int StartRecord(HWAVEIN* hWaveIn)
{
    const int status = waveInStart(*hWaveIn);

    if (status == MMSYSERR_NOERROR)
    {
        //_debug_print("Start recording...",1);
        return 0;
    }

    //_debug_print("Start recording FAILED!",1);
    return -1;
}

// Stop recording speech
//
// Runs three steps in order: query the current input position into mmTime,
// stop input, then reset the device. The first step that fails aborts the
// sequence.
// Returns 0 if all three calls report MMSYSERR_NOERROR, -1 otherwise.
int StopRecord(HWAVEIN* hWaveIn, MMTIME* mmTime)
{
    // 1. Read the current position.
    if (waveInGetPosition(*hWaveIn, mmTime, sizeof(MMTIME)) != MMSYSERR_NOERROR)
    {
        //_debug_print("Get Position of wave in FAILED!",1);
        return -1;
    }

    // 2. Stop input.
    if (waveInStop(*hWaveIn) != MMSYSERR_NOERROR)
    {
        //_debug_print("Stop recording FAILED!",1);
        return -1;
    }

    // 3. Reset the device.
    if (waveInReset(*hWaveIn) != MMSYSERR_NOERROR)
    {
        //_debug_print("Reset wave in memory FAILED!",1);
        return -1;
    }

    return 0;
}

// str2num
//
// Packs the first four bytes of lpStr into a DWORD, with lpStr[0] in the
// least significant byte (e.g. "RIFF" -> 0x46464952). lpStr must point to at
// least four readable bytes.
DWORD FCC(LPSTR lpStr)
{
    // Go through unsigned char so that bytes >= 0x80 are not sign-extended
    // and the shifts are done in unsigned arithmetic.
    const unsigned char* bytes = (const unsigned char*)lpStr;

    return  (DWORD)bytes[0]
         | ((DWORD)bytes[1] << 8)
         | ((DWORD)bytes[2] << 16)
         | ((DWORD)bytes[3] << 24);
}

// Save recorded speech to file
int SaveRecordtoFile(const char* fileName, WAVEFORMATEX* wf, HWAVEIN* hWaveIn, WAVEHDR* waveHeader, MMTIME* mmTime)
{
    int res;
    DWORD NumToWrite=0;
    DWORD dwNumber = 0;
    DWORD dwSamplePerSec = 0;
    LPBYTE NewBuff;
    DWORD dwNewBuffLen = 0;

    //-----------------------------------------------------------------------------------
    // delete for example 2 seconds from the recorded data
    dwSamplePerSec = waveHeader->dwBytesRecorded / 10; // 10 Secs
    dwNewBuffLen = dwSamplePerSec * 8;
    NewBuff = (LPBYTE) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, dwNewBuffLen);
    memset(NewBuff, NULL, dwNewBuffLen);
    CHAR randomTime[10] = "110111011";
    int m = 0;
    for (int i=0; i<10; i++)
    {
        if (randomTime[i] == '1')
        {
            memcpy_s(NewBuff + (m*dwSamplePerSec), dwNewBuffLen, waveHeader->lpData + (i*dwSamplePerSec), dwSamplePerSec);
            m++;
        }
    }
    //-----------------------------------------------------------------------------------

    /*waveHeader->dwBytesRecorded = mmTime->u.cb;*/

    HANDLE FileHandle = CreateFile(CString(fileName), GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);

    dwNumber = FCC("RIFF");
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    dwNumber = /*waveHeader->dwBytesRecorded*/dwNewBuffLen + 12 + sizeof(WAVEFORMATEX) + 18 + 8;
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    dwNumber = FCC("WAVE");
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    dwNumber = FCC("fmt ");
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    dwNumber = sizeof(WAVEFORMATEX);
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    WriteFile(FileHandle, wf, sizeof(WAVEFORMATEX), &NumToWrite, NULL);

    dwNumber = FCC("data");
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    dwNumber = /*waveHeader->dwBytesRecorded*/dwNewBuffLen;
    WriteFile(FileHandle, &dwNumber, 4, &NumToWrite, NULL);

    WriteFile(FileHandle, NewBuff, dwNewBuffLen, &NumToWrite, NULL);
    HeapFree(GetProcessHeap(), 0, NewBuff);
    NewBuff = NULL;
    SetEndOfFile( FileHandle );
    CloseHandle( FileHandle );
    FileHandle = INVALID_HANDLE_VALUE;

    return 0;
}

// Release wave in memory
//
// Unprepares waveHeader and frees the global memory block behind
// waveHeader->lpData. On success lpData is set to NULL.
//
// Returns 0 on success, -1 if the header cannot be unprepared or the memory
// block cannot be located or freed.
int ReleaseWaveIn(HWAVEIN* hWaveIn, WAVEHDR* waveHeader)
{
    if ( waveInUnprepareHeader(*hWaveIn, waveHeader, sizeof(WAVEHDR)) != MMSYSERR_NOERROR )
    {
        //_debug_print("UnPrepare Wave In Header FAILED!",1);
        return -1;
    }

    HGLOBAL hMem = GlobalHandle( waveHeader->lpData );
    if ( hMem == NULL )
    {
        //_debug_print("Global Free FAILED!",1);
        return -1;
    }

    // Drop the lock taken when the buffer was allocated, then free it.
    GlobalUnlock(hMem);

    // GlobalFree returns NULL on success and the handle itself on failure;
    // compare the handle directly instead of casting it to int.
    if ( GlobalFree(hMem) != NULL )
    {
        //_debug_print("Global Free FAILED!",1);
        return -1;
    }

    waveHeader->lpData = NULL;
    //_debug_print("Global Free SUCCEED!");
    return 0;
}

// Close Wave in channel
//
// Closes the given wave input device.
// Returns 0 when waveInClose reports MMSYSERR_NOERROR, -1 otherwise
// (matching the other helpers in this file).
int CloseWaveIn(HWAVEIN* hWaveIn)
{
    if (waveInClose(*hWaveIn) != MMSYSERR_NOERROR)
    {
        //_debug_print("Close wave in FAILED!",1);
        return -1;
    }

    //_debug_print("Close wave in SUCCEED!");
    return 0;
}

int main()
{
    int numOfFiles = 0;
    // Set wave format when sampling the audio
    WAVEFORMATEX wf;
    SetWaveFormat(&wf,1,2,48000,4,16,18);

    // Open wave input channel
    HWAVEIN hWaveIn;
    OpenWaveIn(&hWaveIn,&wf);


    while (TRUE)
    {   
        // Prepare Wave In Header and allocate memory
        WAVEHDR waveHdr;
        DWORD dataSize = 192000000;

        PrepareWaveIn(&hWaveIn, &waveHdr, dataSize);
        // Start recording
        StartRecord(&hWaveIn);

        Sleep(10000);

        // Stop recording
        MMTIME mmt;
        StopRecord(&hWaveIn, &mmt);

        CHAR FileName[MAX_PATH] = {};
        wsprintfA(FileName, "Record-%d.wav", numOfFiles);
        SaveRecordtoFile(FileName, &wf, &hWaveIn, &waveHdr, &mmt);
        numOfFiles++;
        ReleaseWaveIn(&hWaveIn, &waveHdr);
    }
    CloseWaveIn(&hWaveIn);
    return 0;
}

Somewhere I read that I should copy samples, not bytes, but as I'm not a professional in this, I don't know how to work with samples. I changed my code in a couple of ways, but the result was not good:

1:

//-----------------------------------------------------------------------------------
// delete for example 2 seconds from the recorded data
// NOTE(review): this is a fragment; waveHeader, dwSamplePerSec, dwNewBuffLen and
// NewBuff are not declared here and are presumably those of SaveRecordtoFile.
// Despite its name, dwSamplePerSec holds a BYTE count: one tenth of dwBytesRecorded.
dwSamplePerSec = waveHeader->dwBytesRecorded / 10; // 10 Secs
dwNewBuffLen = dwSamplePerSec * 8;
NewBuff = (LPBYTE) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, dwNewBuffLen);
memset(NewBuff, NULL, dwNewBuffLen);
// The literal has 9 characters, so randomTime[9] is the terminating '\0', not '1'.
CHAR randomTime[10] = "110111011";
int m = 0;

BYTE* pData = (BYTE*)(waveHeader->lpData);
for (int i=0; i<10; i++)
{
    if (randomTime[i] == '1')
    {
        // Runs dwSamplePerSec times and advances pData by 4 bytes per pass,
        // i.e. by 4 * dwSamplePerSec bytes for each kept slice.
        for (DWORD index = 0; index < dwSamplePerSec; index++)
        {
            // left/right are read but never stored: with the memcpy_s below
            // commented out, nothing is written to NewBuff in this loop.
            short left = *(short*)pData; pData+=2;
            short right = *(short*)pData; pData+=2; 
            //memcpy_s(NewBuff + (m*dwSamplePerSec), dwNewBuffLen, (SHORT *)waveHeader->lpData + (i*dwSamplePerSec), dwSamplePerSec);       
        }
        m++;
    }       
    // No else branch: pData is not advanced past a slice whose mask entry is not '1'.
}
//-----------------------------------------------------------------------------------

2:

// NOTE(review): this is a fragment; randomTime, dwSamplePerSec, NewBuff, m and
// waveHeader are not declared here and are presumably those of the first attempt.
// x is the write index into NewBuff; pData is the read cursor, one BYTE at a time.
int x = 0;
BYTE* pData = (BYTE*)(waveHeader->lpData);  
for (int i=0; i<10; i++)
{
    if (randomTime[i] == '1')
    {
        // Visits dwSamplePerSec consecutive bytes (pData++ per pass).
        for (DWORD index = 0; index < dwSamplePerSec; index++)
        {
            //short int* pSamples = (short int *) (pData);
            // Reads two bytes starting at the current byte position, whatever its alignment.
            short right = *(short*)pData; //pData+=2; 
            //memcpy_s(NewBuff + (m*dwSamplePerSec), dwNewBuffLen, (SHORT *)waveHeader->lpData + (i*dwSamplePerSec), dwSamplePerSec);   
            // *pData is a single BYTE (0..255), so this value always lies between
            // -1.0 and about -0.992; the fabs() test below is therefore always true.
            float pDataSample = (*pData - 32768) / 32768.0f;

            // NOTE(review): if NewBuff is a byte buffer (LPBYTE, as in the first
            // attempt), storing the short `right` keeps only its low byte - confirm.
            if (fabs(pDataSample) > 0.25f)
                NewBuff[x] = /*pSamples;// **/ right;
                //pData++;
            else
                NewBuff[x] = *pData;

            //NewBuff[x] = pDataSample;// + right;
            x++;
            pData++;
        }
        m++;
    } 
    else
    {
        // Skip one slice of input without copying it.
        pData = pData + (/*4 * */dwSamplePerSec);
    }
}

And as 70% or more of my outputs are good, I think it's not because of bytes versus samples.


回答1:


Your code can break a sample apart; after that, the stream is out of sync and you hear loud noise.

How does it happen? Your sample (frame) size is 4 bytes, so you must never copy a number of bytes that is not a multiple of 4. 10 seconds of audio take 10×48000×4 = 1920000 bytes. However, Sleep(10000) will always be close to 10 seconds but not exactly 10 seconds, so you can get, for example, 1920012 bytes. Then you do:

dwSamplePerSec = waveHeader->dwBytesRecorded / 10; // 10 Secs

which returns 192001 (not a multiple of 4), and the stream gets out of sync. If you're lucky, you receive 1920040 bytes for the 10 seconds; that remains a multiple of 4 after division by 10, and you're OK.



来源:https://stackoverflow.com/questions/56503611/audio-manipulation-and-delete-some-part-of-the-audio

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!