#include "printf.h"
#include "audio.h"
#include "emit.h"


//emit efficient code to load a sample in the proper format, convert it to our 8.24 format, and adjust the source pointer by the number of bytes consumed
//produced code will output result in dstReg, which must be a loReg, no other regs will be clobbered, flags could be clobbered, stack might be used
//produced code assumes base for source data is in r1, and will be updated
//will not work if dstReg is r0..r1 and sample type is float and VFP is not available
static enum EmitStatus audioPrvEmitLoadSample(struct EmitBuf *dest, uint32_t dstRegL /* must be loreg*/, uint32_t dstRegR, enum AudioSampleType typ, enum AudioChannelConfig chCfg)
{
	uint32_t dstRegT, i, nCh, srcReg = 1;
	bool stereo;
	
	switch (chCfg) {
		case AudioMono:
			stereo = false;
			nCh = 1;
			
			if (dstRegL >= 8) {
				loge("%s: left reg must be loreg and %u >= 8\n", __func__, dstRegL);
				return EmitErrNotEncodeable;
			}
			break;
		
		case AudioStereo:
			stereo = true;
			nCh = 2;
			
			if (dstRegL >= dstRegR) {
				loge("%s: left reg must be smaller than right and %u >= %u\n", __func__, dstRegL, dstRegR);
				return EmitErrNotEncodeable;
			}
			if (dstRegR >= 8) {
				loge("%s: left reg must be loreg and %u >= 8\n", __func__, dstRegL);
				return EmitErrNotEncodeable;
			}
			break;
		
		default:
			return EmitErrNotEncodeable;
	}
	
	(void)nCh;	//shut GCC up
	
	//first: load, second: shift and adjust pointers
	//try to keep loads in order so that they can be pipelined and thus we save a cycle
	switch (typ) {
		case AudioSampleU8:
		
			if (stereo) {
				
				//load R before L so that at least one instr can be 16 bits
				
				//LDRB dstRegR, [src, #1]
				EMIT(LLloadImm, dstRegR, srcReg, 1, EmitSzByte, false, EmitAdrModeIndex);
				
				//LDRB dstRegL, [src], #2
				EMIT(LLloadImm, dstRegL, srcReg, 2, EmitSzByte, false, EmitAdrModePostindex);
				
				//dstRegR -= 0x80;
				EMIT(LLsubImm, dstRegR, dstRegR, 0x80, EmitFlagsDoNotCare, false);
			}
			else {
				//LDRB dstRegL, [src], #1
				EMIT(LLloadImm, dstRegL, srcReg, 1, EmitSzByte, false, EmitAdrModePostindex);
			}
			//dstRegL -= 0x80;
			EMIT(LLsubImm, dstRegL, dstRegL, 0x80, EmitFlagsDoNotCare, false);
			break;
		
		case AudioSampleS8:
		
			//LDRSB dstRegL, [src], #1
			EMIT(LLloadImm, dstRegL, srcReg, 1, EmitSzByte, true, EmitAdrModePostindex);
			
			if (stereo) {
				//LDRSB dstRegR, [src], #1
				EMIT(LLloadImm, dstRegL, srcReg, 1, EmitSzByte, true, EmitAdrModePostindex);
			}
			break;
		
		case AudioSampleU16LE:
		case AudioSampleS16LE:
		case AudioSampleU16BE:
		case AudioSampleS16BE:
		
			if (stereo) {
				//LDMIA src!, {dstRegL}
				EMIT(LLldmia, srcReg, 1 << dstRegL, true);
				
				if (typ == AudioSampleU16BE || typ == AudioSampleS16BE) {
					
					//REV16 dstRegL, dstRegL
					EMIT(LLrev16, dstRegL, dstRegL);
				}
				
				if (typ == AudioSampleU16LE || typ == AudioSampleU16BE) {
					//EOR dstRegL, dstRegL, #0x80008000
					EMIT(LLeorImm, dstRegL, dstRegL, 0x80008000, 0, EmitFlagsDoNotCare);
				}
				
				//ASRS dstRegR, dstRegL, #16
				EMIT(LLmov, dstRegR, dstRegL, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
				
				//SXTH dstRegL, dstRegL
				EMIT(LLextend, dstRegL, dstRegL, 0, false, false);
			}
			else {
				
				if (typ == AudioSampleS16LE) {
					
					//LDRSH dstRegL, [src], #2
					EMIT(LLloadImm, dstRegL, srcReg, 2, EmitSzHalfword, true, EmitAdrModePostindex);
				}
				else {
					
					//LDRH dstRegL, [src], #2
					EMIT(LLloadImm, dstRegL, srcReg, 2, EmitSzHalfword, false, EmitAdrModePostindex);
					
					if (typ == AudioSampleS16BE) {
						
						//REVSH dstRegL
						EMIT(LLrevsh, dstRegL, dstRegL);
					}
					else {
						
						if (typ == AudioSampleU16BE) {
							
							//REV16 dstRegL, dstRegL
							EMIT(LLrev16, dstRegL, dstRegL);
						}
						
						//SUB dstRegL, dstRegL, #0x8000
						EMIT(LLsubImm, dstRegL, dstRegL, 0x8000, EmitFlagsDoNotCare, false);
					}
				}
			}
			break;

		case AudioSampleU32LE:
		case AudioSampleU32BE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
		
			//ldmia src!, {dstRegL, [dstRegR]}
			EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);

		
			if (typ == AudioSampleU32BE || typ == AudioSampleS32BE) {
				
				//REV dstRegL, dstRegL
				EMIT(LLrev, dstRegL, dstRegL);

				if (stereo) {
					
					//REV dstRegR, dstRegR
					EMIT(LLrev, dstRegR, dstRegR);
				}
			}
		
			if (typ == AudioSampleU32LE || typ == AudioSampleU32BE) {
				
				//SUB dstRegL, dstRegL, #0x80000000
				EMIT(LLsubImm, dstRegL, dstRegL, 0x80000000, EmitFlagsDoNotCare, false);
				
				if (stereo) {
					
					//SUB dstRegR, dstRegR, #0x80000000
					EMIT(LLsubImm, dstRegR, dstRegR, 0x80000000, EmitFlagsDoNotCare, false);
				}
			}
			break;
		
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
		
			if (MIXER_ALLOW_VFP_USE) {
		
				if (typ == AudioSampleFloatLE) {
					
					//stereo:	vldmia src!, {s0, s1}
					//mono:		vldmia src!, {s0}
					EMIT(LLvldmiaSP, srcReg, 0, nCh, true);
				}
				else {
					
					//ldmia src!, {dstRegL, [dstRegR]}
					EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);
					
					//REV dstRegL, dstRegL
					EMIT(LLrev, dstRegL, dstRegL);
					
					if (stereo) {
						
						//REV dstRegR, dstRegR
						EMIT(LLrev, dstRegR, dstRegR);
						
						//vmov S0, S1, dstRegL, dstRegR		//move two regs at once
						EMIT(LLvmovArmToVfp2xSP, 0, dstRegL, dstRegR);
					}
					else {
						
						//vmov S0, dstRegL
						EMIT(LLvmovArmToVfpSP, 0, dstRegL);
					}
				}
				
				//vcvt.S32.F32 S0, S0, #24
				EMIT(LLvcvtToFixedPtSP, 0, 32, 24, false);
				
				//interleave for better latency
				if (stereo) {
					
					//vcvt.S32.F32 S1, S1, #24
					EMIT(LLvcvtToFixedPtSP, 1, 32, 24, false);
				}
				
				if (stereo) {
				
					//vmov dstRegL, dstRegR, S0, S1		//move two regs at once
					EMIT(LLvmovVfpToArm2xSP, dstRegL, dstRegR, 0);
				}
				else {
				
					//vmov dstRegL, S0
					EMIT(LLvmovVfpToArmSP, dstRegL, 0);
				}
			}
			else {
				// abbreviated float reading code that only does what we care for. we need two temp regs and do this for each value
				//  inf, nan, and all out of bounds values become zero (this might introduce clipping if your samples are too big)
			
				//push {r0-r1}
				EMIT(HLpush, 0x0003);
				
				//ldmia src!, {dstRegL, [dstRegR]}
				EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);
				
				for (i = 0; i < nCh; i++) {
					
					uint32_t rvNo = i ? dstRegR : dstRegL;
					struct EmitBuf bcsToOut;
			
					if (typ == AudioSampleFloatBE) {
						
						//rev rvNo, rvNo
						EMIT(LLrev, rvNo, rvNo);
					}
				
					//ubfx r0, rV, #23, #8
					EMIT(LLbfx, 0, rvNo, 23, 8, true, true);
					
					//subs r0, #104
					EMIT(LLsubImm, 0, 0, 104, EmitSetFlags, false);
					
					//cmp r0, #127 - 104
					EMIT(LLcmpImm, 0, 127 - 104);
					
					//it cs
					EMIT(LLit, EmitCcCs);
					
					//mov[cs] rV, #0
					EMIT(HLloadImmToReg, rvNo, 0, false, false, true);
					
					//bcs out
					EMIT(SaveSpace, &bcsToOut, 1);
					
					//lsl r1, rV, #8
					EMIT(LLmov, 1, rvNo, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
					
					//orr r1, #0x80000000
					EMIT(LLorrImm, 1, 1, 0x80000000, 0, EmitFlagsDoNotCare);
					
					//rsb r0, r0, #(127 - 104) + 9
					EMIT(LLrsbImm, 0, 0, (127 - 104) + 9, EmitFlagsDoNotCare, false);
					
					//lsrs r1, r0
					EMIT(LLshiftByReg, 1, 1, 0, EmitShiftLsr, EmitFlagsDoNotCare, false);
					
					//lsls rV, #1
					EMIT(LLmov, rvNo, rvNo, EmitShiftLsl, 1, EmitSetFlags, false);
					
					//ite cs
					EMIT(LLite, EmitCcCs);
					
					//neg[cs] rV, r1
					EMIT(LLrsbImm, rvNo, 1, 0, EmitLeaveFlags, true);
					
					//mov[cc] rV, r1
					EMIT(LLmov, rvNo, 1, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
					
					//"out" label is here
					EMIT_TO(LLbranch, &bcsToOut, emitGetPtrToJumpHere(dest), EmitCcCs);
				}
				
				//pop {r0-r1}
				EMIT(HLpop, 0x0003);
			}
		
			break;
		default:
			return EmitErrNotEncodeable;
	}
	
	//shift sample into place
	switch (typ) {
		case AudioSampleU8:
		case AudioSampleS8:
			//lsls dstRegL, 16
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//lsls dstRegR, 16
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleU16LE:
		case AudioSampleU16BE:
		case AudioSampleS16LE:
		case AudioSampleS16BE:
			//lsl dstRegL, 8
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//lsls dstRegR, 8
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleU32LE:
		case AudioSampleU32BE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
			//asr dstRegL, 8
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftAsr, 8, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//asr dstRegR, 8
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftAsr, 8, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
			break;
		default:
			//nothing
			return EmitErrNotEncodeable;
	}
	
	return EmitErrNone;
}

//Emit a fixed-point multiply of a sample by a volume: dstReg = low 32 bits of ((int64)sampleReg * volumeReg) >> 10
// the 64-bit product lands in dstReg (low word) and tmpReg (high word), then the two halves are stitched together
//tmpReg is clobbered and may not equal dstReg, all else allowed (dstReg may equal sampleReg)
static enum EmitStatus audioPrvEmitVolumeScale(struct EmitBuf *dest, uint32_t dstReg, uint32_t sampleReg, uint32_t volumeReg, uint32_t tmpReg)
{
	const uint32_t numBitsDropped = 10;						//how far the 64-bit product is shifted down
	const uint32_t hiWordInsertPos = 32 - numBitsDropped;	//where the high word's low bits go in the result
	
	//smull dstReg, tmpReg, sampleReg, volumeReg	//dstReg = product lo, tmpReg = product hi
	EMIT(LLsmull, dstReg, tmpReg, sampleReg, volumeReg);
	
	//lsr[s] dstReg, #10							//low word's contribution now sits in bits 21..0
	EMIT(LLmov, dstReg, dstReg, EmitShiftLsr, numBitsDropped, EmitFlagsDoNotCare, false);
	
	//bfi dstReg, tmpReg, #22, #10					//bottom 10 bits of the high word fill bits 31..22
	EMIT(LLbfi, dstReg, tmpReg, hiWordInsertPos, numBitsDropped);
	
	return EmitErrNone;
}

//scale sample by volume, take channels into account. for mono, L is used. if mono in AND out, we expect mono volume in volL
//if output is mono, combine and generate it in L
//regL & regR are in and out, MUST be loRegs
static enum EmitStatus audioPrvMixInFuncScaleChansByVolume(struct EmitBuf *dest, uint32_t regL, uint32_t regR, uint32_t regVolL, uint32_t regVolR, uint32_t regTmp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	
	//if input is stereo, scale by volumes now
	if (chans == AudioStereo) {
		
		now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmp);
		if (now != EmitErrNone)
			return now;
		
		now = audioPrvEmitVolumeScale(dest, regR, regR, regVolR, regTmp);
		if (now != EmitErrNone)
			return now;
		
		//if output is mono, combine samples now
		if (!nativeFmtIsStereo) {
		
			//add regL, regR
			EMIT(LLaddReg, regL, regL, regR, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//asrs regL, #1
			EMIT(LLmov, regL, regL, EmitShiftAsr, 1, EmitFlagsDoNotCare, false);
		}
	}
	else if (nativeFmtIsStereo) {	//if input is mono but output is stereo, scale the input sample by each channel's volume
		
		now = audioPrvEmitVolumeScale(dest, regR, regL, regVolR, regTmp);
		if (now != EmitErrNone)
			return now;
		
		now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmp);
		if (now != EmitErrNone)
			return now;
	}
	else {						//mono in and out - scale the sample by the average volume we had calculated
		
		now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmp);
		if (now != EmitErrNone)
			return now;
	}
	
	return EmitErrNone;
}

//Emit code for regDst += regSrc, the way the build is configured to mix:
// - MIXER_BE_RECKLESS_WITH_MIXING: a plain add that wraps around on overflow
// - otherwise: a signed saturating add (QADD if HAVE_v7E_SUPPORT is defined, else a flag-based sequence)
//flags may be clobbered by the emitted code
static enum EmitStatus audioPrvMixInAddToOutSampleProperly(struct EmitBuf *dest, uint32_t regDst, uint32_t regSrc)
{
	if (MIXER_BE_RECKLESS_WITH_MIXING) {
		
		//ADD regDst, regSrc
		EMIT(LLaddReg, regDst, regDst, regSrc, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		return EmitErrNone;
	}
	
#ifdef HAVE_v7E_SUPPORT
	
	//QADD regDst, regSrc
	EMIT(LLqadd, regDst, regDst, regSrc);
	
#else
	
	//saturation without QADD. how it works:
	// if the ADDS overflowed (V set), the wrapped result has the WRONG sign: it looks negative when the true sum was
	// too big, and positive when it was too small. ASR #31 smears that sign into 0xFFFFFFFF or 0x00000000.
	// C from the ADDS is 0 for positive overflow (two positives cannot carry out) and 1 for negative overflow
	// (two negatives always do). RRX shifts right by one and inserts C on top: 0xFFFFFFFF -> 0x7FFFFFFF, 0 -> 0x80000000
	// neither conditional instruction may touch the flags, or RRX would see the wrong C
	
	//ADDS regDst, regSrc
	EMIT(LLaddReg, regDst, regDst, regSrc, EmitShiftLsl, 0, EmitSetFlags, false);
	
	//ITT VS
	EMIT(LLitt, EmitCcVs);
	
	//ASR[vs] regDst, #31
	EMIT(LLmov, regDst, regDst, EmitShiftAsr, 31, EmitLeaveFlags, true);
	
	//RRX[vs] regDst
	EMIT(LLmov, regDst, regDst, EmitShiftRor, 0 /* ROR 0 is RRX */, EmitLeaveFlags, true);
	
#endif
	
	return EmitErrNone;
}

//Emit code to mix one output frame into the output buffer: read what is at [r0], add our sample(s) to it, store the sum back, advance r0 past it
// regL, regR       - registers with the sample(s) to mix in. regR is only used for stereo output. both get corrupted (they receive the sums)
// tmpRegL, tmpRegR - scratch registers that receive the current buffer contents. tmpRegR is only used for stereo output
// nativeFmtIsStereo- true: output frame is two words (L, R). false: output frame is one word
//may corrupt samples in for speed. may corrupt flags
static enum EmitStatus audioPrvMixInFuncEmitSampleExport(struct EmitBuf *dest, uint32_t regL, uint32_t regR, uint32_t tmpRegL, uint32_t tmpRegR, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	
	if (nativeFmtIsStereo) { //add two samples into the output buffer
	
		//ldrd tmpRegL, tmpRegR, [r0]
		EMIT(LLldrdImm, tmpRegL, tmpRegR, 0, 0, EmitAdrModeIndex);
		
		//NOTE: we could add to temp and leave sample regs uncorrupted, but sometimes sample regs are loregs in order and we save on our store using a STMIA.N
		now = audioPrvMixInAddToOutSampleProperly(dest, regL, tmpRegL);
		if (now != EmitErrNone)
			return now;
		
		now = audioPrvMixInAddToOutSampleProperly(dest, regR, tmpRegR);
		if (now != EmitErrNone)
			return now;

		//if sample regs are in order, use a stmia with a writeback
		if (regL < regR) {
			
			//stmia r0!, {regL, regR}		//r0 advances by 8
			EMIT(HLstmia, 0, (1 << regL) | (1 << regR), true);
		}
		else {	//else use a STRD with postindex
			
			//strd regL, regR, [r0], #8
			//STRD writes two words, so the post-index must step over both (just like the STMIA above does), or the next frame would overlap this one
			EMIT(LLstrdImm, regL, regR, 0, 2 * sizeof(uint32_t), EmitAdrModePostindex);
		}
	}
	else {				//add one sample into the output buffer
		
		//ldr tmpRegL, [r0]
		EMIT(LLloadImm, tmpRegL, 0, 0, EmitSzWord, false, EmitAdrModeIndex);

		//NOTE: we could add to temp and leave sample reg uncorrupted, but sometimes sample reg is a loreg and we save on our store using a STMIA.N

		//add regL, tmpRegL 				// add LEFT
		now = audioPrvMixInAddToOutSampleProperly(dest, regL, tmpRegL);
		if (now != EmitErrNone)
			return now;

		//store using a stmia				//r0 advances by 4
		EMIT(HLstmia, 0, (1 << regL), true);
	}
	
	return EmitErrNone;
}

//Emit the body of a mixing function for a stream that needs no resampling: every input frame produces exactly one output frame
// dest              - buffer the function is emitted into
// sampTyp           - format of input samples
// chans             - input channel config
// nativeFmtIsStereo - whether the output buffer is stereo
//register/stack use of the emitted code, as established by the instructions below:
// r0 = output pointer (mixed into and advanced), r1 = pointer to the source pointer on entry (the source pointer itself is kept in r1 in the loop, and written back at the end)
// r2 = number of frames to produce, at most the input frame count read from the stack
// the first two stack words at entry are loaded as the L and R volumes, the third as the number of input samples
//NOTE(review): the loop body runs once before the count is tested, so the emitted code appears to rely on being asked for at least one frame - confirm with callers
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsNoResamp(struct EmitBuf *dest, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	uintptr_t loopPos;
	
	//push {r1, r4-r8, lr}		//7 words are pushed, so the stack args now start at [sp, #7 * 4]
	EMIT(HLpush, 0x41f2);
	
	//ldr r1, [r1]	//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//up front decide how many samples we'll produce
	
	//ldr r4, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 4, EMIT_REG_NO_SP, 9 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//cmp r2, r4
	EMIT(LLcmpReg, 2, 4, EmitShiftLsl, 0);
	
	//IT PL	// true if r2 >= r4		(more space in output than we'll produce samples)
	//NOTE(review): PL tests the sign of (r2 - r4), which matches ">=" only as long as that subtraction does not overflow - fine for realistic sample counts
	EMIT(LLit, EmitCcPl);
	
	//movpl r2, r4	//so we simply just have to produce numSamples == r2
	EMIT(LLmov, 2, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
	
	//ldrd r7, r8, [sp, #ofst_to_volumes]	//load volumes into r7,r8 using ldrd from sp
	EMIT(LLldrdImm, 7, 8, EMIT_REG_NO_SP, 7 * sizeof(uint32_t), EmitAdrModeIndex);

	//if input & output are both mono, we need to calculate average volume (we'll store it in r7)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		#ifdef HAVE_v7E_SUPPORT		//UHADD16 is a nice way to average volumes
			
			//uhadd r7, r7, r8
			EMIT(LLuhadd16, 7, 7, 8);
		#else
			
			//add r7, r8
			EMIT(LLaddReg, 7, 7, 8, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
			//lsrs r7, #1
			EMIT(LLmov, 7, 7, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
		#endif
	}
	
	//everything from here to the branch below is the per-frame loop
	loopPos = emitGetPtrToJumpHere(dest);
	
	//load sample(s) into r4 (and r5 if stereo)
	now = audioPrvEmitLoadSample(dest, 4, 5, sampTyp, chans);
	if (now != EmitErrNone)
		return now;
	
	//handle volume scaling (volumes in r7, r8. lr is the temp)
	now = audioPrvMixInFuncScaleChansByVolume(dest, 4, 5, 7, 8, 14, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//store it (r6 and lr are the temps)
	now = audioPrvMixInFuncEmitSampleExport(dest, 4, 5, 6, 14, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//subs r2, #1  // account for the one sample we just did
	EMIT(LLsubImm, 2, 2, 1, EmitSetFlags, false);
	
	//loop back if there are more samples to work on
	EMIT(LLbranch, loopPos, EmitCcNe);

	//ldr r2, [sp]   //we need to store r1. the pushed r1 (pointer to the source pointer) is the first word on the stack
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);

	//pop {r1, r4-r8, pc}
	EMIT(HLpop, 0x81f2);

	return EmitErrNone;
}

//Emit the body of a mixing function that upsamples: each output frame is a linear interpolation between the "current" and the "next" input frame,
// with weights taken from resampTab. the table also says, per output frame, whether a new input frame is to be read
// dest              - buffer the function is emitted into
// resampTab         - resampling table. entry 0 and the address of entry 1 get inlined into the emitted code, so the table must outlive it
//                     as decoded below: a zero entry means "wrap to the start of the table", bit 0 means "read a new input sample", the remaining 15 bits are the weight
// sampTyp           - format of input samples
// chans             - input channel config
// nativeFmtIsStereo - whether the output buffer is stereo
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsUpsample(struct EmitBuf *dest, const uint16_t* resampTab, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	//log2 of, and actual, size of one input frame in bytes (mSampleShifts is defined elsewhere - presumably log2 of the per-sample byte size)
	uint32_t sampleSzShift = (mSampleShifts[sampTyp] + (chans == AudioStereo ? 1 : 0)), sampleSz = 1 << sampleSzShift;
	struct EmitBuf beqOutOfInput, skipTableReloadSpot, bcsSpot, jumpToExitCodeSpot;
	uint32_t i, numOutputChannels = nativeFmtIsStereo ? 2 : 1, tempReg;
	uintptr_t loopLoadSample, loopPostLoadSample;
	enum EmitStatus now;
	
	//int32_t* upsample(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples)

	//r0 is dst
	//r1 is src
	//r2 is dst end pointer (calculated from maxOutSamples below)
	//r3 is temp value
	//r4 is current sample L (or mono)
	//r5 is current sample R (if output is mono, we store "src end ptr" here)
	//r6 is "next" sample L (or mono)
	//r7 is "next" sample R
	//r8 is volume L (or mono volume)
	//r9 is volume R
	//r10 is output L (or mono)
	//r11 is output R
	//r12 is current table pointer
	//r14 is temp value
	//[sp, 4] is "source end ptr" if output is stereo
	
	//RESAMP STATE is:
	// [0] - previously read "now" L sample
	// [1] - previously read "now" R sample
	// [2] - table pointer to current entry
	

	//we use each sample more than once, so volume scaling is better done BEFORE interpolation than after. we do that
	
	//push {r1-r11, lr}		//12 words are pushed, so the stack args now start at [sp, #12 * 4]. saved r1 is at [sp], r2 at [sp, #4], r3 at [sp, #8]
	EMIT(HLpush, 0x4ffe);

	//ldr r1, [r1]	//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//calculate source end pointer
	//ldr r5, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 5, EMIT_REG_NO_SP, 14 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//add r5, r1, r5, lsl in_sample_sz_shift
	EMIT(LLaddReg, 5, 1, 5, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, false);
	
	if (nativeFmtIsStereo) {
		//str r5, [sp, #4]					//stash it on the stack where we saved a slot for it (where we stashed r2), since r5 is needed for the R sample
		EMIT(LLstoreImm, 5, EMIT_REG_NO_SP, 4, EmitSzWord, EmitAdrModeIndex);
	}

	//calculate destination end pointer
	//add r2, r0, r2, lsl 2 + isStereo
	EMIT(LLaddReg, 2, 0, 2, EmitShiftLsl, nativeFmtIsStereo ? 3 : 2, EmitFlagsDoNotCare, false);

	//load resamp state
	//ldmia r3, {r6,r7, r12}				//load current table index to r12, and "cur L" and "cur R" from resamp state into r6,r7 (which normally stores next sample), but we're about to move it into "cur" sample space
	EMIT(LLldmia, 3, 0x10c0, false);
	
	//ldrd r8, r9, [sp, #ofst_to_vol_l]	//load volumes into r8,r9 using ldrd from sp
	EMIT(LLldrdImm, 8, 9, EMIT_REG_NO_SP, 12 * sizeof(uint32_t), EmitAdrModeIndex);
	
	//if input & output are both mono, we need to calculate average volume (we'll store it in r8)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		#ifdef HAVE_v7E_SUPPORT		//UHADD16 is a nice way to average volumes
			
			//uhadd r8, r8, r9
			EMIT(LLuhadd16, 8, 8, 9);
		#else
			
			//add r8, r9
			EMIT(LLaddReg, 8, 8, 9, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//lsr r8, #1
			EMIT(LLmov, 8, 8, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
		#endif
	}
	
	//save location for the loop where a new sample needs to be loaded
	loopLoadSample = emitGetPtrToJumpHere(dest);
	
	//move current "next" sample into "current"
	EMIT(LLmov, 4, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	if (nativeFmtIsStereo) {
		EMIT(LLmov, 5, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//check if we're out of input samples (since we're about to overwrite r6, we can use it for temp)
	if (!nativeFmtIsStereo)
		tempReg = 5;	//mono output: the source end pointer lives in r5
	else {
		//ldr r6, [sp, #4]
		EMIT(LLloadImm, 6, EMIT_REG_NO_SP, 4, EmitSzWord, false, EmitAdrModeIndex);
		
		tempReg = 6;
	}
	
	//cmp src, tmpReg
	EMIT(LLcmpReg, 1, tempReg, EmitShiftLsl, 0);
	
	//save space for a "beq" to exit
	EMIT(SaveSpace, &beqOutOfInput, 1);
	
	//get "next" left or mono sample into r6, right into r7 if needed
	now = audioPrvEmitLoadSample(dest, 6, 7, sampTyp, chans);
	if (now != EmitErrNone)
		return now;
	
	//scale by volume and maybe merge (r3 is the temp)
	now = audioPrvMixInFuncScaleChansByVolume(dest, 6, 7, 8, 9, 3, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//save location for the loop where sample is not needed to be loaded
	loopPostLoadSample = emitGetPtrToJumpHere(dest);
	
	//ldrh r3, [r12], #2 //grab resamp tab value
	EMIT(LLloadImm, 3, 12, 2, EmitSzHalfword, false, EmitAdrModePostindex);

	//save space for a "cbnz" to skip table reload (a zero entry marks the end of the table)
	EMIT(SaveSpace, &skipTableReloadSpot, 1);
	
	//ldr r12, =resampTab + 1	//reload table. point right to elem idx 1 since we'll inline idx 0 here
	EMIT(HLloadImmToReg, 12, (uintptr_t)(resampTab + 1), true, true, false);
	
	//mov r3, resampTab[0]		//we know we'll load idx 0, inline it here
	EMIT(HLloadImmToReg, 3, *resampTab, true, true, false);
	
	//set up that "cbnz" we saved a space for
	EMIT_TO(LLcbnz, &skipTableReloadSpot, 3, emitGetPtrToJumpHere(dest));
	
	//lsrs r3, #1				//leave the table value's top 15 bits in r3, shift the "need a new sample?" bit into C
	EMIT(LLmov, 3, 3, EmitShiftLsr, 1, EmitSetFlags, false);
	
	//rsb lr, r3, #0x8000		//get (0x8000 - tabEntry) into lr. C must survive until the bcs below, hence EmitLeaveFlags
	EMIT(LLrsbImm, EMIT_REG_NO_LR, 3, 0x8000, EmitLeaveFlags, false);
	
	//interpolate: out = (cur * tabVal + next * (0x8000 - tabVal)) >> 15
	for (i = 0; i < numOutputChannels; i++) {
		//we need a temp reg for the high word of the product. while generating first (L) sample, use the second (R) output reg as temp
		//while generating second (R) sample, use r3: it holds tablVal, which is consumed by the smull that overwrites it and is not needed after
		// (lr cannot be the temp since the smlal still needs the inverse table value from it)
		//note: this tempReg shadows the function-level one
		uint32_t sampRegNow = 4 + i, sampRegNext = 6 + i, sampOutReg = 10 + i, tempReg = i ? 3 : 11;
		
		//smull sampOutReg, tempReg, sampRegNow, tablVal(3)	//multiply sample into table value
		EMIT(LLsmull, sampOutReg, tempReg, sampRegNow, 3);
		
		//smlal sampOutReg, tempReg, sampRegNext, inverseTabVal(EMIT_REG_NO_LR)
		EMIT(LLsmlal, sampOutReg, tempReg, sampRegNext, EMIT_REG_NO_LR);
		
		//assemble the results:
		//lsr sampOutReg, #15
		EMIT(LLmov, sampOutReg, sampOutReg, EmitShiftLsr, 15, EmitLeaveFlags, false);
		
		//bfi sampOutReg, tempReg, #17, #15
		EMIT(LLbfi, sampOutReg, tempReg, 17, 15);
	}
	
	//we need to discriminate now based on C flag (which is set if we DO need a new sample to be read).
	// we do this using a bcs which we'll save space for
	// we actually need to emit the sample first, but since that may clobber flags, we do that post-decision (once in each path)
	EMIT(SaveSpace, &bcsSpot, 1);
	
	//store the results (r3 and lr are the temps)
	now = audioPrvMixInFuncEmitSampleExport(dest, 10, 11, 3, 14, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;

	// this is the path for when we DO NOT need a new sample
	
	//cmp r2, r0	//see if we're done
	EMIT(LLcmpReg, 2, 0, EmitShiftLsl, 0);
	
	//loop to start (without sample load) using bne
	EMIT(LLbranch, loopPostLoadSample, EmitCcNe);
	
	//source pointer now points to PAST what should be the "next" sample next run - adjust it
	// (only the "current" sample is kept in the resamp state, so the "next" one gets re-read on the next call)
	//subs r1, in_sample_sz
	EMIT(LLsubImm, 1, 1, sampleSz, EmitSetFlags, false);
	
	//we'll need a jump here to exit code. save a slot for it
	EMIT(SaveSpace, &jumpToExitCodeSpot, 1);
	
	// this is the path for when we DO need a new sample
	
	//fill the above "bcs"
	EMIT_TO(LLbranch, &bcsSpot, emitGetPtrToJumpHere(dest), EmitCcCs);
	
	//store the results
	now = audioPrvMixInFuncEmitSampleExport(dest, 10, 11, 3, 14, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//cmp r2, r0	//see if we're done
	EMIT(LLcmpReg, 2, 0, EmitShiftLsl, 0);
	
	//loop to start (with sample load) using bne (short one should work)
	EMIT(LLbranch, loopLoadSample, EmitCcNe);
	
	//nonetheless shift the "next" sample to "cur" one so we can stash it properly below
	EMIT(LLmov, 4, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	if (nativeFmtIsStereo) {
		EMIT(LLmov, 5, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//source pointer now points to what should be the "next" sample next run. Good
	
	// this is the common exit path. we saved a slot above to insert a jump to here. generate the jump
	EMIT_TO(LLbranch, &jumpToExitCodeSpot, emitGetPtrToJumpHere(dest), EmitCcAl);
	
	// we also saved a spot above when we ran out of input data, generate that beq
	EMIT_TO(LLbranch, &beqOutOfInput, emitGetPtrToJumpHere(dest), EmitCcEq);

	// save "src" pointer
	//ldr r2, [sp]   //we need to store r1. the pushed r1 (srcP) is the first word on the stack
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);
	
	// save resamp state
	//ldr r2, [sp, #8]		//the pushed r3 (resampleStateP)
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 8, EmitSzWord, false, EmitAdrModeIndex);
	
	//stmia r2, {r4, r5, r12}
	EMIT(LLstmia, 2, 0x1030, true);

	//pop {r1-r11, pc}
	EMIT(HLpop, 0x8ffe);

	return EmitErrNone;
}

//Emit the body of a mixing function that downsamples: input samples are multiplied by table weights and accumulated into 64-bit sums,
// and the table says when to read a new input sample and when to output (and reset) the accumulated sum
// dest              - buffer the function is emitted into
// resampTab         - resampling table. entry 0 and the address of entry 1 get inlined into the emitted code, so the table must outlive it
//                     as decoded below: a zero entry means "wrap to the start of the table", bit 15 means "read a new input sample",
//                     bit 0 means "emit an output sample", bits 14..1 are the weight
// sampTyp           - format of input samples
// chans             - input channel config
// nativeFmtIsStereo - whether the output buffer is stereo
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsDownsample(struct EmitBuf *dest, const uint16_t* resampTab, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	//log2 of the size of one input frame in bytes (mSampleShifts is defined elsewhere - presumably log2 of the per-sample byte size)
	uint32_t sampleSzShift = (mSampleShifts[sampTyp] + (chans == AudioStereo ? 1 : 0));
	struct EmitBuf savedSpaceForJump;	//reused for each forward jump in turn, each is filled in before the next is reserved
	enum EmitStatus now;
	uintptr_t mainLoop;
	
	//int32_t* downsample(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples)

	//r0 is dst
	//r1 is src
	//r2 is dst endPtr
	//r3 is curSampL (or mono)
	//r4 is curSampR
	//r5 is tabentry
	//r6 is leftSum lo (or mono)
	//r7 is right sum lo
	//r8 is leftSum hi (or mono)
	//r9 is right sum hi
	//r10 is volume L (or mono)
	//r11 is volume R
	//r12 is tabPtr
	//lr  is source end ptr
	
	//RESAMP STATE is:
	// [0] - curSampL (or mono)
	// [1] - curSampR
	// [2] - leftSum lo (or mono)
	// [3] - right sum lo
	// [4] - leftSum hi (or mono)
	// [5] - right sum hi
	// [6] - tabptr


	//push {r1, r3-r11, lr}		//11 words are pushed, so the stack args now start at [sp, #11 * 4]. saved r1 is at [sp], r3 at [sp, #4]
	EMIT(HLpush, 0x4ffa);
	
	//ldr r1, [r1]							//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//calculate source end pointer
	//ldr r4, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 4, EMIT_REG_NO_SP, 13 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//add lr, r1, r4, lsl in_sample_sz_shift
	EMIT(LLaddReg, EMIT_REG_NO_LR, 1, 4, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, false);
	
	//calculate destination end pointer
	//add r2, r0, r2, lsl 2 + isStereo
	EMIT(LLaddReg, 2, 0, 2, EmitShiftLsl, nativeFmtIsStereo ? 3 : 2, EmitFlagsDoNotCare, false);
	
	//load resample state
	//ldmia r3, {r3, r4, r6-r9, r12}
	EMIT(LLldmia, 3, 0x13d8, false);
	
	//ldrd r10, r11, [sp, #ofst_to_vol_l]	//load volumes into r10,r11 using ldrd from sp
	EMIT(LLldrdImm, 10, 11, EMIT_REG_NO_SP, 11 * sizeof(uint32_t), EmitAdrModeIndex);
	
	//if input & output are both mono, we need to calculate average volume (we'll store it in r10)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		#ifdef HAVE_v7E_SUPPORT		//UHADD16 is a nice way to average volumes
			
			//uhadd r10, r10, r11
			EMIT(LLuhadd16, 10, 10, 11);
		#else
			
			//add r10, r11
			EMIT(LLaddReg, 10, 10, 11, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//lsr r10, #1
			EMIT(LLmov, 10, 10, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
		#endif
	}
	
	//loop:
	mainLoop = emitGetPtrToJumpHere(dest);
	
	//load tab entry
	
	//ldrh r5, [r12], #2 //grab resamp tab value
	EMIT(LLloadImm, 5, 12, 2, EmitSzHalfword, false, EmitAdrModePostindex);

	//save space for a "cbnz" to skip table reload (a zero entry marks the end of the table)
	EMIT(SaveSpace, &savedSpaceForJump, 1);
	
	//ldr r12, =resampTab + 1	//reload table. point right to elem idx 1 since we'll inline idx 0 here
	EMIT(HLloadImmToReg, 12, (uintptr_t)(resampTab + 1), true, true, false);
	
	//mov r5, resampTab[0]		//we know we'll load idx 0, inline it here
	EMIT(HLloadImmToReg, 5, *resampTab, true, true, false);
	
	//set up that "cbnz" we saved a space for
	EMIT_TO(LLcbnz, &savedSpaceForJump, 5, emitGetPtrToJumpHere(dest));
	
	//lsls tabentry(aka r5), #17		//entry's top bit (bit 15, "need a new sample") goes into C
	EMIT(LLmov, 5, 5, EmitShiftLsl, 17, EmitSetFlags, false);

	//save space for "bcc skip_load_sample"
	EMIT(SaveSpace, &savedSpaceForJump, 1);
	
	//get "next" left or mono sample into r3, right into r4 if needed
	now = audioPrvEmitLoadSample(dest, 3, 4, sampTyp, chans);
	if (now != EmitErrNone)
		return now;
	
	//skip_load_sample:
	
	//fill in that jump above to skip loading the sample
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcCc);
	
	//lsrs tabentry(aka r5), #18		//now only has the multiplier (in bottom 14 bits), and C bit has whether we need to emit
	EMIT(LLmov, 5, 5, EmitShiftLsr, 18, EmitSetFlags, false);

	// SMLAL leftSumLo, leftSumHi, tabentry, curSampL
	EMIT(LLsmlal, 6, 8, 5, 3);
	
	if (chans == AudioStereo) {
		
		// SMLAL rightSumLo, rightSumHi, tabentry, curSampR
		EMIT(LLsmlal, 7, 9, 5, 4);
	}

	//save space for "bcc noemit" (C is still the one from the lsrs above)
	EMIT(SaveSpace, &savedSpaceForJump, 1);

	//collapse the samples to a single reg
	
	//assemble the results:
	//lsr[s] leftSumLo, #14
	EMIT(LLmov, 6, 6, EmitShiftLsr, 14, EmitFlagsDoNotCare, false);
	
	//bfi leftSumLo, leftSumHi, #18, #14
	EMIT(LLbfi, 6, 8, 18, 14);
		
	if (chans == AudioStereo) {
		
		//lsr[s] rightSumLo, #14
		EMIT(LLmov, 7, 7, EmitShiftLsr, 14, EmitFlagsDoNotCare, false);
		
		//bfi rightSumLo, rightSumHi, #18, #14
		EMIT(LLbfi, 7, 9, 18, 14);
	}
	
	//scale by volume and maybe merge (leftSum hi, aka r8, is free now and serves as the temp)
	now = audioPrvMixInFuncScaleChansByVolume(dest, 6, 7, 10, 11, 8, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//emit samples (leftSum, rightSum, hi regs free as temps)
	now = audioPrvMixInFuncEmitSampleExport(dest, 6, 7, 8, 9, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//we need to zero all the result regs now
	EMIT(HLloadImmToReg, 6, 0, true, true, false);
	EMIT(LLmov, 7, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	EMIT(LLmov, 8, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	EMIT(LLmov, 9, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//noemit:
	
	//fill in that jump above to skip producing a sample
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcCc);
	
	//cmp r2, r0		//see if we're done with the output
	EMIT(LLcmpReg, 2, 0, EmitShiftLsl, 0);
	//if not, maybe done with input?
	//it ne
	EMIT(LLit, EmitCcNe);
	//cmp[ne] r1, lr
	EMIT(LLcmpReg, 1, EMIT_REG_NO_LR, EmitShiftLsl, 0);
	
	//if both not done, go loop around (Z is set if either compare found equality, since the second only runs when the first did not)
	EMIT(LLbranch, mainLoop, EmitCcNe);

	//loop is over - save state

	// save "src" pointer
	//ldr r2, [sp]   //we need to store r1. the pushed r1 (srcP) is the first word on the stack
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);
	
	// save resamp state
	//ldr r2, [sp, #4]		//the pushed r3 (resampleStateP)
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 4, EmitSzWord, false, EmitAdrModeIndex);
	
	//stmia r2, {r3, r4, r6-r9, r12}
	EMIT(LLstmia, 2, 0x13d8, false);

	//pop {r1, r3-r11, pc}
	EMIT(HLpop, 0x8ffa);

	return EmitErrNone;
}

//convert a signed fixed-point value to float by dividing it by 33554432 (2^25)
static float audioMicPrvToFloat(int32_t i)
{
	return (float)i / 33554432.f;
}

//prototype is void* MicCvtF(void* dst, const int16_t *src, uint32_t volumeL, uint32_t volumeR, uint32_t nSamplesOver2);
//source guaranteed four byte aligned always
//
//emits (into "dest") a function that converts 16-bit signed mic samples to the requested format & channel config
//register use in the produced code:
//	r0 = dst (advanced as output is written), r1 = src (advanced as input is read), r2 = volL, r3 = volR
//	r12 = loop counter (nSamplesOver2, fetched from the stack), r4..r7 = samples being worked on
//	constAddReg (lr, or r3 for mono on v7E) = signed->unsigned bias for the U16/U32 formats
//each loop iteration reads one 32-bit word (two input samples) and writes two output frames:
//	r4 = frame 0 left, r5 = frame 0 right, r6 = frame 1 left, r7 = frame 1 right (r5 and r7 only used for stereo)
//for mono output the two volumes are averaged into r2 first
//sample * volume is treated as a 26-bit signed value (so a volume of 1024 is unity gain for 16-bit input)
//returns EmitErrInvalidInput for an unknown sample type, EmitErrNone on success
//NOTE(review): EMIT() is defined elsewhere; presumably it returns early with the emitter's error on failure
enum EmitStatus audioPrvMicCreateConvertFunc(struct EmitBuf *dest, enum AudioSampleType sampTyp, enum AudioChannelConfig chans)
{
	uint_fast8_t width, constAddReg = EMIT_REG_NO_LR;
	bool stereo = chans != AudioMono;
	uintptr_t loopStart;
	
	if (!stereo) {
		
		#ifdef HAVE_v7E_SUPPORT		//UHADD16 is a nice way to scale volumes
			
			//uhadd16 r2, r2, r3
			EMIT(LLuhadd16, 2, 2, 3);
			
			//r3 is now free
			constAddReg = 3;
			
		#else
			
			//add r2, r3
			EMIT(LLaddReg, 2, 2, 3, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
			//lsrs r2, #1
			EMIT(LLmov, 2, 2, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
		#endif
	}
	
	//ldr r12, [sp]			//get nSamplesOver2 into r12 (must be done before the push below moves sp)
	EMIT(LLloadImm, 12, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);

	//push {r4-r7, lr}
	EMIT(HLpush, 0x40f0);
	
	//pick output width, and for unsigned formats preload the bias constant (lr is safe to use now that it was pushed)
	switch (sampTyp) {
		case AudioSampleU8:
		case AudioSampleS8:
			width = 8;
			break;
		
		case AudioSampleU16LE:
		case AudioSampleU16BE:
			//mov R_constAdd, #0x00008000
			EMIT(LLmovImm, constAddReg, 0x00008000, 0,  EmitFlagsDoNotCare, false);
			//fallthrough
			
		case AudioSampleS16LE:
		case AudioSampleS16BE:
			width = 16;
			break;
		
		case AudioSampleU32LE:
		case AudioSampleU32BE:
			//mov R_constAdd, #0x80000000
			EMIT(LLmovImm, constAddReg, 0x80000000, 0,  EmitFlagsDoNotCare, false);
			//fallthrough
			
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
			width = 32;
			break;
		
		default:
			return EmitErrInvalidInput;
	}
	
	//loopstart:
	loopStart = emitGetPtrToJumpHere(dest);
	
	//ldmia src!, {r4}
	EMIT(LLldmia, 1, 0x10, true);
	
	//split into 2/4 regs and multiply by volume(s)
	#ifdef HAVE_v7E_SUPPORT
		
		if (stereo) {
			//smultb r7, r4, volR
			//destination must be r7: r6 is written by the volL multiply right below
			EMIT(LLsmulxy, 7, 4, 3, true, false);
		}
		
		//smultb r6, r4, volL
		EMIT(LLsmulxy, 6, 4, 2, true, false);
		
		if (stereo) {
			//smulbb r5, r4, volR
			EMIT(LLsmulxy, 5, 4, 3, false, false);
		}
		
		//smulbb r4, r4, volL		//must be last since it overwrites the source word
		EMIT(LLsmulxy, 4, 4, 2, false, false);
		
	#else
	
		if (stereo) {
			//asr r7, r4, #16
			EMIT(LLmov, 7, 4, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
			
			//mul r7, volR
			EMIT(LLmulReg, 7, 7, 3, EmitFlagsDoNotCare, false);
		}
		
		//asr r6, r4, #16
		EMIT(LLmov, 6, 4, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
		
		//mul r6, volL
		EMIT(LLmulReg, 6, 6, 2, EmitFlagsDoNotCare, false);
		
		if (stereo) {
		
			//sxth r5, r4
			EMIT(LLextend, 5, 4, 0, false, false);

			//mul r5, volR
			EMIT(LLmulReg, 5, 5, 3, EmitFlagsDoNotCare, false);
		}
		
		//sxth r4, r4				//must be last since it overwrites the source word
		EMIT(LLextend, 4, 4, 0, false, false);

		//mul r4, volL
		EMIT(LLmulReg, 4, 4, 2, EmitFlagsDoNotCare, false);
	
	#endif
	
	//saturate to final range and shift into final place
	//we cannot shift then saturate for 32 bit so we are forced to saturate then shift
	//  the general form is:
	//	  ssat rX, #width, rX, asr # 18	//for 8 width
	//	  ssat rX, #width, rX, asr # 10	//for 16 width
	//	  ssat rX, #26, rX, lsl # 0	//for 32 width, lsl 6 later
	
	if (stereo) {
		
		EMIT(LLssat, 7, width == 32 ? 26 : width, 7, width == 32 ? EmitShiftLsl : EmitShiftAsr, width == 32 ? 0 : 26 - width);
		EMIT(LLssat, 5, width == 32 ? 26 : width, 5, width == 32 ? EmitShiftLsl : EmitShiftAsr, width == 32 ? 0 : 26 - width);
	}
	EMIT(LLssat, 6, width == 32 ? 26 : width, 6, width == 32 ? EmitShiftLsl : EmitShiftAsr, width == 32 ? 0 : 26 - width);
	EMIT(LLssat, 4, width == 32 ? 26 : width, 4, width == 32 ? EmitShiftLsl : EmitShiftAsr, width == 32 ? 0 : 26 - width);
	
	//if float format requested, convert to float
	if (sampTyp == AudioSampleFloatLE || sampTyp == AudioSampleFloatBE) {
		#ifdef HAVE_FPU
			
			//samples were just saturated to 26-bit signed, so 25 fraction bits scales them exactly like
			//audioMicPrvToFloat() (divide by 2^25) does in the non-FPU build. All four values must use the
			//same scale or the channels end up at different levels
			
			if (stereo) {
				//vmov s0, s1, r5, r7	//one move to move both regs is faster and shorter
				EMIT(LLvmovArmToVfp2xSP, 0, 5, 7);
				
				//vcvt.F32.S32 s0, s0, #25
				EMIT(LLvcvtFromFixedPtSP, 0, 32, 25, false);
				
				//vcvt.F32.S32 s1, s1, #25
				EMIT(LLvcvtFromFixedPtSP, 1, 32, 25, false);
				
				//vmov r5, r7, s0, s1
				EMIT(LLvmovVfpToArm2xSP, 5, 7, 0);
			}
			
			//vmov s0, s1, r4, r6	//one move to move both regs is faster and shorter
			EMIT(LLvmovArmToVfp2xSP, 0, 4, 6);
			
			//vcvt.F32.S32 s0, s0, #25
			EMIT(LLvcvtFromFixedPtSP, 0, 32, 25, false);
			
			//vcvt.F32.S32 s1, s1, #25
			EMIT(LLvcvtFromFixedPtSP, 1, 32, 25, false);
			
			//vmov r4, r6, s0, s1
			EMIT(LLvmovVfpToArm2xSP, 4, 6, 0);
		#else
		
			//we are about to call C code: save everything it may clobber that we still need (r0-r3, r12, lr)
			//plus r11 which we borrow to hold the callee's address
			
			//push {r0-r3, r11, r12, lr}
			EMIT(HLpush, 0x580f);
			
			//ldr r11, =toFloatF
			EMIT(HLloadImmToReg, 11, (uintptr_t)&audioMicPrvToFloat, true, true, false);
			
			if (stereo) {
				
				//mov r0, r7
				EMIT(LLmov, 0, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
				//blx r11
				EMIT(LLblx, 11);

				//mov r7, r0
				EMIT(LLmov, 7, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
				//mov r0, r5
				EMIT(LLmov, 0, 5, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
				//blx r11
				EMIT(LLblx, 11);

				//mov r5, r0
				EMIT(LLmov, 5, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			}
			
			//mov r0, r6
			EMIT(LLmov, 0, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
			//blx r11
			EMIT(LLblx, 11);

			//mov r6, r0
			EMIT(LLmov, 6, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//mov r0, r4
			EMIT(LLmov, 0, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
			//blx r11
			EMIT(LLblx, 11);

			//mov r4, r0
			EMIT(LLmov, 4, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//pop {r0-r3, r11, r12, lr}	//must mirror the push above exactly, else sp drifts every iteration
			EMIT(HLpop, 0x580f);
			
		#endif
	}
	else if (width == 32) {		//lsl to 32 bits now
		
		if (stereo) {
			
			//lsl r7, #6
			EMIT(LLmov, 7, 7, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
			
			//lsl r5, #6
			EMIT(LLmov, 5, 5, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
		}
			
		//lsl r6, #6
		EMIT(LLmov, 6, 6, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
		
		//lsl r4, #6
		EMIT(LLmov, 4, 4, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
	}
	
	//pack into form we'll write, byteswap, write
	//cases deliberately fall through: unsigned bias -> byteswap (BE only) -> store
	switch (sampTyp) {
		case AudioSampleU8:
			if (stereo) {
				
				//add r7, r7, #0x80
				EMIT(LLaddImm, 7, 7, 0x80, EmitFlagsDoNotCare, false);
				
				//add r5, r5, #0x80
				EMIT(LLaddImm, 5, 5, 0x80, EmitFlagsDoNotCare, false);
			}
			//add r6, r6, #0x80
			EMIT(LLaddImm, 6, 6, 0x80, EmitFlagsDoNotCare, false);
			
			//add r4, r4, #0x80
			EMIT(LLaddImm, 4, 4, 0x80, EmitFlagsDoNotCare, false);
			//fallthrough
		
		case AudioSampleS8:
			if (stereo) {
				
				//strb r7, [dst, #3]
				EMIT(LLstoreImm, 7, 0, 3, EmitSzByte, EmitAdrModeIndex);
				
				//strb r6, [dst, #2]
				EMIT(LLstoreImm, 6, 0, 2, EmitSzByte, EmitAdrModeIndex);
				
				//strb r5, [dst, #1]
				EMIT(LLstoreImm, 5, 0, 1, EmitSzByte, EmitAdrModeIndex);
				
				//strb r4, [dst], #4
				EMIT(LLstoreImm, 4, 0, 4, EmitSzByte, EmitAdrModePostindex);
			}
			else {
				//strb r6, [dst, #1]
				EMIT(LLstoreImm, 6, 0, 1, EmitSzByte, EmitAdrModeIndex);
				
				//strb r4, [dst], #2
				EMIT(LLstoreImm, 4, 0, 2, EmitSzByte, EmitAdrModePostindex);
			}
			break;
		
		case AudioSampleU16LE:
		case AudioSampleU16BE:
			if (stereo) {
				
				//add r7, r7, R_constAddReg
				EMIT(LLaddReg, 7, 7, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				
				//add r5, r5, R_constAddReg
				EMIT(LLaddReg, 5, 5, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			}
			//add r6, r6, R_constAddReg
			EMIT(LLaddReg, 6, 6, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//add r4, r4, R_constAddReg
			EMIT(LLaddReg, 4, 4, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			//fallthrough
		
		case AudioSampleS16BE:
			
			//U16LE arrives here by fallthrough and must not be swapped
			if (sampTyp != AudioSampleU16LE) {
				
				if (stereo) {
				
					//rev16 r7, r7
					EMIT(LLrev16, 7, 7);
					
					//rev16 r5, r5
					EMIT(LLrev16, 5, 5);
				}
				//rev16 r6, r6
				EMIT(LLrev16, 6, 6);
				
				//rev16 r4, r4
				EMIT(LLrev16, 4, 4);
			}
			//fallthrough
		
		case AudioSampleS16LE:
		
			if (stereo) {
				
				//strh r7, [dst, #6]
				EMIT(LLstoreImm, 7, 0, 6, EmitSzHalfword, EmitAdrModeIndex);
				
				//strh r6, [dst, #4]
				EMIT(LLstoreImm, 6, 0, 4, EmitSzHalfword, EmitAdrModeIndex);
				
				//strh r5, [dst, #2]
				EMIT(LLstoreImm, 5, 0, 2, EmitSzHalfword, EmitAdrModeIndex);
				
				//strh r4, [dst], #8
				EMIT(LLstoreImm, 4, 0, 8, EmitSzHalfword, EmitAdrModePostindex);
			}
			else {
				//strh r6, [dst, #2]
				EMIT(LLstoreImm, 6, 0, 2, EmitSzHalfword, EmitAdrModeIndex);
				
				//strh r4, [dst], #4
				EMIT(LLstoreImm, 4, 0, 4, EmitSzHalfword, EmitAdrModePostindex);
			}
			break;
		
		case AudioSampleU32LE:
		case AudioSampleU32BE:
			if (stereo) {
				
				//add r7, r7, R_constAddReg
				EMIT(LLaddReg, 7, 7, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				
				//add r5, r5, R_constAddReg
				EMIT(LLaddReg, 5, 5, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			}
			//add r6, r6, R_constAddReg
			EMIT(LLaddReg, 6, 6, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			//add r4, r4, R_constAddReg
			EMIT(LLaddReg, 4, 4, constAddReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			//fallthrough
		
		case AudioSampleFloatBE:
		case AudioSampleS32BE:
			
			//U32LE arrives here by fallthrough and must not be swapped
			if (sampTyp != AudioSampleU32LE) {
				
				if (stereo) {
				
					//rev r7, r7
					EMIT(LLrev, 7, 7);
					
					//rev r5, r5
					EMIT(LLrev, 5, 5);
				}
				//rev r6, r6
				EMIT(LLrev, 6, 6);
				
				//rev r4, r4
				EMIT(LLrev, 4, 4);
			}
			//fallthrough
		
		case AudioSampleS32LE:
		case AudioSampleFloatLE:
			
			//stmia dst!, {regs}		//{r4-r7} for stereo, {r4, r6} for mono
			EMIT(LLstmia, 0, stereo ? 0xf0 : 0x50, true);
			break;
		
		default:	//not reachable: unknown types were rejected by the first switch
			break;
	}

	//subs r12, #1
	EMIT(LLsubImm, 12, 12, 1, EmitSetFlags, false);
	
	//bne loopstart
	EMIT(LLbranch, loopStart, EmitCcNe);
	
	//pop {r4-r7, pc}
	EMIT(HLpop, 0x80f0);

	return EmitErrNone;
}










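/*
	NOTE: everything from here to the end of this comment is disabled code and is not compiled.
	It looks like an earlier set of fixed-function mic sample converters (hand-written v7E-M assembly plus a C
	dispatcher), presumably kept only as a reference next to audioPrvMicCreateConvertFunc() - confirm before reuse.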




















static void __attribute__((naked)) audioMicPrvConvertSamplesU8mono(uint8_t *dst, const int16_t *src, uint32_t nSamp, uint32_t vol)
{
	//this requires v7E-M
	asm volatile(
		"	push   {r4-r7}					\n\t"
		"1:									\n\t"
		"	subs   r2, #4					\n\t"
		"	bmi	   1f						\n\t"
		"	ldmia  r1!, {r4, r5}			\n\t"
		"	smultb r7, r5, r3				\n\t"
		"	smulbb r6, r5, r3				\n\t"
		"	ssat   r7, #8, r7, asr #18		\n\t"
		"	ssat   r6, #8, r6, asr #18		\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	adds   r4, #0x80				\n\t"
		"	adds   r5, #0x80				\n\t"
		"	adds   r6, #0x80				\n\t"
		"	adds   r7, #0x80				\n\t"
		"	strb   r7, [r0, #3]				\n\t"
		"	strb   r6, [r0, #2]				\n\t"
		"	strb   r5, [r0, #1]				\n\t"
		"	strb   r4, [r0], #4				\n\t"
		"	b 1b							\n\t"
		"1:									\n\t"
		"	lsls   r2, #31					\n\t"
		"	bcc    1f						\n\t"
		"	ldmia  r1!, {r4}				\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	itt    cs						\n\t"	//use shorter instrs - worth it
		"	addcs  r4, #0x80				\n\t"
		"	addcs  r5, #0x80				\n\t"
		"	strb   r5, [r0, #1]				\n\t"
		"	strb   r4, [r0], #2				\n\t"
		"1:									\n\t"
		"	bpl    1f						\n\t"
		"	ldrsh  r4, [r1]					\n\t"
		"	muls   r4, r3					\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	adds   r4, #0x80				\n\t"
		"	strb   r4, [r0]  				\n\t"
		"1:									\n\t"
		"	pop    {r4-r7}					\n\t"
		"	bx     lr						\n\t"
		:::"memory","cc"
	);
}

static void __attribute__((naked)) audioMicPrvConvertSamplesU8stereo(uint8_t *dst, const int16_t *src, uint32_t nSamp, uint32_t volL, uint32_t volR)
{
	//this requires v7E-M
	asm volatile(
		"	ldr    r12, [sp]				\n\t"
		"	push   {r4-r11}					\n\t"
		"1:									\n\t"
		"	subs   r2, #4					\n\t"
		"	bmi	   1f						\n\t"
		"	ldmia  r1!, {r4, r5}			\n\t"
		"	smultb r11, r5, r12				\n\t"
		"	smulbb r10, r5, r12				\n\t"
		"	smultb r7, r5, r3				\n\t"
		"	smulbb r6, r5, r3				\n\t"
		"	ssat   r11, #8, r11, asr #18	\n\t"
		"	ssat   r10, #8, r10, asr #18	\n\t"
		"	ssat   r7, #8, r7, asr #18		\n\t"
		"	ssat   r6, #8, r6, asr #18		\n\t"
		"	smultb r9, r4, r12				\n\t"
		"	smulbb r8, r4, r12				\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r9, #8, r9, asr #18		\n\t"
		"	ssat   r8, #8, r8, asr #18		\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	adds   r4, #0x80				\n\t"
		"	adds   r5, #0x80				\n\t"
		"	adds   r6, #0x80				\n\t"
		"	adds   r7, #0x80				\n\t"
		"	add    r8, #0x80				\n\t"
		"	add    r9, #0x80				\n\t"
		"	add    r10, #0x80				\n\t"
		"	add    r11, #0x80				\n\t"
		"	strb   r11, [r0, #7]			\n\t"
		"	strb   r7, [r0, #6]				\n\t"
		"	strb   r10, [r0, #5]			\n\t"
		"	strb   r6, [r0, #4]				\n\t"
		"	strb   r9, [r0, #3]				\n\t"
		"	strb   r5, [r0, #2]				\n\t"
		"	strb   r8, [r0, #1]				\n\t"
		"	strb   r4, [r0], #8				\n\t"
		"	b 1b							\n\t"
		"1:									\n\t"
		"	lsls   r2, #31					\n\t"
		"	bcc    1f						\n\t"
		"	ldmia  r1!, {r4}				\n\t"
		"	smultb r7, r4, r12				\n\t"
		"	smulbb r6, r4, r12				\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r7, #8, r7, asr #18		\n\t"
		"	ssat   r6, #8, r6, asr #18		\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	itttt  cs						\n\t"	//use shorter instrs - worth it
		"	addcs  r4, #0x80				\n\t"
		"	addcs  r5, #0x80				\n\t"
		"	addcs  r6, #0x80				\n\t"
		"	addcs  r7, #0x80				\n\t"
		"	strb   r7, [r0, #3]				\n\t"
		"	strb   r5, [r0, #2]				\n\t"
		"	strb   r6, [r0, #1]				\n\t"
		"	strb   r4, [r0], #4				\n\t"
		"1:									\n\t"
		"	bpl    1f						\n\t"
		"	ldrsh  r4, [r1]					\n\t"
		"	muls   r4, r3					\n\t"
		"	mul    r5, r12					\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	adds   r4, #0x80				\n\t"
		"	adds   r5, #0x80				\n\t"
		"	strb   r5, [r0, #1]				\n\t"
		"	strb   r4, [r0]  				\n\t"
		"1:									\n\t"
		"	pop    {r4-r11}					\n\t"
		"	bx     lr						\n\t"
		:::"memory","cc"
	);
}

static void __attribute__((naked)) audioMicPrvConvertSamplesS8mono(uint8_t *dst, const int16_t *src, uint32_t nSamp, uint32_t vol)
{
	//this requires v7E-M
	asm volatile(
		"	push   {r4-r7}					\n\t"
		"1:									\n\t"
		"	subs   r2, #4					\n\t"
		"	bmi	   1f						\n\t"
		"	ldmia  r1!, {r4, r5}			\n\t"
		"	smultb r7, r5, r3				\n\t"
		"	smulbb r6, r5, r3				\n\t"
		"	ssat   r7, #8, r7, asr #18		\n\t"
		"	ssat   r6, #8, r6, asr #18		\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	strb   r7, [r0, #3]				\n\t"
		"	strb   r6, [r0, #2]				\n\t"
		"	strb   r5, [r0, #1]				\n\t"
		"	strb   r4, [r0], #4				\n\t"
		"	b 1b							\n\t"
		"1:									\n\t"
		"	lsls   r2, #31					\n\t"
		"	bcc    1f						\n\t"
		"	ldmia  r1!, {r4}				\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	strb   r5, [r0, #1]				\n\t"
		"	strb   r4, [r0], #2				\n\t"
		"1:									\n\t"
		"	bpl    1f						\n\t"
		"	ldrsh  r4, [r1]					\n\t"
		"	muls   r4, r3					\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	strb   r4, [r0]  				\n\t"
		"1:									\n\t"
		"	pop    {r4-r7}					\n\t"
		"	bx     lr						\n\t"
		:::"memory","cc"
	);
}

static void __attribute__((naked)) audioMicPrvConvertSamplesS8stereo(uint8_t *dst, const int16_t *src, uint32_t nSamp, uint32_t volL, uint32_t volR)
{
	//this requires v7E-M
	asm volatile(
		"	ldr    r12, [sp]				\n\t"
		"	push   {r4-r11}					\n\t"
		"1:									\n\t"
		"	subs   r2, #4					\n\t"
		"	bmi	   1f						\n\t"
		"	ldmia  r1!, {r4, r5}			\n\t"
		"	smultb r11, r5, r12				\n\t"
		"	smulbb r10, r5, r12				\n\t"
		"	smultb r7, r5, r3				\n\t"
		"	smulbb r6, r5, r3				\n\t"
		"	ssat   r11, #8, r11, asr #18	\n\t"
		"	ssat   r10, #8, r10, asr #18	\n\t"
		"	ssat   r7, #8, r7, asr #18		\n\t"
		"	ssat   r6, #8, r6, asr #18		\n\t"
		"	smultb r9, r4, r12				\n\t"
		"	smulbb r8, r4, r12				\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r9, #8, r9, asr #18		\n\t"
		"	ssat   r8, #8, r8, asr #18		\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	strb   r11, [r0, #7]			\n\t"
		"	strb   r7, [r0, #6]				\n\t"
		"	strb   r10, [r0, #5]			\n\t"
		"	strb   r6, [r0, #4]				\n\t"
		"	strb   r9, [r0, #3]				\n\t"
		"	strb   r5, [r0, #2]				\n\t"
		"	strb   r8, [r0, #1]				\n\t"
		"	strb   r4, [r0], #8				\n\t"
		"	b 1b							\n\t"
		"1:									\n\t"
		"	lsls   r2, #31					\n\t"
		"	bcc    1f						\n\t"
		"	ldmia  r1!, {r4}				\n\t"
		"	smultb r7, r4, r12				\n\t"
		"	smulbb r6, r4, r12				\n\t"
		"	smultb r5, r4, r3				\n\t"
		"	smulbb r4, r4, r3				\n\t"
		"	ssat   r7, #8, r7, asr #18		\n\t"
		"	ssat   r6, #8, r6, asr #18		\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	strb   r7, [r0, #3]				\n\t"
		"	strb   r5, [r0, #2]				\n\t"
		"	strb   r6, [r0, #1]				\n\t"
		"	strb   r4, [r0], #4				\n\t"
		"1:									\n\t"
		"	bpl    1f						\n\t"
		"	ldrsh  r4, [r1]					\n\t"
		"	muls   r4, r3					\n\t"
		"	mul    r5, r12					\n\t"
		"	ssat   r4, #8, r4, asr #18		\n\t"
		"	ssat   r5, #8, r5, asr #18		\n\t"
		"	strb   r5, [r0, #1]				\n\t"
		"	strb   r4, [r0]  				\n\t"
		"1:									\n\t"
		"	pop    {r4-r11}					\n\t"
		"	bx     lr						\n\t"
		:::"memory","cc"
	);
}

static void audioMicPrvConvertSamplesS16LEmono(int16_t *dst, const int16_t *src, uint32_t nSamp, uint32_t vol)
{
	while (nSamp--) {
		
		*dst++ = (((int32_t)*src++) * vol) >> 10;
	}
}




//sampTyp and chCfg guaranteed to be checked for correctness. remember that unity volume is 1024
void audioMicPrvConvertSamples(void *dst, const int16_t *src, uint32_t nSamp, enum AudioSampleType sampTyp, enum AudioChannelConfig chCfg, uint32_t volL, uint32_t volR)
{
	switch (sampTyp) {
		case AudioSampleU8:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesU8mono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesU8stereo(dst, src, nSamp, volL, volR);
			break;
		case AudioSampleS8:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesS8mono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesS8stereo(dst, src, nSamp, volL, volR);
			break;
	
		case AudioSampleU16LE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesU16LEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesU16LEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleU16BE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesU16BEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesU16BEstereo(dst, src, nSamp, volL, volR);
			break;
	
		case AudioSampleS16LE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesS16LEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesS16LEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleS16BE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesS16BEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesS16BEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleU32LE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesU32LEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesU32LEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleU32BE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesU32BEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesU32BEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleS32LE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesS32LEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesS32LEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleS32BE:
			if (chCfg == AudioMono)
				audioMicPrvConvertSamplesS32BEmono(dst, src, nSamp, (volL + volR) / 2);
			else
				audioMicPrvConvertSamplesS32BEstereo(dst, src, nSamp, volL, volR);
			break;
		
		case AudioSampleFloatLE:
			if (MIXER_ALLOW_VFP_USE) {
				if (chCfg == AudioMono)
					audioMicPrvConvertSamplesHFLEmono(dst, src, nSamp, (volL + volR) / 2);
				else
					audioMicPrvConvertSamplesHFLEstereo(dst, src, nSamp, volL, volR);
			}
			else {
				if (chCfg == AudioMono)
					audioMicPrvConvertSamplesSFLEmono(dst, src, nSamp, (volL + volR) / 2);
				else
					audioMicPrvConvertSamplesSFLEstereo(dst, src, nSamp, volL, volR);
			}
			break;
			
		case AudioSampleFloatBE:
			if (MIXER_ALLOW_VFP_USE) {
				if (chCfg == AudioMono)
					audioMicPrvConvertSamplesHFBEmono(dst, src, nSamp, (volL + volR) / 2);
				else
					audioMicPrvConvertSamplesHFBEstereo(dst, src, nSamp, volL, volR);
			}
			else {
				if (chCfg == AudioMono)
					audioMicPrvConvertSamplesSFBEmono(dst, src, nSamp, (volL + volR) / 2);
				else
					audioMicPrvConvertSamplesSFBEstereo(dst, src, nSamp, volL, volR);
			}
			break;
	
		
		default:;
	}
}



*/