#define _ISLOCAL_	static		//allow us same func names as exported funcs have
#include <string.h>
#include "palmcardProto.h"
#include "palmcardComms.h"
#include "ral_export.h"
#include "palmcard.h"
#include "memmap.h"
#include "printf.h"
#include "pinout.h"
#include "heap.h"
#include "kal.h"
#include "ral.h"
#include "cpu.h"

#define SIDE_SET_HAS_ENABLE_BIT 	0
#define SIDE_SET_NUM_BITS			1
#define DEFINE_PIO_INSTRS
#include "pioAsm.h"
#undef DEFINE_PIO_INSTRS


//XXX: we DMA from RAM here, and that RAM might be storage RAM accessed via XIP. If we get context-switched out mid-transfer and someone writes to ROMRAM, XIP gets turned off and our DMA breaks.
//The solution is to disable preemption while DMA is active...



#define NUM_INSTRS_WE_NEED				11
#define NUM_SMS_WE_NEED					1
#define NUM_DMAS_WE_NEED				2


static uint8_t mMySm, mMyFirstDmaChannel, mMyStartPc;
static RepalmSdioInsertNotifCbk mInsertCbk;


//Advance a running CRC7 by one input byte and return the new CRC7.
//curCrc is the CRC so far (start from 0); it must be at most 0x7f or the table index would run past the 256 entries.
//Every table entry is at most 0x7f, so feeding the return value back in is always safe.
static uint8_t sdPrvCrcAccount(uint8_t curCrc, uint8_t byte)
{
	//one entry per possible value of ((crc << 1) ^ byte)
	static const uint8_t crc7Lut[] = {		//generated from the iterative func :)
		0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
		0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
		0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
		0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
		0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
		0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
		0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
		0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
		0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
		0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
		0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
		0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
		0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
		0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
		0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
		0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79,
	};
	const int lutIdx = (curCrc * 2) ^ byte;

	return crc7Lut[lutIdx];
}

/*
 * Program the state machine's clock divider for the fastest SD clock that does not exceed the request.
 *
 * newMaxRate - highest acceptable bit clock, in Hz. Values above 15 MHz are capped to 15 MHz.
 *              0, or a rate so low that the divider would not fit the integer divider field,
 *              selects the slowest rate the divider can produce.
 *
 * Returns the rate actually configured, in Hz.
 *
 * Only the integer part of the divider is used (the fractional part is cleared) since a
 * non-integer division would make the clock asymmetric.
 */
static uint32_t repalmSdioSetSpeed(uint32_t newMaxRate)		//return new rate
{
	//largest value the integer divider field can hold
	const uint32_t maxDiv = PIO_SM0_CLKDIV_INT_BITS >> PIO_SM0_CLKDIV_INT_LSB;
	//the bit loops in this file spend 4 PIO cycles per bit, so at divider 1 the bit clock is sysclock / 4
	uint32_t realRate, div, base = CPU_CLOCK_RATE / 4;
	
	
	if (newMaxRate > 15000000)		//set a sane limit for our board (15MHz)
		newMaxRate = 15000000;
	
	if (!newMaxRate)				//avoid dividing by zero: treat as "as slow as possible"
		div = maxDiv;
	else {
		
		div = (base + newMaxRate - 1) / newMaxRate;	//round up so we never exceed the requested rate
		if (div > maxDiv)			//would overflow the field and silently program a wrong (faster) divider
			div = maxDiv;
	}
		
	pio1_hw->sm[mMySm].clkdiv = (pio1_hw->sm[mMySm].clkdiv &~ (PIO_SM0_CLKDIV_FRAC_BITS | PIO_SM0_CLKDIV_INT_BITS)) | (div << PIO_SM0_CLKDIV_INT_LSB);
	realRate = base / div;
	
	logi("SDIO: settled on rate %u\n", realRate);
	
	return realRate;
}

//One-time pin configuration for our state machine: CLK becomes a PIO output, DAT0 a PIO input,
//and the PIO input synchronizer is bypassed for DAT0 and CMD.
//Each pin direction is changed by pointing the SM's one-pin SET group at the pin and then
//force-executing a "SET PINDIRS" instruction through the SM's instr register.
static void palmcardSdioPrvPinsSetup(void)
{
	//set clk as output for pio
	pio1_hw->sm[mMySm].pinctrl = (1 << PIO_SM1_PINCTRL_SET_COUNT_LSB) | (PIN_SD_CLK << PIO_SM1_PINCTRL_SET_BASE_LSB);
	pio1_hw->sm[mMySm].instr = I_SET(0, 0, SET_DST_PINDIRS, 1);		//CLK is out

	//set DAT as input for pio
	pio1_hw->sm[mMySm].pinctrl = (1 << PIO_SM1_PINCTRL_SET_COUNT_LSB) | (PIN_SD_DAT0 << PIO_SM1_PINCTRL_SET_BASE_LSB);
	pio1_hw->sm[mMySm].instr = I_SET(0, 0, SET_DST_PINDIRS, 0);		//DAT0 is in
	
	//NOTE(review): presumably done to remove the synchronizer's sampling latency on the two inputs - confirm against the RP2040 datasheet
	pio1_hw->input_sync_bypass |= (1 << PIN_SD_DAT0) | (1 << PIN_SD_CMD);
}

/*
 * Decode the card's data-response token after a block write.
 *
 * retByteAligned - the sampled reply, already aligned so that the token sits in the low 5 bits
 *
 * Returns SdWriteOK (token 0x05), SdWriteCrcError (0x0b), SdWriteCardError (0x0d),
 * or SdWriteFramingError for any other bit pattern.
 */
static enum SdWriteRet repalmSdioPrvDataTxCalcReply(uint_fast8_t retByteAligned)
{
	switch (retByteAligned & 0x1f) {
		case 0x05:
			//logi("data accepted\n");
			return SdWriteOK;
		
		case 0x0b:
			logi("crc error on data\n");
			return SdWriteCrcError;
		
		case 0x0d:
			logi("write error on data\n");
			return SdWriteCardError;
		
		default:	//framing error
			//report the error to the caller instead of spinning forever: a garbled reply must not hang the driver
			logi("unknown reply: %08lx\n", (unsigned long)retByteAligned);
			return SdWriteFramingError;
	}
}

/*
 * Send one data block on DAT0 and collect the card's data-response token.
 *
 * src     - blockSz bytes of payload
 * blockSz - payload size in bytes
 *
 * Steps:
 *  1. The first DMA channel copies the payload into a dummy byte with the sniffer enabled, only to obtain its CRC16.
 *  2. While that runs, the PIO program is written and the state machine is configured.
 *  3. The start token 0xfe is pushed by hand. The first DMA channel then streams the payload into the TX FIFO
 *     and chains to the second channel, which streams the CRC16 and the end byte.
 *  4. The state machine shifts everything out, turns DAT0 into an input and samples 7 bits of reply.
 *
 * Task switching is disabled for as long as a DMA transfer from src is in flight.
 *
 * Returns SdWriteDataTxTimeout if the DMA channels did not both run to completion, SdWriteFramingError if
 * the reply cannot be aligned or decoded, otherwise the decoded result of the card's response token.
 */
static enum SdWriteRet repalmSdioPrvDataTxOneBlock(const uint8_t* src, uint32_t blockSz)
{
	uint_fast8_t pc = mMyStartPc, startPC, endPC, jmpDst;
	uint8_t frame[3];
	uint32_t reply;
	bool dmaDone;
	
	//stop SM
	pio1_hw->ctrl &=~ ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	while (pio1_hw->ctrl & ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm));
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_RESTART_LSB) << mMySm);
	
	
	//page57 of the original "SD Card physical layer spec" goes into more detail about this than any other document
	//after the last CRC bit, the bus shall have EZSstaE____, where E = end bit = 1, Z = hiZ, S = start bit = 0, and "_" is the busy "low" state
	//we send the "E" bit as our end bit already, so then we need to read 7 bits. High speed signalling might differ on this...
	
	//start the DMA process while we do other things so that it can prepare the CRC for us
	dma_hw->sniff_ctrl = (dma_hw->sniff_ctrl &~ (DMA_SNIFF_CTRL_OUT_INV_BITS | DMA_SNIFF_CTRL_OUT_REV_BITS | DMA_SNIFF_CTRL_BSWAP_BITS | DMA_SNIFF_CTRL_CALC_BITS | DMA_SNIFF_CTRL_DMACH_BITS)) | (DMA_SNIFF_CTRL_CALC_VALUE_CRC16 << DMA_SNIFF_CTRL_CALC_LSB) | (((uint32_t)mMyFirstDmaChannel + 0) << DMA_SNIFF_CTRL_DMACH_LSB) | DMA_SNIFF_CTRL_EN_BITS;
	dma_hw->sniff_data = 0;
	
	//no write increment is requested below, so every byte lands on frame[0]: it is just a sink for the sniffer pass
	dma_hw->ch[mMyFirstDmaChannel + 0].read_addr = (uintptr_t)src;
	dma_hw->ch[mMyFirstDmaChannel + 0].write_addr = (uintptr_t)&frame;
	dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count = blockSz;
	
	(void)KALTaskSwitching(false);
	
	//TREQ 0x3f: unpaced transfer (per the RP2040 DMA TREQ_SEL encoding); chaining to itself means no chaining
	dma_hw->ch[mMyFirstDmaChannel + 0].ctrl_trig = (0x3f << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) | (((uint32_t)mMyFirstDmaChannel + 0) << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB) | (DMA_CH0_CTRL_TRIG_DATA_SIZE_VALUE_SIZE_BYTE << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB) |  DMA_CH0_CTRL_TRIG_SNIFF_EN_BITS | DMA_CH0_CTRL_TRIG_EN_BITS | DMA_CH0_CTRL_TRIG_INCR_READ_BITS;
	
	//transmit loop: one bit out per pass, runs X + 1 times
	startPC = pc;
	pio1_hw->instr_mem[pc++] = I_OUT(0, 0, OUT_DST_PINS, 1);
	pio1_hw->instr_mem[pc++] = I_NOP(1, 1);
	pio1_hw->instr_mem[pc++] = I_JMP(0, 0, JMP_X_POSTDEC, startPC);
		
	//clk is low currently
	
	//now get reply: the sample loop runs Y + 1 times
	pio1_hw->instr_mem[pc++] = I_SET(0, 0, SET_DST_PINDIRS, 0);				//DAT0 is in
	pio1_hw->instr_mem[pc++] = I_NOP(1, 1);
	
	jmpDst = pc;
	pio1_hw->instr_mem[pc++] = I_IN(1, 0, IN_SRC_PINS, 1);
	pio1_hw->instr_mem[pc++] = I_JMP(1, 1, JMP_Y_POSTDEC, jmpDst);
	
	//terminal location: the SM parks here and we poll for it
	endPC = pc;
	pio1_hw->instr_mem[pc++] = I_JMP(0, 0, JMP_ALWAYS, endPC);
	
	//config the SM
	pio1_hw->sm[mMySm].execctrl = (pio1_hw->sm[mMySm].execctrl &~ (PIO_SM0_EXECCTRL_WRAP_BOTTOM_BITS | PIO_SM2_EXECCTRL_SIDE_EN_BITS | PIO_SM0_EXECCTRL_JMP_PIN_BITS)) | (SIDE_SET_HAS_ENABLE_BIT ? PIO_SM2_EXECCTRL_SIDE_EN_BITS : 0) | PIO_SM0_EXECCTRL_WRAP_TOP_BITS;
	pio1_hw->sm[mMySm].shiftctrl = (pio1_hw->sm[mMySm].shiftctrl &~ (PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS | PIO_SM0_SHIFTCTRL_FJOIN_TX_BITS | PIO_SM0_SHIFTCTRL_PULL_THRESH_BITS | PIO_SM0_SHIFTCTRL_PUSH_THRESH_BITS | PIO_SM0_SHIFTCTRL_OUT_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_IN_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_AUTOPULL_BITS | PIO_SM0_SHIFTCTRL_AUTOPUSH_BITS));
	pio1_hw->sm[mMySm].pinctrl = (SIDE_SET_BITS_USED << PIO_SM1_PINCTRL_SIDESET_COUNT_LSB) | (1 << PIO_SM1_PINCTRL_OUT_COUNT_LSB) | (PIN_SD_DAT0 << PIO_SM1_PINCTRL_OUT_BASE_LSB) | (1 << PIO_SM1_PINCTRL_SET_COUNT_LSB) | (PIN_SD_DAT0 << PIO_SM1_PINCTRL_SET_BASE_LSB) | (PIN_SD_DAT0 << PIO_SM0_PINCTRL_IN_BASE_LSB) | (PIN_SD_CLK << PIO_SM1_PINCTRL_SIDESET_BASE_LSB);
	
	//prepare state: X = number of bits to send, minus one
	pio1_hw->txf[mMySm] = (blockSz + 3 /* crc and start framing */) * 8 + 1 /* end framing */ - 1;
	
	pio1_hw->sm[mMySm].instr = I_SET(0, 0, SET_DST_PINDIRS, 1);		//DAT0 is out
	pio1_hw->sm[mMySm].instr = I_PULL(0, 0, 0, 0);
	pio1_hw->sm[mMySm].instr = I_OUT(0, 0, OUT_DST_X, 32);
	pio1_hw->sm[mMySm].instr = I_SET(0, 0, SET_DST_Y, 6);			//7 reply samples
	
	//end byte: only its top bit (the end bit) gets shifted out
	frame[2] = 0x80;
	
	//wait for checksum
	while(dma_hw->ch[mMyFirstDmaChannel].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS);
	dma_hw->ch[mMyFirstDmaChannel].al1_ctrl = 0;
	(void)KALTaskSwitching(true);
	
	//CRC16 goes out high byte first
	frame[0] = dma_hw->sniff_data >> 8;
	frame[1] = dma_hw->sniff_data;
	
	//autopull was off while X was loaded by hand; from here on pull a byte at a time and push the 7-bit reply
	pio1_hw->sm[mMySm].shiftctrl |= PIO_SM0_SHIFTCTRL_AUTOPULL_BITS | PIO_SM0_SHIFTCTRL_AUTOPUSH_BITS | (8 << PIO_SM0_SHIFTCTRL_PULL_THRESH_LSB) | (7 << PIO_SM0_SHIFTCTRL_PUSH_THRESH_LSB);
	
	//pre-feed our start token
	*(volatile uint8_t*)&pio1_hw->txf[mMySm] = 0xfe;

	//second channel: CRC16 + end byte. Armed via al1_ctrl (no trigger); it gets started by the chain from the first channel
	dma_hw->ch[mMyFirstDmaChannel + 1].read_addr = (uintptr_t)&frame;
	dma_hw->ch[mMyFirstDmaChannel + 1].write_addr = (uintptr_t)&pio1_hw->txf[mMySm];
	dma_hw->ch[mMyFirstDmaChannel + 1].transfer_count = 3;
	dma_hw->ch[mMyFirstDmaChannel + 1].al1_ctrl = ((DREQ_PIO1_TX0 + mMySm) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) | (((uint32_t)mMyFirstDmaChannel + 1) << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB) | (DMA_CH0_CTRL_TRIG_DATA_SIZE_VALUE_SIZE_BYTE << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB) | DMA_CH0_CTRL_TRIG_INCR_READ_BITS | DMA_CH0_CTRL_TRIG_EN_BITS;
	
	//first channel: the payload itself, paced by the TX FIFO
	dma_hw->ch[mMyFirstDmaChannel + 0].read_addr = (uintptr_t)src;
	dma_hw->ch[mMyFirstDmaChannel + 0].write_addr = (uintptr_t)&pio1_hw->txf[mMySm];
	dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count = blockSz;
	
	(void)KALTaskSwitching(false);
	
	dma_hw->ch[mMyFirstDmaChannel + 0].ctrl_trig = ((DREQ_PIO1_TX0 + mMySm) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) | (((uint32_t)mMyFirstDmaChannel + 1) << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB) | (DMA_CH0_CTRL_TRIG_DATA_SIZE_VALUE_SIZE_BYTE << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB) | DMA_CH0_CTRL_TRIG_INCR_READ_BITS | DMA_CH0_CTRL_TRIG_EN_BITS;
	
	//start
	pio1_hw->sm[mMySm].instr = I_JMP(0, 0, JMP_ALWAYS, startPC);	//start at the start
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	
	//wait
	while (pio1_hw->sm[mMySm].addr != endPC);
	
	reply = pio1_hw->rxf[mMySm];
	
	//sample the counters before aborting: both must have reached zero for the block to count as sent
	dmaDone = !dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count && !dma_hw->ch[mMyFirstDmaChannel + 1].transfer_count;

	dma_hw->abort = 3 << mMyFirstDmaChannel;
	while (dma_hw->abort & (3 << mMyFirstDmaChannel));
	while (dma_hw->ch[mMyFirstDmaChannel + 0].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS);
	while (dma_hw->ch[mMyFirstDmaChannel + 1].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS);
	dma_hw->ch[mMyFirstDmaChannel + 0].al1_ctrl = 0;
	dma_hw->ch[mMyFirstDmaChannel + 1].al1_ctrl = 0;

	(void)KALTaskSwitching(true);

	if (!dmaDone)
		return SdWriteDataTxTimeout;
	
	//some card send reply with wrong edges. Live with this
	if ((reply & 0x11) == 0x01)			//not shifted
		return repalmSdioPrvDataTxCalcReply(reply);
	else if ((reply & 0x22) == 0x02)	//shifted right 1
		return repalmSdioPrvDataTxCalcReply(reply >> 1);
	else {
		//report the error to the caller instead of spinning forever: a garbled reply must not hang the driver
		logi("unknown reply alignment: %08lx\n", (unsigned long)reply);
		return SdWriteFramingError;
	}
}


static enum SdWriteRet repalmSdioDataTx(const uint8_t* src, uint32_t nBlocks, uint32_t blockSz, uint32_t maxClocksPerBlock)
{
	while (nBlocks--) {
		
		enum SdWriteRet writeRet = repalmSdioPrvDataTxOneBlock(src, blockSz);
		
		if (!repalmSdioBusyWait(maxClocksPerBlock))
			return SdWriteBusyTimeout;
		
		if (writeRet != SdWriteOK)
			return writeRet;
		
		src += blockSz;
	}
	
	return SdWriteOK;
}

/*
 * Receive one data block on DAT0 into dst and check its CRC16.
 *
 * dst       - destination for blockSz bytes. If it lies inside [ROMRAM_BASE, ROMRAM_BASE + ROMRAM_SIZE), the DMA
 *             targets a heap bounce buffer instead and the data is memcpy()ed to dst afterwards.
 * blockSz   - payload size in bytes
 * maxClocks - how long to wait for the start bit (loaded into X as maxClocks - 1)
 *
 * The first DMA channel moves the payload out of the RX FIFO with the CRC16 sniffer enabled, then chains to
 * the second channel, which collects the two received CRC bytes plus one trailing byte into frame[].
 * Task switching is disabled while the DMA is in flight.
 *
 * Returns SdReadTimeout if the DMA channels did not both run to completion (dst is not written in that case),
 * SdReadCrcErr if the received CRC differs from the sniffed one (dst has already been written), else SdReadOK.
 */
static enum SdReadRet repalmSdioPrvDataRxOneBlock(uint8_t* dst, const uint32_t blockSz, uint32_t maxClocks)
{
	uint_fast8_t pc = mMyStartPc, startPC, endPC, jumpOnPinHighLoc, jmpDst, warpToPC, warpFromPC;
	uint8_t frame[3], *dstLocal = dst;
	bool dmaDone;

	//destination is in ROMRAM: receive into a bounce buffer instead
	//NOTE(review): presumably because DMA cannot safely write there (see the XIP note at the top of the file) - confirm
	if (((uintptr_t)dst) >= ROMRAM_BASE && (((uintptr_t)dst) - ROMRAM_BASE) < ROMRAM_SIZE) {
		
		//the bounce buffer persists across calls and is only replaced when a bigger one is needed
		static uint8_t *dstTemp = NULL;
		static uint32_t dstSz = 0;
		
		if (dstSz < blockSz) {
			
			dstSz = 0;
			if (dstTemp)
				kheapFree(dstTemp);
			dstTemp = NULL;
		}
		
		if (!dstTemp) {
			
			dstTemp = kheapAllocEx(blockSz, MEM_NO_OS_HEAP);
			if (!dstTemp)
				fatal("SDIO: cannot alloc tmp buf %u\n", blockSz);
			
			dstSz = blockSz;
		}
		
		dstLocal = dstTemp;
	}
	
	//stop our SM
	pio1_hw->ctrl &=~ ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	while (pio1_hw->ctrl & ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm));
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_RESTART_LSB) << mMySm);
	
	//at start, X is the number of cycles to wait for data, Y is the number of bits of DATA to rx

	startPC = pc;
	jumpOnPinHighLoc = pc++;	//we'll insert a jump here to be taken if data pin is high
	
	//we get here if data is low and thus we are ready to RX. clock is low but still zero start bit seen - skip it
	//(the jump lands on the Y-- instruction below, so the first sample taken is the bit after the start bit)
	pio1_hw->instr_mem[pc] = I_JMP(0, 0, JMP_ALWAYS, pc + 2);
	pc++;
	
	//sample loop: one bit in per pass
	jmpDst = pc;
	pio1_hw->instr_mem[pc++] = I_IN(1, 0, IN_SRC_PINS, 1);
	pio1_hw->instr_mem[pc++] = I_JMP(1, 1, JMP_Y_POSTDEC, jmpDst);
	
	//we get here when we've RXed all data, and the last high framing bit, but we can only output bytes, so grab 7 more bits of zeroes. clock is low now
	pio1_hw->instr_mem[pc++] = I_IN(0, 0, IN_SRC_ZEROES, 7);
	
	//this is our terminal location
	endPC = pc;
	warpToPC = pc;
	pio1_hw->instr_mem[pc++] = I_JMP(0, 0, JMP_ALWAYS, endPC);
	
	//this is the loop wrapper for the initial search loop. the first instr will jump here if the data pin is high
	pio1_hw->instr_mem[jumpOnPinHighLoc] = I_JMP(1, 0, JMP_PIN, pc);
	pio1_hw->instr_mem[pc++] = I_JMP(1, 1, JMP_X_POSTDEC, jumpOnPinHighLoc);
	warpFromPC = pc - 1;
	
	//wraparound takes us to end from here (i.e. when X runs out before DAT0 goes low)
	
	pio1_hw->sm[mMySm].execctrl = (pio1_hw->sm[mMySm].execctrl &~ (PIO_SM0_EXECCTRL_WRAP_TOP_BITS | PIO_SM0_EXECCTRL_WRAP_BOTTOM_BITS | PIO_SM2_EXECCTRL_SIDE_EN_BITS | PIO_SM0_EXECCTRL_JMP_PIN_BITS)) | (warpFromPC << PIO_SM0_EXECCTRL_WRAP_TOP_LSB) | (warpToPC << PIO_SM0_EXECCTRL_WRAP_BOTTOM_LSB) | (SIDE_SET_HAS_ENABLE_BIT ? PIO_SM2_EXECCTRL_SIDE_EN_BITS : 0) | (PIN_SD_DAT0 << PIO_SM0_EXECCTRL_JMP_PIN_LSB);
	pio1_hw->sm[mMySm].shiftctrl = (pio1_hw->sm[mMySm].shiftctrl &~ (PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS | PIO_SM0_SHIFTCTRL_FJOIN_TX_BITS | PIO_SM0_SHIFTCTRL_PULL_THRESH_BITS | PIO_SM0_SHIFTCTRL_PUSH_THRESH_BITS | PIO_SM0_SHIFTCTRL_OUT_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_IN_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_AUTOPULL_BITS)) | PIO_SM0_SHIFTCTRL_AUTOPUSH_BITS | (8 << PIO_SM0_SHIFTCTRL_PUSH_THRESH_LSB);
	pio1_hw->sm[mMySm].pinctrl = (SIDE_SET_BITS_USED << PIO_SM1_PINCTRL_SIDESET_COUNT_LSB) | (PIN_SD_DAT0 << PIO_SM0_PINCTRL_IN_BASE_LSB) | (PIN_SD_CLK << PIO_SM1_PINCTRL_SIDESET_BASE_LSB);
	
	//preset x and y: X = start-bit wait budget, Y = payload bits + 16 CRC bits + 1 more bit
	pio1_hw->txf[mMySm] = maxClocks - 1;
	pio1_hw->txf[mMySm] = (blockSz + 2) * 8 + 1;
	pio1_hw->sm[mMySm].instr = I_PULL(0, 0, 0, 1);
	pio1_hw->sm[mMySm].instr = I_OUT(0, 0, OUT_DST_X, 32);
	pio1_hw->sm[mMySm].instr = I_PULL(0, 0, 0, 1);
	pio1_hw->sm[mMySm].instr = I_OUT(0, 0, OUT_DST_Y, 32);
	
	//set up DMA: the sniffer computes CRC16 over what the first channel transfers (the payload only)
	dma_hw->sniff_ctrl = (dma_hw->sniff_ctrl &~ (DMA_SNIFF_CTRL_OUT_INV_BITS | DMA_SNIFF_CTRL_OUT_REV_BITS | DMA_SNIFF_CTRL_BSWAP_BITS | DMA_SNIFF_CTRL_CALC_BITS | DMA_SNIFF_CTRL_DMACH_BITS)) | (DMA_SNIFF_CTRL_CALC_VALUE_CRC16 << DMA_SNIFF_CTRL_CALC_LSB) | (((uint32_t)mMyFirstDmaChannel + 0) << DMA_SNIFF_CTRL_DMACH_LSB) | DMA_SNIFF_CTRL_EN_BITS;
	dma_hw->sniff_data = 0;
	
	//second channel: the 3 bytes that follow the payload, into frame[]. Armed via al1_ctrl (no trigger); started by the chain from the first channel
	dma_hw->ch[mMyFirstDmaChannel + 1].read_addr = (uintptr_t)&pio1_hw->rxf[mMySm];
	dma_hw->ch[mMyFirstDmaChannel + 1].write_addr = (uintptr_t)frame;
	dma_hw->ch[mMyFirstDmaChannel + 1].transfer_count = 3;
	dma_hw->ch[mMyFirstDmaChannel + 1].al1_ctrl = ((DREQ_PIO1_RX0 + mMySm) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) | (((uint32_t)mMyFirstDmaChannel + 1) << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB) | (DMA_CH0_CTRL_TRIG_DATA_SIZE_VALUE_SIZE_BYTE << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB) | DMA_CH0_CTRL_TRIG_INCR_WRITE_BITS | DMA_CH0_CTRL_TRIG_EN_BITS;
	
	//first channel: the payload, into dst or the bounce buffer
	dma_hw->ch[mMyFirstDmaChannel + 0].read_addr = (uintptr_t)&pio1_hw->rxf[mMySm];
	dma_hw->ch[mMyFirstDmaChannel + 0].write_addr = (uintptr_t)dstLocal;
	dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count = blockSz;
	
	(void)KALTaskSwitching(false);
	
	dma_hw->ch[mMyFirstDmaChannel + 0].ctrl_trig = ((DREQ_PIO1_RX0 + mMySm) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) | (((uint32_t)mMyFirstDmaChannel + 1) << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB) | (DMA_CH0_CTRL_TRIG_DATA_SIZE_VALUE_SIZE_BYTE << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB) | DMA_CH0_CTRL_TRIG_INCR_WRITE_BITS | DMA_CH0_CTRL_TRIG_SNIFF_EN_BITS | DMA_CH0_CTRL_TRIG_EN_BITS;
	
	//wait
	
	///HELP US DEBUG IT
//	sio_hw->gpio_clr = 1 << PIN_SD_DAT3;
//	asm volatile("dsb sy\ndsb sy\ndsb sy\ndsb sy\ndsb sy\n");
//	sio_hw->gpio_set = 1 << PIN_SD_DAT3;
	
	
	//start
	pio1_hw->sm[mMySm].instr = I_JMP(0, 0, JMP_ALWAYS, startPC);	//start at the start
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	
	//logi("wait, data fifo info F: %08xh\n", pio1_hw->flevel);
	
	//wait until the SM parks at its terminal location (reached both on completion and on start-bit timeout)
	while (pio1_hw->sm[mMySm].addr != endPC);
	
	//sample the counters before aborting: both must have reached zero for the block to count as received
	dmaDone = !dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count && !dma_hw->ch[mMyFirstDmaChannel + 1].transfer_count;
	dma_hw->abort = 3 << mMyFirstDmaChannel;
	while (dma_hw->abort & (3 << mMyFirstDmaChannel));
	while (dma_hw->ch[mMyFirstDmaChannel + 0].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS);
	while (dma_hw->ch[mMyFirstDmaChannel + 1].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS);
	dma_hw->ch[mMyFirstDmaChannel + 0].al1_ctrl = 0;
	dma_hw->ch[mMyFirstDmaChannel + 1].al1_ctrl = 0;
	
	(void)KALTaskSwitching(true);
	
	//logi("data fifo info G: %08xh\n", pio1_hw->flevel);
	//logi("done, dma transfer_counts = {%u %u}\n", dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count, dma_hw->ch[mMyFirstDmaChannel + 1].transfer_count);
	//logi("FRAME: %02x %02x %02x\n", frame[0], frame[1], frame[2]);
	
	//logi("sniff data is 0x%08x\n", dma_hw->sniff_data);
	
	if (!dmaDone)
		return SdReadTimeout;
	
	//copy out of the bounce buffer if one was used (this happens before the CRC verdict)
	if (dstLocal != dst)
		memcpy(dst, dstLocal, blockSz);
	
	//received CRC16 arrives high byte first; compare with what the sniffer computed over the payload
	if (frame[0] * 256u + frame[1] != dma_hw->sniff_data)
		return SdReadCrcErr;
	
	return SdReadOK;
}

static enum SdReadRet repalmSdioDataRx(uint8_t* dst, uint32_t nBlocks, uint32_t blockSz, uint32_t maxClocksPerBlock)
{
	while (nBlocks--) {
		
		enum SdReadRet ret = repalmSdioPrvDataRxOneBlock(dst, blockSz, maxClocksPerBlock);
		
		if (ret != SdReadOK)
			return ret;
		
		dst += blockSz;
	}
	
	return SdReadOK;
}

/*
 * Clock the card until DAT0 reads high or the clock budget runs out.
 *
 * maxClocks - clock budget (loaded into X as maxClocks - 1)
 *
 * A three-instruction PIO program toggles the clock and tests DAT0 once per pass; it parks at its last
 * instruction either when DAT0 is seen high or when X is exhausted.
 *
 * Returns the state of DAT0 read from the GPIO input register afterwards: true if it is high, false if it is still low.
 */
static bool repalmSdioBusyWait(uint32_t maxClocks)
{
	uint_fast8_t pc = mMyStartPc, startPC, endPC;

	//stop our SM
	pio1_hw->ctrl &=~ ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	while (pio1_hw->ctrl & ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm));
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_RESTART_LSB) << mMySm);
	
	startPC = pc;
	//DAT0 high: jump straight to the terminal location
	pio1_hw->instr_mem[pc] = I_JMP(1, 0, JMP_PIN, pc + 2);
	pc++;
	//otherwise loop back while X lasts; when X runs out we fall through to the terminal location
	pio1_hw->instr_mem[pc] = I_JMP(1, 1, JMP_X_POSTDEC, pc - 1);
	pc++;
	//terminal location: spin in place
	endPC = pc;
	pio1_hw->instr_mem[pc] = I_JMP(0, 0, JMP_ALWAYS, pc);
	
	pio1_hw->sm[mMySm].execctrl = (pio1_hw->sm[mMySm].execctrl &~ (PIO_SM0_EXECCTRL_WRAP_BOTTOM_BITS | PIO_SM2_EXECCTRL_SIDE_EN_BITS | PIO_SM0_EXECCTRL_JMP_PIN_BITS)) | (SIDE_SET_HAS_ENABLE_BIT ? PIO_SM2_EXECCTRL_SIDE_EN_BITS : 0) | (PIN_SD_DAT0 << PIO_SM0_EXECCTRL_JMP_PIN_LSB) | PIO_SM0_EXECCTRL_WRAP_TOP_BITS;
	pio1_hw->sm[mMySm].shiftctrl = (pio1_hw->sm[mMySm].shiftctrl &~ (PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS | PIO_SM0_SHIFTCTRL_FJOIN_TX_BITS | PIO_SM0_SHIFTCTRL_PULL_THRESH_BITS | PIO_SM0_SHIFTCTRL_PUSH_THRESH_BITS | PIO_SM0_SHIFTCTRL_OUT_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_IN_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_AUTOPULL_BITS | PIO_SM0_SHIFTCTRL_AUTOPUSH_BITS));
	pio1_hw->sm[mMySm].pinctrl = (SIDE_SET_BITS_USED << PIO_SM1_PINCTRL_SIDESET_COUNT_LSB) | (PIN_SD_CLK << PIO_SM1_PINCTRL_SIDESET_BASE_LSB);
	
	//prepare state: load X by pushing it through the TX FIFO and force-executing PULL + OUT
	pio1_hw->txf[mMySm] = maxClocks - 1;
	
	pio1_hw->sm[mMySm].instr = I_PULL(0, 0, 0, 1);
	pio1_hw->sm[mMySm].instr = I_OUT(0, 0, OUT_DST_X, 32);
	
	pio1_hw->sm[mMySm].instr = I_JMP(0, 0, JMP_ALWAYS, startPC);	//start at the start
	
	///HELP US DEBUG IT
//	sio_hw->gpio_clr = 1 << PIN_SD_DAT3;
//	asm volatile("dsb sy\ndsb sy\ndsb sy\ndsb sy\ndsb sy\n");
//	sio_hw->gpio_set = 1 << PIN_SD_DAT3;
	
	
	//start
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	
	//logi("wait, data fifo info F: %08xh\n", pio1_hw->flevel);
	
	//wait until the SM parks at its terminal location
	while (pio1_hw->sm[mMySm].addr != endPC);
	
	//the program ends the same way on success and on timeout, so the pin itself tells us which it was
	return !!(sio_hw->gpio_in & (1 << PIN_SD_DAT0));
}

/*
 * Clock one command out on CMD and, if nReplyBits is nonzero, clock the reply in via DMA.
 *
 * cmd        - command index; 0x40 is added to it to form the first command byte
 * param      - 32-bit command argument
 * replyOutP  - receives nReplyBits / 8 reply bytes; not written by this function's DMA when nReplyBits is 0
 * nReplyBits - reply length in bits, or 0 when no reply is expected
 *
 * The bit stream handed to the state machine is, in order: 0xff, 0x40 + cmd, the four param bytes,
 * CRC7 and end bit, and finally one byte holding nReplyBits - 2 that the PIO program itself consumes
 * as its reply bit counter.
 *
 * Returns true if no reply was requested or the reply DMA ran to completion, false otherwise.
 * The reply's CRC is not checked here.
 */
static bool palmcardSdioPrvCmdRspRaw(uint_fast8_t cmd, uint32_t param, uint8_t *replyOutP, uint_fast8_t nReplyBits)
{
	//word0: 0xff lead-in, command byte, upper half of param
	uint32_t word0 = 0xff400000 + (((uint32_t)cmd) << 16) + (param >> 16);
	//word1: lower half of param, CRC7 in bits 15..9, end bit (0x100), reply bit count for the PIO program in the low byte
	uint32_t word1 = ((param << 16)) + (((uint32_t)sdPrvCrcAccount(sdPrvCrcAccount(sdPrvCrcAccount(sdPrvCrcAccount(sdPrvCrcAccount(0, 0x40 + cmd), param >> 24), param >> 16), param >> 8), param)) << 9) + 0x100 + (uint8_t)(nReplyBits - 2);
	uint_fast8_t pc = mMyStartPc, startPC, jmpToPC, warpToPC, warpFromPC, endPC;
	bool success;
	
	
	//logi("expecting %u bits\n", nReplyBits);
	
	//stop our SM
	pio1_hw->ctrl &=~ ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	while (pio1_hw->ctrl & ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm));
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_RESTART_LSB) << mMySm);
	
	//program our SM to send command and RX reply (expects X preset to 55), Y to 63, CMD as output
	startPC = pc;
	//some cards capture on up, some on down. who knows...
	//to be extra careful, we change data, then do a full up down with no data changes
	//live on both sides of the edge - the grass is always green
	pio1_hw->instr_mem[pc++] = I_OUT(0, 0, OUT_DST_PINS, 1);
	pio1_hw->instr_mem[pc++] = I_NOP(1, 1);
	pio1_hw->instr_mem[pc++] = I_JMP(0, 0, JMP_X_POSTDEC, startPC);
	
	//clock is still high now
	
	if (!nReplyBits) {
		
		//no reply wanted: park right after the transmit loop
		warpToPC = warpFromPC = endPC = pc;
		pio1_hw->instr_mem[pc] = I_JMP(0, 0, JMP_ALWAYS, pc);
	}
	else {
		
		//release CMD so the card can drive it
		pio1_hw->instr_mem[pc++] = I_SET(0, 0, SET_DST_PINDIRS, 0);
		
		pio1_hw->instr_mem[pc++] = I_OUT(0, 0, OUT_DST_X, 8);		//sort out how long the reply is
		
		
		//wait loop for the reply's start bit, bounded by Y; see the pseudo-code at the end of this branch
		pio1_hw->instr_mem[pc] = I_JMP(1, 1, JMP_Y_POSTDEC, pc + 2);
		pc++;
		endPC = pc;
		warpToPC = pc;
		pio1_hw->instr_mem[pc] = I_JMP(0, 0, JMP_ALWAYS, pc);
		pc++;
		pio1_hw->instr_mem[pc] = I_JMP(1, 0, JMP_PIN, pc - 2);
		pc++;
		
		//in the zero that we missed
		pio1_hw->instr_mem[pc++] = I_IN(1, 1, IN_SRC_ZEROES, 1);
		
		//we get here if we have data to RX, clock is high
		pio1_hw->instr_mem[pc++] = I_IN(1, 0, IN_SRC_PINS, 1);
		pio1_hw->instr_mem[pc] = I_JMP(1, 1, JMP_X_POSTDEC, pc - 1);
		pc++;
		
		//we get here when reply is done being RXed; wrapping from here leads to the terminal location
		warpFromPC = pc - 1;
/*
	waitloop:
		if Y--, goto checkmore, hi, wait 1
	infloop:
		goto infloop, low
	checkmore:
		if hi goto waitloop, low, wait 1
	rx:
		in 1bit from NULL, hi, wait 1
	more_bits:
		in 1bit from PIN, low, wait 1
		jump if x-- more_bits, hi, wait 1
*/
	}
	
	//logi("last pc %u, endPC %u, wrap from %u to %u\n", pc, endPC, warpFromPC, warpToPC);
	
	//config the SM
	pio1_hw->sm[mMySm].execctrl = (pio1_hw->sm[mMySm].execctrl &~ (PIO_SM0_EXECCTRL_WRAP_TOP_BITS | PIO_SM0_EXECCTRL_WRAP_BOTTOM_BITS | PIO_SM2_EXECCTRL_SIDE_EN_BITS | PIO_SM0_EXECCTRL_JMP_PIN_BITS)) | (warpFromPC << PIO_SM0_EXECCTRL_WRAP_TOP_LSB) | (warpToPC << PIO_SM0_EXECCTRL_WRAP_BOTTOM_LSB) | (SIDE_SET_HAS_ENABLE_BIT ? PIO_SM2_EXECCTRL_SIDE_EN_BITS : 0) | (PIN_SD_CMD << PIO_SM0_EXECCTRL_JMP_PIN_LSB);
	pio1_hw->sm[mMySm].shiftctrl = (pio1_hw->sm[mMySm].shiftctrl &~ (PIO_SM0_SHIFTCTRL_FJOIN_RX_BITS | PIO_SM0_SHIFTCTRL_FJOIN_TX_BITS | PIO_SM0_SHIFTCTRL_PULL_THRESH_BITS | PIO_SM0_SHIFTCTRL_PUSH_THRESH_BITS | PIO_SM0_SHIFTCTRL_OUT_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_IN_SHIFTDIR_BITS | PIO_SM0_SHIFTCTRL_AUTOPULL_BITS | PIO_SM0_SHIFTCTRL_AUTOPUSH_BITS));
	pio1_hw->sm[mMySm].pinctrl = (SIDE_SET_BITS_USED << PIO_SM1_PINCTRL_SIDESET_COUNT_LSB) | (1 << PIO_SM1_PINCTRL_OUT_COUNT_LSB) | (PIN_SD_CMD << PIO_SM1_PINCTRL_OUT_BASE_LSB) | (1 << PIO_SM1_PINCTRL_SET_COUNT_LSB) | (PIN_SD_CMD << PIO_SM1_PINCTRL_SET_BASE_LSB) | (PIN_SD_CMD << PIO_SM0_PINCTRL_IN_BASE_LSB) | (PIN_SD_CLK << PIO_SM1_PINCTRL_SIDESET_BASE_LSB);
	
	//prepare state: first word becomes X (bits to send, minus one), second becomes Y (start-bit wait budget, minus one)
	pio1_hw->txf[mMySm] = 8 /* initial 0xff */ + 48 /* command */ - 1;
	pio1_hw->txf[mMySm] = 64 - 1;
	
	
	//logi("data fifo info A: %08xh\n", pio1_hw->flevel);
	
	pio1_hw->sm[mMySm].instr = I_SET(0, 0, SET_DST_PINDIRS, 1);		//CMD is out
	pio1_hw->sm[mMySm].instr = I_PULL(0, 0, 0, 1);
	pio1_hw->sm[mMySm].instr = I_OUT(0, 0, OUT_DST_X, 32);			//8 lead-in bits + 48 command bits
	pio1_hw->sm[mMySm].instr = I_PULL(0, 0, 0, 1);
	pio1_hw->sm[mMySm].instr = I_OUT(0, 0, OUT_DST_Y, 32);			//64 wait cycles max
	
	//autopull/autopush were off while X and Y were loaded by hand; turn them on for the actual transfer
	pio1_hw->sm[mMySm].shiftctrl |= PIO_SM0_SHIFTCTRL_AUTOPULL_BITS | PIO_SM0_SHIFTCTRL_AUTOPUSH_BITS | (8 << PIO_SM0_SHIFTCTRL_PUSH_THRESH_LSB);
	pio1_hw->sm[mMySm].instr = I_JMP(0, 0, JMP_ALWAYS, startPC);	//start at the start
	
	
	//logi("data fifo info C: %08xh\n", pio1_hw->flevel);
	
	
	//logi("XMITTING %08x %08x\n", word0, word1);
	
	//give it data
	pio1_hw->txf[mMySm] = word0;
	pio1_hw->txf[mMySm] = word1;
	//logi("data fifo info D: %08xh\n", pio1_hw->flevel);
	if (nReplyBits) {
				
		//our first DMA channel moves the reply bytes from the SM's RX FIFO to replyOutP
		dma_hw->ch[mMyFirstDmaChannel + 0].read_addr = (uintptr_t)&pio1_hw->rxf[mMySm];
		dma_hw->ch[mMyFirstDmaChannel + 0].write_addr = (uintptr_t)replyOutP;
		dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count = nReplyBits / 8;
		
		//task switching stays off until the matching re-enable at the end of this function
		(void)KALTaskSwitching(false);
		
		dma_hw->ch[mMyFirstDmaChannel + 0].ctrl_trig = ((DREQ_PIO1_RX0 + mMySm) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) | (((uint32_t)mMyFirstDmaChannel + 0) << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB) | (DMA_CH0_CTRL_TRIG_DATA_SIZE_VALUE_SIZE_BYTE << DMA_CH0_CTRL_TRIG_DATA_SIZE_LSB) | DMA_CH0_CTRL_TRIG_INCR_WRITE_BITS | DMA_CH0_CTRL_TRIG_EN_BITS;
	}
	
	
	//logi("data fifo info E: %08xh\n", pio1_hw->flevel);
	
	//logi("cur xfer count %u\n", dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count);
	
	
	///HELP US DEBUG IT
//	sio_hw->gpio_clr = 1 << PIN_SD_DAT3;
//	asm volatile("dsb sy\ndsb sy\ndsb sy\ndsb sy\ndsb sy\n");
//	sio_hw->gpio_set = 1 << PIN_SD_DAT3;
	
	
	//start
	pio1_hw->ctrl |= ((0x01 << PIO_CTRL_SM_ENABLE_LSB) << mMySm);
	
	//logi("wait, data fifo info F: %08xh\n", pio1_hw->flevel);
	
	//wait until the SM parks at its terminal location
	while (pio1_hw->sm[mMySm].addr != endPC);
	
	//logi("data fifo info G: %08xh\n", pio1_hw->flevel);
	//logi("done, dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count=%u\n", dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count);
	
	//a reply counts as received only if the DMA moved every expected byte
	success = !nReplyBits || !dma_hw->ch[mMyFirstDmaChannel + 0].transfer_count;
	
	//the channel is aborted and cleared unconditionally, even if it was never started
	dma_hw->abort = 1 << mMyFirstDmaChannel;
	while (dma_hw->abort & (1 << mMyFirstDmaChannel));
	while (dma_hw->ch[mMyFirstDmaChannel + 0].al1_ctrl & DMA_CH0_CTRL_TRIG_BUSY_BITS);
	dma_hw->ch[mMyFirstDmaChannel + 0].al1_ctrl = 0;
	
	if (nReplyBits) {
		
		(void)KALTaskSwitching(true);
	}
	
	return success;
}

/*
 * Issue a command, collect its reply into replyOutP and optionally verify the reply's CRC7.
 *
 * cmd, param  - command index and argument
 * replyOutP   - buffer for nReplyBits / 8 reply bytes
 * nReplyBits  - reply length in bits (0 for none)
 * doCrcCheck  - verify the last reply byte against the CRC7 of the bytes before it
 *
 * Replies shorter than two bytes are never CRC-checked. For 136-bit replies the first byte is
 * left out of the CRC calculation.
 *
 * Returns SdCmdRespTimeout if the reply did not arrive in full, SdCmdRespCrcErr on a CRC mismatch, else SdCmdOK.
 */
static enum SdCmdRet repalmSdioCmd(uint_fast8_t cmd, uint32_t param, uint8_t *replyOutP, uint_fast8_t nReplyBits, bool doCrcCheck)
{
	const uint_fast8_t replyLen = nReplyBits / 8;
	uint_fast8_t crcPos, pos, crc = 0;
	
	if (!palmcardSdioPrvCmdRspRaw(cmd, param, replyOutP, nReplyBits))
		return SdCmdRespTimeout;
	
	//nothing to verify, or the caller does not want verification
	if (replyLen < 2 || !doCrcCheck)
		return SdCmdOK;
	
	crcPos = replyLen - 1;		//the CRC byte is the last one
	pos = (nReplyBits == 136) ? 1 : 0;
	while (pos < crcPos)
		crc = sdPrvCrcAccount(crc, replyOutP[pos++]);
	
	//the wire byte is the 7-bit CRC followed by the end bit
	if (crc * 2 + 1 != replyOutP[crcPos]) {
		
		loge("calced crc byte %02x, got %02x (full rx %02x %02x %02x %02x %02x %02x)\n",
			crc * 2 + 1, replyOutP[crcPos], replyOutP[0], replyOutP[1], replyOutP[2], replyOutP[3], replyOutP[4], replyOutP[5]);
		
		return SdCmdRespCrcErr;
	}
	
	return SdCmdOK;
}

//Report the current level of the card-detect pin: true when PIN_SD_DET reads high.
static bool repalmSdioPrvIsInserted(void)		//will only work if card power is on
{
	const uint32_t detState = sio_hw->gpio_in & (1 << PIN_SD_DET);
	
	return detState != 0;
}

//Placeholder for hooking up a card insertion/removal interrupt. Currently does nothing.
static void palmcardSdioPrvInsertIrqSetup(void)
{
	//todo
}

//Remember cbk as the insertion-notification callback and return whether a card is detected right now.
//Nothing in this file calls the stored callback yet (the insert IRQ setup is still a stub).
bool repalmSdioInsertNotifCfg(RepalmSdioInsertNotifCbk cbk)
{
	bool cardPresent;
	
	mInsertCbk = cbk;
	cardPresent = repalmSdioPrvIsInserted();
	
	return cardPresent;
}

/*
 * Claim the PIO and DMA resources this driver needs, set up the pins and register the SDIO entry points.
 *
 * firstFreeSmP, firstFreePioInstrP, firstFreeDmaChP - in: first unclaimed state machine, PIO instruction slot
 *                                                     and DMA channel; out: advanced past what we claimed
 * nDmaCh, nPioSms, nPioInstrs                       - total number of each resource
 *
 * Returns false, with nothing claimed, if any resource is in short supply. Also returns false if
 * registering an entry point fails; in that case the resources remain claimed.
 */
bool palmcardSdioSetup(uint8_t *firstFreeSmP, uint8_t *firstFreePioInstrP, uint8_t *firstFreeDmaChP, uint8_t nDmaCh, uint8_t nPioSms, uint8_t nPioInstrs)
{
	//make sure each pool still has room for our share
	if (*firstFreeSmP > nPioSms - NUM_SMS_WE_NEED)
		return false;
	if (*firstFreeDmaChP > nDmaCh - NUM_DMAS_WE_NEED)
		return false;
	if (*firstFreePioInstrP > nPioInstrs - NUM_INSTRS_WE_NEED)
		return false;
	
	//take ours...
	mMySm = *firstFreeSmP;
	mMyStartPc = *firstFreePioInstrP;
	mMyFirstDmaChannel = *firstFreeDmaChP;
	
	//...and tell the caller where the free space now begins
	*firstFreeSmP += NUM_SMS_WE_NEED;
	*firstFreePioInstrP += NUM_INSTRS_WE_NEED;
	*firstFreeDmaChP += NUM_DMAS_WE_NEED;
	
	palmcardSdioPrvPinsSetup();
	palmcardSdioPrvInsertIrqSetup();
	
	//register in a fixed order; the chain stops at the first registration that fails
	return ralSetRePalmTabFunc(REPALM_FUNC_IDX_SDIO_SET_SPEED, &repalmSdioSetSpeed) &&
		ralSetRePalmTabFunc(REPALM_FUNC_IDX_SDIO_CMD, &repalmSdioCmd) &&
		ralSetRePalmTabFunc(REPALM_FUNC_IDX_SDIO_BUSY_WAIT, &repalmSdioBusyWait) &&
		ralSetRePalmTabFunc(REPALM_FUNC_IDX_SDIO_DATA_RX, &repalmSdioDataRx) &&
		ralSetRePalmTabFunc(REPALM_FUNC_IDX_SDIO_DATA_TX, &repalmSdioDataTx) &&
		ralSetRePalmTabFunc(REPALM_FUNC_IDX_SDIO_INSERT_NOTIF_CFG, &repalmSdioInsertNotifCfg);
}


