/****************************************************************************\
 *     Copyright (C) 2011, 2012, 2013 Texas Instruments Incorporated.       *
 *                           All Rights Reserved                            *
 *                                                                          *
 * GENERAL DISCLAIMER                                                       *
 * -------------------------------------------------------------------      *
 * All software and related documentation is provided "AS IS" and without   *
 * warranty or support of any kind and Texas Instruments expressly disclaims*
 * all other warranties, express or implied, including, but not limited to, *
 * the implied warranties of merchantability and fitness for a particular   *
 * purpose.  Under no circumstances shall Texas Instruments be liable for   *
 * any incidental, special or consequential damages that result from the    *
 * use or inability to use the software or related documentation, even if   *
 * Texas Instruments has been advised of the liability.                     *
 ****************************************************************************
Example to show memory access and interrupt throughput over PCIE.
In this example, DSP1's memory is mapped to DSP0 through PCIE,
so DSP0 accesses DSP1 through the PCIE memory window just like
accessing any other memory space.
To run the 2 DSP test, you should run this project on the second core of the second
DSP first, and then on the first core of the first DSP.
Internal loopback is also supported in this example; in this case,
DSP0 actually accesses its own local memory through the PCIE memory window.
 ****************************************************************************
 * Written by :                                                             *
 *            Brighton Feng                                                 *
 *            Texas Instruments                                             *
 *            June 23, 2013                                                 *
 ***************************************************************************/
#include <C6x.h>
#include <stdio.h>
#include <string.h>
#include <csl_xmcAux.h>
//#include "PCIE_debug.h"
//#include "PCIE_intc.h"
#include "PCIE_test.h"
#include "Keystone_common.h"
#include "Keystone_DDR_init.h"
#include "Keystone_PCIE_init_drv.h"

/* Test topology selection, copied into PCIE_cfg.loop_mode by main().
 * PCIE_PHY_LOOPBACK selects the single-DSP loopback test; any other value
 * selects the 2 DSP (RC/EP) test. */
PCIE_Loopback_Mode loop_mode= PCIE_LOOPBACK_DISABLE;//PCIE_LOOPBACK_DISABLE;PCIE_PHY_LOOPBACK;
/* PCIE address width, copied into PCIE_cfg.address_width by main(). */
PCIE_Address_Width address_width= PCIE_ADDRESS_32_BITS;//PCIE_ADDRESS_32_BITS;

extern CSL_Pciess_appRegs   				*gpPCIE_app_regs ;
extern CSL_Pcie_cfg_space_rootcomplexRegs 	*gpPCIE_RC_regs ;
extern CSL_Pcie_cfg_space_endpointRegs		*gpPCIE_EP_regs ;
extern PCIE_CAP_Implement_Regs 			*gpPCIE_CAP_implement_regs;

extern volatile Uint32 gudISRFlag ;
//extern gtMSIStruct gtMSIInt[8];
Uint32 gudMSIIntIndex;
Uint32 gudMSIIntNumber;

#define K2_SIMULATOR_MSI 1
//comment out the definition below if DDR3B is not available
#define DDRB_TEST 	1

#define PCIE_PREFETCH_BASE_ADDRESS 	0x80000000
#define PCIE_NONFETCH_BASE_ADDRESS 	0x10000000

//#define MSI0_IRQ_ENABLE_SET	0x21800108
//#define MSI_IRQ 			0x21800054

/* Receive buffer polled by the EP in PCIE_Remote_Loopback_DataEcho().
 * It holds PCIE_BUFSIZE_APP data words followed by one flag word; the EP waits
 * until the flag word equals PCIE_EXAMPLE_BUF_FULL. */
#pragma DATA_ALIGN(uadRxBuf, 256);
#pragma DATA_SECTION(uadRxBuf,".pcie_rcv_buf")
Uint32 uadRxBuf[PCIE_BUFSIZE_APP+1];//The last one is for flag

/* Transmit pattern buffer: PCIE_Remoteloopback_Test() fills it with srcBuf[i] = i
 * and later compares the echoed data against it. */
#pragma DATA_ALIGN(srcBuf, 256);
Uint32 srcBuf[PCIE_BUFSIZE_APP+1];

/* Buffer the RC polls for the echoed data in PCIE_Remoteloopback_Test(); same
 * layout as uadRxBuf (data words followed by one flag word).
 * NOTE(review): the trailing address is presumably where the linker command
 * file places section ".pcie_rc_outofrange" - confirm against the .cmd file. */
#pragma DATA_ALIGN(gaudRCOutofRange, 256);
#pragma DATA_SECTION(gaudRCOutofRange,".pcie_rc_outofrange")
Uint32 gaudRCOutofRange[PCIE_BUFSIZE_APP+1];//0x88120000
/*
#pragma DATA_ALIGN(gaudEPOutofRangeAddress, 256);
#pragma DATA_SECTION(gaudEPOutofRangeAddress,".pcie_ep_outofrange")
Uint32 gaudEPOutofRangeAddress[PCIE_BUFSIZE_APP+1];//0x11850000
*/

/*
#pragma DATA_ALIGN(uadRCRxBuf, 256);
#pragma DATA_SECTION(uadRCRxBuf,"PCIE_RCV_BUF")
Uint32 uadRCRxBuf[PCIE_BUFSIZE_APP];
*/
/* PCIE remote test addresses, all expressed as offsets into the outbound data
 * window (PCIE_OUTBOUND_DATA_WINDOW).
 * The initializer is positional, so the entries must stay in the member order of
 * PCIERemoteTestAddress; the member names in the trailing comments assume that
 * order - TODO confirm against the structure declaration in the header.
 * PCIE_integrity_Test() uses the three *_DST_ADDR members and
 * PCIE_Remoteloopback_Test() writes to LL2_DST_ADDR. */
#pragma DATA_SECTION(PcieRemoteTestAddr,".far:PCIEInit")
PCIERemoteTestAddress PcieRemoteTestAddr=
{
	PCIE_OUTBOUND_DATA_WINDOW+0x0000000,//DDR_SRC_ADDR;//0x60800000
	PCIE_OUTBOUND_DATA_WINDOW+0x1000000,//DDR_DST_ADDR;//0x61800000
	PCIE_OUTBOUND_DATA_WINDOW+0x4000000,//SL2_SRC_ADDR;//0x64800000
	PCIE_OUTBOUND_DATA_WINDOW+0x4040000,//SL2_DST_ADDR;//0x64840000
	PCIE_OUTBOUND_DATA_WINDOW+0x4100000,//LL2_SRC_ADDR;//0x64900000
	PCIE_OUTBOUND_DATA_WINDOW+0x4120000,//LL2_DST_ADDR;//0x64920000
};

/* Local address the EP writes the echoed data to in
 * PCIE_Remote_Loopback_DataEcho(). The offset is chosen so that the access
 * uses EP outbound offset entry [2] (per the original note below). */
unsigned int PcieFilterAddrTestAddr = CSL_PCIE_REGS+0x1120000;/*To choose EP OB Offset[2]*/

/*used to access the EP's application registers*/
//Uint32 gudRCConfigEPAppAddr = 0x6A000000;

/* Local memory regions exposed to the PCIE link through the inbound
 * configuration (used as inbound offsets).
 * Each entry is presumably {start address, size in bytes} - confirm against the
 * PCIE_Memory_Region declaration. main() points prefetch_regions at this table
 * with uiNumRegions = 3, so all three entries are used in every mode. */
#define PCIE_CORE1_LL2_BUF 0x11820000 //BOTH for RC and EP Rcv Data
#pragma DATA_SECTION(memory_regions,".far:PCIEInit")
PCIE_Memory_Region memory_regions[]=
{
	{0x80000000, 64*1024*1024}, //DDR3
	{0x0C100000, 1*1024*1024}, 	//SL2
	{PCIE_CORE1_LL2_BUF, 256*1024} 	//LL2
};

#pragma DATA_SECTION(prefetch_regions,".far:PCIEInit")
PCIE_Memory_Regions prefetch_regions; 

#pragma DATA_SECTION(nonfetch_regions,".far:PCIEInit")
PCIE_Memory_Regions nonfetch_regions;

#pragma DATA_SECTION(inbound_memory_regions,".far:PCIEInit")
PCIE_Inbound_Memory_Regions inbound_memory_regions;

//The destination memory is RC core 1's L2 memory in this example.
//NOTE: this macro is only referenced from the commented-out table entries below.
#define PCIE_EP_OB_LL2_PREFETCH_BASE_ADDRESS 0x88000000
#pragma DATA_SECTION(EP_OB_PCIE_address,".far:PCIEInit")
/*Outbound PCIE addresses, only used in the 2 DSP test for the EP to access the RC.
main() installs this table as outbound_memory_regions.address_offset and derives
the number of outbound regions from its size (one region per 8-byte entry).
NOTE(review): every entry has bit 0 set; presumably this is the enable bit of the
outbound offset register - confirm against the PCIE init driver.*/
unsigned long long EP_OB_PCIE_address[]=
{
	0x0000000010000001,0x0000000080000001,0x0000000090000001
	/*0,0,0,0,
	0,0,0,0,
	0,0,0,0,
	PCIE_EP_OB_LL2_PREFETCH_BASE_ADDRESS   //ob_offset = 16,EP OB configuration should be done manually
	*/
};

#pragma DATA_SECTION(outbound_memory_regions,".far:PCIEInit")
PCIE_Outbound_Memory_Regions outbound_memory_regions;

#pragma DATA_SECTION(rc_cfg,".far:PCIEInit")
PCIE_RC_Config rc_cfg;

//#pragma DATA_SECTION(ep_cfg,".far:PCIEInit")
//PCIE_EP_Config ep_cfg;

#pragma DATA_SECTION(PCIE_int_cfg,".far:PCIEInit")
PCIE_Interrupt_Config PCIE_int_cfg;
#pragma DATA_SECTION(remote_cfg_setup,".far:PCIEInit")
PCIE_Remote_CFG_SETUP remote_cfg_setup;

#pragma DATA_SECTION(PCIE_cfg,".far:PCIEInit")
KeyStone_PCIE_Config PCIE_cfg;

/* Performance tests defined outside this file; PCIE_Test() calls both on the
 * RC side with &PcieRemoteTestAddr.
 * NOTE(review): these declarations omit the return type (implicit int, which
 * C99 and later reject) - check the definitions and declare the type explicitly. */
extern PCIE_DSP_core_test(PCIERemoteTestAddress* remoteAddr);
extern PCIE_edma_test(PCIERemoteTestAddress* remoteAddr);

/*
 * Simple memory test to verify the basic function of PCIE.
 *
 * Runs two fill tests (all zeros, then all ones) and one address test over
 * uiByteCount bytes starting at uiStartAddress, and sums the failure counts
 * the three helpers return.
 *
 * Returns the accumulated failure count; 0 means every test passed, in which
 * case a confirmation line is printed.
 */
unsigned int PCIE_Mem_Test(unsigned int uiStartAddress, 
	unsigned int uiByteCount)
{
	unsigned int uiErrors;

	uiErrors  = Memory_Fill_Test(uiStartAddress, uiByteCount, 0x00000000, 8, 8);
	uiErrors += Memory_Fill_Test(uiStartAddress, uiByteCount, 0xFFFFFFFF, 8, 8);
	uiErrors += Memory_Address_Test(uiStartAddress, uiByteCount, 8, 8);

	/* nothing is printed here when a test failed; the count is just returned */
	if(uiErrors != 0)
	{
		return uiErrors;
	}

	printf("PCIE memory test passed at address 0x%x, 0x%x bytes\n",
		uiStartAddress, uiByteCount);
	return 0;
}

/*
 * RC side of the RC->EP->RC echo test.
 *
 * Fills srcBuf with the pattern srcBuf[i] = i, writes the PCIE_BUFSIZE_APP words
 * to remoteAddr->LL2_DST_ADDR followed by the PCIE_EXAMPLE_BUF_FULL flag word,
 * then waits until the flag word of gaudRCOutofRange becomes
 * PCIE_EXAMPLE_BUF_FULL and compares the echoed words against srcBuf.
 *
 * remoteAddr: remote test addresses; only LL2_DST_ADDR is used.
 *
 * The function blocks forever if the echo never arrives.
 * NOTE(review): gaudRCOutofRange is not cleared before the test, so a flag word
 * left in memory by a previous run would end the wait early - confirm the
 * buffer is zeroed at load time.
 */
void PCIE_Remoteloopback_Test(PCIERemoteTestAddress * remoteAddr)
{
	Uint32 i;
	Uint8 ucErrorFlag = 0;
	/* destination is reached through the PCIE window, so access it as volatile */
	volatile Uint32 *pudDestPCIEAddr = (volatile Uint32 *)remoteAddr->LL2_DST_ADDR;
	/* The echo flag is written by the remote device, not by this program flow.
	   It must be read through a volatile lvalue, otherwise the compiler is
	   allowed to load it once and spin on the stale value. */
	volatile Uint32 *pudEchoFlag = (volatile Uint32 *)&gaudRCOutofRange[PCIE_BUFSIZE_APP];

	/* prepare the local buffers */
	for (i=0; i<PCIE_BUFSIZE_APP; i++)
	{
		uadRxBuf[i] = 0;
		srcBuf[i] = i;
	}

	puts("#########RC starts sent data to EP!#########");
	for (i=0; i<PCIE_BUFSIZE_APP; i++)
	{
		pudDestPCIEAddr[i] = srcBuf[i];
	}

	/* Mark that the buffer is full, so EP can process it */
	pudDestPCIEAddr[PCIE_BUFSIZE_APP] = PCIE_EXAMPLE_BUF_FULL;

	/* Note on cache coherence: write back is not necessary because the
	   destination is in peripheral address space instead of physical memory */

	/* Data sent to EP. RC waits for the loopback to be completed and
	   receives the data back from EP */
	do {
		/* Clean up the prefetch buffer also. */
		CSL_XMC_invalidatePrefetchBuffer();

		/* drop any cached copy so the next read sees what the EP wrote */
		CACHE_invL1d ((void *)gaudRCOutofRange,  PCIE_EXAMPLE_DSTBUF_BYTES, CACHE_FENCE_WAIT);
		CACHE_invL2  ((void *)gaudRCOutofRange,  PCIE_EXAMPLE_DSTBUF_BYTES, CACHE_FENCE_WAIT);

	} while(*pudEchoFlag != PCIE_EXAMPLE_BUF_FULL);

	puts ("RC has received echo data from EP!");
	puts("#########RC is verifying the received data!#########");

	/* check all the data; every mismatching word is reported */
	for (i=0; i<PCIE_BUFSIZE_APP; i++)
	{
		if(gaudRCOutofRange[i] != srcBuf[i])
		{
			/* the values are unsigned, so print them with %u */
			printf ("Received data = %u\nTransmited data = %u\nIndex = %u.\n\nTest failed.\n",
				(unsigned int)gaudRCOutofRange[i], (unsigned int)srcBuf[i],
				(unsigned int)i);
			ucErrorFlag = 1;
		}
	}

	if(!ucErrorFlag)
	{
		puts("All the data are correct, Rc->EP->RC echo test passed!");
	}
}

/*
 * EP side of the RC->EP->RC echo test.
 *
 * Waits until the flag word of uadRxBuf becomes PCIE_EXAMPLE_BUF_FULL, then
 * copies the PCIE_BUFSIZE_APP received words to the address held in
 * PcieFilterAddrTestAddr and finally writes the PCIE_EXAMPLE_BUF_FULL flag word
 * behind them so the RC can process the data.
 *
 * remoteAddr: not used; kept so the interface matches the other tests.
 *
 * The function blocks forever if the RC never sends the data.
 *
 * Related RC side configuration (from main()):
 *   rc_cfg.memory_base  = PCIE_NONFETCH_BASE_ADDRESS;
 *   rc_cfg.memory_limit = PCIE_NONFETCH_BASE_ADDRESS + 256*1024*1024;
 */
void PCIE_Remote_Loopback_DataEcho(PCIERemoteTestAddress * remoteAddr)
{
	Uint32 i;
	/* As the RC address filter feature is used, no address translation is done
	   here; the remote address is located in DDR3 */
	volatile Uint32 *pudDestPCIEAddr = (volatile Uint32 *)PcieFilterAddrTestAddr;
	/* The flag is written by the remote device, not by this program flow. It
	   must be read through a volatile lvalue, otherwise the compiler is allowed
	   to load it once and spin on the stale value. */
	volatile Uint32 *pudRxFlag = (volatile Uint32 *)&uadRxBuf[PCIE_BUFSIZE_APP];

	(void)remoteAddr;

	/* EP waits for the data received from RC */
	do {
		unsigned int key;

		/* Disable interrupts while the caches are being invalidated */
		key = _disable_interrupts();

		/* Clean up the prefetch buffer also. */
		CSL_XMC_invalidatePrefetchBuffer();

		CACHE_invL1d ((void *)uadRxBuf,  PCIE_EXAMPLE_DSTBUF_BYTES, CACHE_FENCE_WAIT);
		CACHE_invL2  ((void *)uadRxBuf,  PCIE_EXAMPLE_DSTBUF_BYTES, CACHE_FENCE_WAIT);

		/* Re-enable interrupts. */
		_restore_interrupts(key);

	} while(*pudRxFlag != PCIE_EXAMPLE_BUF_FULL);

	puts("EP has received data from RC!");
	puts("#########EP starts to do RC memory base and limit range test!#########");

	/* Loop back to the RC what was written into the receive buffer */
	puts("EP is sending received data back to RC....");
	for (i=0; i<PCIE_BUFSIZE_APP; i++)
	{
		pudDestPCIEAddr[i] = uadRxBuf[i];
	}

	/* Mark that the buffer is full, so RC can process it */
	pudDestPCIEAddr[PCIE_BUFSIZE_APP] = PCIE_EXAMPLE_BUF_FULL;

	puts("EP has sent data to RC, completed the loopback test on EP!");
}


/*
 * Runs PCIE_Mem_Test() over a 64KB span at each of the three destination
 * addresses in remoteAddr, in the order DDR, SL2, core 1 LL2.
 * The failure counts returned by PCIE_Mem_Test() are not evaluated here.
 */
void PCIE_integrity_Test(PCIERemoteTestAddress * remoteAddr)
{
	/* every target is tested over the same span */
	const unsigned int uiTestBytes = 0x10000;

	(void)PCIE_Mem_Test(remoteAddr->DDR_DST_ADDR, uiTestBytes);	/*DDR*/
	(void)PCIE_Mem_Test(remoteAddr->SL2_DST_ADDR, uiTestBytes);	/*SL2*/
	(void)PCIE_Mem_Test(remoteAddr->LL2_DST_ADDR, uiTestBytes);	/*Core 1 LL2*/
}

/*this interrupt test is done in loopback mode:
an MSI is triggered manually, an interrupt packet is generated,
looped back to this DSP and triggers an interrupt to the DSP core. The latency
between the trigger and the entry of the ISR is measured*/
extern Uint32 PCIE_IntTSCL;
/*
 * Measures the MSI interrupt latency.
 *
 * Records TSCL, triggers MSI 8 manually, idles the core until an interrupt
 * wakes it up and prints the difference between PCIE_IntTSCL (the time stamp
 * recorded at the entry of the ISR) and the start time stamp.
 *
 * NOTE(review): if the interrupt were serviced before the IDLE instruction is
 * reached, the core would idle until some other interrupt arrives - confirm
 * this cannot happen with the loopback latency.
 */
void PCIE_Interrupt_Latency_Test()
{
	Uint32 uiStartTSC= TSCL;
	Uint32 uiLatency;

	/*manually trigger MSI, which will generate an interrupt packet to the remote
	side. For the loopback test, the MSI_IRQ in the application register space is
	mapped through BAR0 to the first outbound window, so the target is
	CSL_PCIE_REGS plus the offset of MSI_IRQ inside the application registers*/
	KeyStone_PCIE_generate_MSI(8,//DNUM+8,
		(Uint32*)(CSL_PCIE_REGS+((Uint32)&gpPCIE_app_regs->MSI_IRQ)-(Uint32)gpPCIE_app_regs));

	/*the interrupt packet is looped back to this DSP and triggers an
	interrupt to this DSP core, here waiting for the interrupt*/
	asm(" IDLE");

	/*the time stamp at the entry of the interrupt is recorded in the ISR;
	unsigned subtraction gives the right difference across one TSCL wrap*/
	uiLatency= PCIE_IntTSCL- uiStartTSC;

	/*the cycle count is unsigned, so print it with %u*/
	printf("PCIE interrupt latency is %u cycles\n", (unsigned int)uiLatency);
}

extern Uint32 gudInterruptFlag;
/*
 * Top-level PCIE test sequence.
 *
 * For this test the DSP core number (DNUM) is used as the DSP number, so the
 * program should be run on core 0 of DSP0 and on core 1 of DSP1.
 *
 * Core 0 (RC):
 *   - PHY loopback mode: interrupt latency test and memory integrity test;
 *   - otherwise: wait until gudInterruptFlag becomes 1, clear it, then run the
 *     RC->EP->RC echo test;
 *   - in both cases the core access and EDMA performance tests follow.
 * Any other core (EP): generate MSI 8 towards the RC, then wait for data from
 * the RC and echo it back.
 *
 * pcie_cfg: configuration the PCIE was initialised with; loop_mode and
 *           inbound_memory_regions are read.
 *
 * The function blocks forever if the expected interrupt or data never arrives.
 */
void PCIE_Test(KeyStone_PCIE_Config * pcie_cfg)
{
	/* The region counts and udIB_BAR_StartAddress are only consumed by the
	   disabled (#if 0) block below, which is intentionally kept. */
	Uint32 uiNum_prefetch_regions;
	Uint32 uiNum_nonfetch_regions;
	Uint32 udIB_BAR_StartAddress;
	/* local names differ from the file-scope prefetch_regions/nonfetch_regions
	   objects to avoid shadowing them */
	PCIE_Memory_Regions * pPrefetch;
	PCIE_Memory_Regions * pNonfetch;

	pPrefetch= pcie_cfg->inbound_memory_regions->prefetch_regions;
	pNonfetch= pcie_cfg->inbound_memory_regions->nonfetch_regions;

	uiNum_prefetch_regions= pPrefetch ? pPrefetch->uiNumRegions : 0;
	uiNum_nonfetch_regions= pNonfetch ? pNonfetch->uiNumRegions : 0;

	if(0==DNUM)
	{
		/*use the configuration passed in instead of the global PCIE_cfg*/
		if(pcie_cfg->loop_mode == PCIE_PHY_LOOPBACK)
		{
			/*interrupt test is done in loopback mode*/
			PCIE_Interrupt_Latency_Test();
			/*integrity test is only valid for loopback mode*/
			PCIE_integrity_Test(&PcieRemoteTestAddr);
		}
		else
		{
			puts("RC starts to transfer data to EP!");
			/*use normal inbound/outbound transfer method to transfer data from EP to RC*/
#if 0/*Should be kept, as MSI interrupt generate on EP should only be
executed after RC finish configure EP's MSi register in remote config method*/
			/*judge IB_BAR_StartAddress*/
			if((uiNum_prefetch_regions+uiNum_nonfetch_regions)>1)
			{
				while(1)
				{
					udIB_BAR_StartAddress = *(Uint32*)((CSL_PCIE_REGS+(Uint32)&gpPCIE_app_regs->INBOUND_TRANSLATION[uiNum_prefetch_regions+uiNum_nonfetch_regions-1].IB_START_LO)-(Uint32)gpPCIE_app_regs);
					if(udIB_BAR_StartAddress !=0)
					{
						break;
					}
				}
			}
			if(uiNum_prefetch_regions>1)
			{
				while(1)
				{
					udIB_BAR_StartAddress =*(Uint32*)((CSL_PCIE_REGS+(Uint32)&gpPCIE_app_regs->INBOUND_TRANSLATION[uiNum_prefetch_regions-1].IB_START_LO)-(Uint32)gpPCIE_app_regs);
					if(udIB_BAR_StartAddress !=0)
					{
						break;
					}
				}
			}
#endif
			/*Wait for the flag to become 1 (presumably set by the MSI ISR -
			confirm). The flag is declared without volatile, so it is read
			through a volatile lvalue here; otherwise the compiler may load it
			once and turn this into an endless loop.*/
			while(1 != *(volatile Uint32 *)&gudInterruptFlag)
			{
			}
			gudInterruptFlag = 0;

			PCIE_Remoteloopback_Test(&PcieRemoteTestAddr);
		}

		/*------performance test------*/
		puts("#########RC starts core access performance test!#########");
		PCIE_DSP_core_test(&PcieRemoteTestAddr);

		puts("#########RC starts EDMA performance test!#########");
		PCIE_edma_test(&PcieRemoteTestAddr);
	}
	else
	{
		puts("#########EP starts MSI interrupt to RC!#########");
		/*the MSI target is CSL_PCIE_REGS plus the offset of MSI_IRQ inside the
		application register space*/
		KeyStone_PCIE_generate_MSI(8,//DNUM+8,
			(Uint32*)(CSL_PCIE_REGS+((Uint32)&gpPCIE_app_regs->MSI_IRQ)-(Uint32)gpPCIE_app_regs));

		puts("EP is waiting for receiving data from RC!");
		/*use RC address filter method to transfer data from EP to RC*/
		PCIE_Remote_Loopback_DataEcho(&PcieRemoteTestAddr);
	}

	//print_PCIE_status();
}


/*
 * Program entry point.
 *
 * Sequence:
 *  1. disable interrupts, start the time stamp counter, flush the caches;
 *  2. clear the test buffers and the PCIE configuration structures;
 *  3. program the MAR registers (cacheability) and the SerDes parameters;
 *  4. initialise the main PLL according to the detected board, then DDR,
 *     memory protection/EDC and exception handling;
 *  5. build the PCIE configuration for PHY loopback, RC or EP operation;
 *  6. initialise PCIE, allocate MSI (loopback and RC only), set up the
 *     address mapping and the interrupt routing;
 *  7. run PCIE_Test().
 *
 * Returns early, before any PCIE set-up, when the board type is unknown.
 */
void main()
{
	int i;
	TDSP_Board_Type DSP_Board_Type;


	IER= 0;	//disable interrupts

	/* Initialize Time stamp counter to measure cycles*/
	TSC_init();

	/* start from a clean cache state */
	CACHE_invAllL1p(CACHE_WAIT);
	CACHE_wbInvAllL1d(CACHE_WAIT);
	CACHE_wbInvAllL2(CACHE_WAIT);
	CSL_XMC_invalidatePrefetchBuffer();

	/*clear the receive and source test buffers
	(gaudRCOutofRange, which the RC polls, is not cleared here)*/
	memset(&uadRxBuf       , 0, sizeof(uadRxBuf       ));
	memset(&srcBuf       , 0, sizeof(srcBuf       ));
	/*clear all configuration data structure, make sure unused parameters are 0.
	NOTE(review): remote_cfg_setup is not cleared here; the four members set in
	the RC branch below are the only ones written in this function.*/
	memset(&prefetch_regions       , 0, sizeof(prefetch_regions       )); 
	memset(&nonfetch_regions       , 0, sizeof(nonfetch_regions       ));
	memset(&rc_cfg                 , 0, sizeof(rc_cfg                 ));
	memset(&PCIE_cfg               , 0, sizeof(PCIE_cfg               ));

	memset(&inbound_memory_regions , 0, sizeof(inbound_memory_regions ));
	memset(&outbound_memory_regions, 0, sizeof(outbound_memory_regions));
	memset(&PCIE_int_cfg          , 0, sizeof(PCIE_int_cfg          ));

	/*make other space non-cacheable and non-prefetchable (MAR[24]..MAR[127])*/
	for(i=24; i<128; i++)
		gpCGEM_regs->MAR[i]=0;

	/*make DDR cacheable and prefetchable (MAR[128]..MAR[255]):
	bit 0 and the PFX bit are set*/
	for(i=128; i<256; i++)
		gpCGEM_regs->MAR[i]=1|(1<<CSL_CGEM_MAR0_PFX_SHIFT);

	/*SerDes: 100MHz reference clock, 5GHz link speed, 2 lanes.
	The reference clock is overridden below for the TCI6614 EVM.*/
	PCIE_cfg.serdes_cfg.inputRefClock_MHz = 100;
	PCIE_cfg.serdes_cfg.linkSpeed_GHz = 5;
	PCIE_cfg.serdes_cfg.numLanes= 2;

	PCIE_cfg.serdes_cfg.loopBandwidth    = SERDES_PLL_LOOP_BAND_MID;
	PCIE_cfg.serdes_cfg.txInvertPolarity = SERDES_TX_NORMAL_POLARITY; 
	PCIE_cfg.serdes_cfg.rxInvertPolarity = SERDES_RX_NORMAL_POLARITY; 
	PCIE_cfg.serdes_cfg.rxEqualizerConfig= SERDES_RX_EQ_ADAPTIVE; 
    PCIE_cfg.serdes_cfg.rxCDR            = SERDES_RX_CDR_3;
    PCIE_cfg.serdes_cfg.rxLos            = SERDES_RX_LOS_DISABLE;
	PCIE_cfg.serdes_cfg.rxAlign          = SERDES_RX_ALIGNMENT_DISABLE; 


	/*main PLL set-up depends on the board the program is running on*/
	DSP_Board_Type= KeyStone_Get_dsp_board_type();
	if(C6678_EVM==DSP_Board_Type)
	{
		//DSP core speed: 100*10/1=1000MHz
		KeyStone_main_PLL_init(100, 10, 1); 
	}
	else if(TCI6614_EVM==DSP_Board_Type
		||DUAL_NYQUIST_EVM==DSP_Board_Type
		||C6670_EVM==DSP_Board_Type)
	{
		//DSP core speed: 122.88*236/29= 999.9889655MHz
		KeyStone_main_PLL_init(122.88, 236, 29);

		/*the TCI6614 EVM uses a 156.25MHz SerDes reference clock*/
		if(TCI6614_EVM==DSP_Board_Type)
			PCIE_cfg.serdes_cfg.inputRefClock_MHz = 156.25;
	}
	else
	{
		/*no PLL settings are known for this board: give up*/
		puts("Unknown DSP board type!");
		return;
	}

	//DDR init 66.66667*20/1= 1333
	KeyStone_DDR_init (66.66667, 20, 1, NULL);




//#if K2_SIMULATOR_MSI
	/* protect L1 as cache */
   	L1_cache_protection();

	/*enable L1P ED and scrub whole L1P*/
	L1P_EDC_setup();

   	/*enable LL2 EDC and scrub whole LL2*/
	LL2_EDC_setup();

	/*Enable MSMC EDC and setup scrubbing cycle counter= 255*1024*/
	KeyStone_SL2_EDC_enable(255);

   	/*exception configuration, also configure event 23 as the CIC exception input*/
   	KeyStone_Exception_cfg();
//#endif
	//PCIE_Interrupts_Init();

	/*take over the test mode selected by the file-scope variables*/
	PCIE_cfg.loop_mode= loop_mode;
	PCIE_cfg.address_width= address_width;
	/*enable all PCIE interrupt reception*/
	PCIE_int_cfg.MSI_rx_enable_mask= 0xFFFFFFFF;
	PCIE_int_cfg.Err_rx_enable = TRUE;
	PCIE_int_cfg.PMRST_rx_enable =TRUE;
	
	/*number of MSI may generate from this EP*/
	PCIE_int_cfg.number_tx_MSI = PCIE_16_MSI;
	PCIE_cfg.interrupt_cfg= &PCIE_int_cfg;
	PCIE_cfg.outbound_memory_regions= &outbound_memory_regions;
	PCIE_cfg.inbound_memory_regions= &inbound_memory_regions;

	outbound_memory_regions.OB_size= PCIE_OB_SIZE_8MB;

	/*RC memory windows: 256MB non-prefetchable starting at
	PCIE_NONFETCH_BASE_ADDRESS and 256MB prefetchable starting at
	PCIE_PREFETCH_BASE_ADDRESS. rc_cfg is only attached to PCIE_cfg in the
	loopback and RC branches below.*/
	rc_cfg.memory_base= PCIE_NONFETCH_BASE_ADDRESS;
	rc_cfg.memory_limit= PCIE_NONFETCH_BASE_ADDRESS+256*1024*1024;
	rc_cfg.prefetch_memory_base= PCIE_PREFETCH_BASE_ADDRESS;
	rc_cfg.prefetch_memory_limit= PCIE_PREFETCH_BASE_ADDRESS+256*1024*1024;
	
	/*Here, EP use the concept of RC to configure inbound BAR*/
	/*ep_cfg.memory_base= PCIE_NONFETCH_BASE_ADDRESS;
	ep_cfg.memory_limit= PCIE_NONFETCH_BASE_ADDRESS+8*1024*1024;
	ep_cfg.prefetch_memory_base= PCIE_PREFETCH_BASE_ADDRESS;
	ep_cfg.prefetch_memory_limit= PCIE_PREFETCH_BASE_ADDRESS+8*1024*1024;
	*/
	//inbound_memory_regions.prefetch_regions= &prefetch_regions; //Add for System reset passed
	//inbound_memory_regions.nonfetch_regions= &nonfetch_regions;

	//for loopback test
	if(PCIE_PHY_LOOPBACK==loop_mode)
	{//loopback only support in RC mode
		PCIE_cfg.PcieMode= PCIE_RC_MODE;
		PCIE_cfg.rc_cfg= &rc_cfg;

		/*for loopback test all memories are mapped to one prefetchable BAR
		because RC only has one memory BAR*/
		prefetch_regions.memory_regions= &memory_regions[0];
		prefetch_regions.uiNumRegions= 3;//special for RC loopback mode,in real RC mode, only 2 inbound regions
		prefetch_regions.bPrefetchable= TRUE;
		inbound_memory_regions.prefetch_regions= &prefetch_regions;
		printf("PCIE PHY loopback mode at %.1fGHz.\n", PCIE_cfg.serdes_cfg.linkSpeed_GHz);
	}
	else //for 2 DSP test
	{
		/*the core number (DNUM) decides the role: core 0 is the RC, any other
		core is the EP*/
		if(0==DNUM)
		{//first DSP is the RC
			PCIE_cfg.PcieMode= PCIE_RC_MODE;
			PCIE_cfg.rc_cfg= &rc_cfg;

			/*for 2 DSP test, at RC side all 3 entries of memory_regions
			(DDR3, SL2, LL2) are mapped to one prefetchable BAR,
			because RC only has one memory BAR*/
			prefetch_regions.memory_regions= &memory_regions[0];
			prefetch_regions.uiNumRegions= 3;//3
			prefetch_regions.bPrefetchable= TRUE;
			inbound_memory_regions.prefetch_regions= &prefetch_regions;
			/*for this test, two devices are connect directly,
			the bus, device and function number are all 0*/
			remote_cfg_setup.config_type= 0; 	//remote device is EP
			remote_cfg_setup.config_bus= 0;
			remote_cfg_setup.config_device= 0;
			remote_cfg_setup.config_function= 0;
			printf("PCIE normal and RC mode at %.1fGHz, should be running on core0.\n", PCIE_cfg.serdes_cfg.linkSpeed_GHz);
		}
		else
		{//the second DSP is the EP
			PCIE_cfg.PcieMode= PCIE_EP_MODE;
			//PCIE_cfg.ep_cfg= &ep_cfg;

			/*for 2 DSP test, at EP side all 3 entries of memory_regions
			(DDR3, SL2, LL2) are mapped to the prefetchable BAR;
			the non-prefetchable region list is left empty*/
			prefetch_regions.memory_regions= &memory_regions[0];
			prefetch_regions.uiNumRegions= 3;//3
			prefetch_regions.bPrefetchable= TRUE;
			nonfetch_regions.memory_regions= 0;
			nonfetch_regions.uiNumRegions= 0;
			nonfetch_regions.bPrefetchable= FALSE;
			inbound_memory_regions.prefetch_regions= &prefetch_regions;
			inbound_memory_regions.nonfetch_regions= &nonfetch_regions;

			/*in EP mode, outbound memory regions must be setup manually.
			in RC mode, outbound memory regions is setup via enumeration*/
			outbound_memory_regions.address_offset= EP_OB_PCIE_address;
			outbound_memory_regions.uiNumRegions= sizeof(EP_OB_PCIE_address)/8; //one outbound region per 8-byte entry of EP_OB_PCIE_address
			printf("PCIE normal and EP mode at %.1fGHz, should be running on core1.\n", PCIE_cfg.serdes_cfg.linkSpeed_GHz);

		}
	}

	//PCIE initialize
	KeyStone_PCIE_Init(&PCIE_cfg);
	/*PCIE MSI allocation for one device.*/
	if(PCIE_PHY_LOOPBACK==loop_mode)
	{
		/*for loopback test, the MSI CAP registers are accessed through local bus. 
		The MSI_IRQ in application register space are mapped through BAR0 to 
		PCIE_NONFETCH_BASE_ADDRESS*/
		KeyStone_PCIE_RC_MSI_allocate((PCIE_MSI_Regs *)&gpPCIE_EP_regs->MSI_CAP,
			PCIE_NONFETCH_BASE_ADDRESS+((Uint32)&gpPCIE_app_regs->MSI_IRQ)-(Uint32)gpPCIE_app_regs);
	}
	else
	{
		/*NOTE(review): the role was selected with DNUM above, but this check
		(and the final one below) uses KeyStone_Get_DSP_Number() - confirm both
		always agree for the intended set-up.*/
		if(0==KeyStone_Get_DSP_Number())
		{//first DSP is the RC

			/*Remote Configuration Transaction Setup,
			select the bus, device and function number of the target*/
			KeyStone_PCIE_remote_CFG_setup(&remote_cfg_setup);

			/*for test between two device, the RC should access MSI CAP register of EP through its remote
			configuration space, MSI PCIE write address depends on RC's PCIE address*/
			KeyStone_PCIE_RC_MSI_allocate((PCIE_MSI_Regs *)&gptPCIE_remote_EP_Regs->MSI_CAP,
				PCIE_NONFETCH_BASE_ADDRESS+((Uint32)&gpPCIE_app_regs->MSI_IRQ)-(Uint32)gpPCIE_app_regs);
				
		}
	}
	/*PCIE address allocation (via the PCI Express enumeration procedure).
	setup outbound and inbound address mapping*/

	KeyStone_PCIE_Address_setup(&PCIE_cfg);



	//interrupt route initialization
	PCIE_Interrupts_Init();
	//test PCIE
	PCIE_Test(&PCIE_cfg);

	if(0==KeyStone_Get_DSP_Number())
	{
		puts("PCIE test process is finished on Core 0!");
	}
}

