diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile index fec53633bbebb05ac7d23a4fa37f2ff5eadf23d4..1a6b072a70772ac13c7d2d2d7635e49e0fd0cbad 100644 --- a/drivers/gpu/msm/Makefile +++ b/drivers/gpu/msm/Makefile @@ -23,8 +23,10 @@ msm_kgsl_core-$(CONFIG_SYNC) += kgsl_sync.o msm_adreno-y += \ adreno_ringbuffer.o \ adreno_drawctxt.o \ + adreno_dispatch.o \ adreno_postmortem.o \ adreno_snapshot.o \ + adreno_trace.o \ adreno_a2xx.o \ adreno_a2xx_trace.o \ adreno_a2xx_snapshot.o \ diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h index be9f3acebcc5726f620248a8e7772fadffe108e3..21d475954fb22447b900785b3801797c169470e5 100644 --- a/drivers/gpu/msm/a3xx_reg.h +++ b/drivers/gpu/msm/a3xx_reg.h @@ -66,15 +66,103 @@ #define A3XX_RBBM_INT_0_MASK 0x063 #define A3XX_RBBM_INT_0_STATUS 0x064 #define A3XX_RBBM_PERFCTR_CTL 0x80 +#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81 +#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85 +#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86 +#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87 #define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90 +#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91 +#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92 +#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93 +#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94 +#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95 +#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96 +#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97 +#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98 +#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99 +#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A +#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B +#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C +#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D +#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E +#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F +#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0 +#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1 +#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2 +#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3 +#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4 +#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5 +#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6 +#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7 +#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8 +#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9 +#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA +#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB +#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC +#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD +#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE +#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF +#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0 +#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1 +#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2 +#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3 +#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4 +#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5 +#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6 +#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7 +#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8 +#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9 +#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA +#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB +#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC +#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD +#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE +#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF +#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0 +#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1 +#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2 +#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3 +#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4 +#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5 +#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6 +#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7 +#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8 +#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9 +#define 
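Note on the register block above: each A3XX performance counter is exposed as a LO/HI register pair that together holds a 64-bit count. The following standalone sketch (not driver code; read_reg() and the simulated register map are hypothetical) shows the usual safe way to combine such a split pair, re-reading the high word so a carry between the two reads cannot produce a torn value.

#include <stdint.h>
#include <stdio.h>

/* Simulated LO/HI register pair standing in for real MMIO (illustrative). */
static uint32_t regs[2] = { 0xFFFFFFF0u, 0x00000001u }; /* [0]=LO, [1]=HI */

static uint32_t read_reg(unsigned int offset)
{
        return regs[offset];
}

/*
 * Combine a LO/HI pair into one 64-bit value.  If the low word rolls
 * over between the two reads the high word changes, so retry until a
 * consistent pair is observed.
 */
static uint64_t read_counter64(unsigned int lo, unsigned int hi)
{
        uint32_t h1, l, h2;

        do {
                h1 = read_reg(hi);
                l  = read_reg(lo);
                h2 = read_reg(hi);
        } while (h1 != h2);

        return ((uint64_t)h1 << 32) | l;
}

int main(void)
{
        printf("counter = 0x%llx\n",
               (unsigned long long)read_counter64(0, 1));
        return 0;
}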
A3XX_RBBM_PERFCTR_TP_2_LO 0xCA +#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB +#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC +#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD +#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE +#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF +#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0 +#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1 +#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2 +#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3 +#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4 +#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5 +#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6 +#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7 +#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8 +#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9 +#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA +#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB #define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC #define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD #define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE #define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF #define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0 #define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1 +#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2 +#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3 +#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4 +#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5 + #define A3XX_RBBM_RBBM_CTL 0x100 -#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA +#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB #define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC #define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED #define A3XX_RBBM_DEBUG_BUS_CTL 0x111 @@ -90,6 +178,7 @@ #define A3XX_CP_MERCIU_DATA2 0x1D3 #define A3XX_CP_MEQ_ADDR 0x1DA #define A3XX_CP_MEQ_DATA 0x1DB +#define A3XX_CP_PERFCOUNTER_SELECT 0x445 #define A3XX_CP_HW_FAULT 0x45C #define A3XX_CP_AHB_FAULT 0x54D #define A3XX_CP_PROTECT_CTRL 0x45E @@ -138,6 +227,14 @@ #define A3XX_VSC_PIPE_CONFIG_7 0xC1B #define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C #define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48 +#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49 +#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A +#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B +#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88 +#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89 +#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A +#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B #define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 #define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 #define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 @@ -163,14 +260,42 @@ #define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 #define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 #define A3XX_RB_GMEM_BASE_ADDR 0xCC0 +#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6 +#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7 +#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00 +#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01 +#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02 +#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03 +#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04 +#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05 #define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44 +#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45 #define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61 #define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62 +#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64 +#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65 #define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82 +#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84 +#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85 +#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86 +#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87 +#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88 +#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89 #define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4 +#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5 +#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6 +#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7 +#define 
A3XX_SP_PERFCOUNTER4_SELECT 0xEC8 #define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9 #define A3XX_SP_PERFCOUNTER6_SELECT 0xECA #define A3XX_SP_PERFCOUNTER7_SELECT 0xECB +#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04 +#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05 +#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06 +#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07 +#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08 +#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09 #define A3XX_GRAS_CL_CLIP_CNTL 0x2040 #define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 #define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 @@ -232,12 +357,14 @@ #define A3XX_SP_VS_OUT_REG_7 0x22CE #define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 #define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_OBJ_START_REG 0x22D5 #define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7 #define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 #define A3XX_SP_VS_LENGTH_REG 0x22DF #define A3XX_SP_FS_CTRL_REG0 0x22E0 #define A3XX_SP_FS_CTRL_REG1 0x22E1 #define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_OBJ_START_REG 0x22E3 #define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5 #define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 #define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 @@ -269,10 +396,25 @@ #define A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x3058 #define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E #define A3XX_VBIF_OUT_AXI_AOOO 0x305F +#define A3XX_VBIF_PERF_CNT_EN 0x3070 +#define A3XX_VBIF_PERF_CNT_CLR 0x3071 +#define A3XX_VBIF_PERF_CNT_SEL 0x3072 +#define A3XX_VBIF_PERF_CNT0_LO 0x3073 +#define A3XX_VBIF_PERF_CNT0_HI 0x3074 +#define A3XX_VBIF_PERF_CNT1_LO 0x3075 +#define A3XX_VBIF_PERF_CNT1_HI 0x3076 +#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077 +#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078 +#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079 +#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a +#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b +#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c /* Bit flags for RBBM_CTL */ -#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1) -#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (1 << 17) +#define RBBM_RBBM_CTL_RESET_PWR_CTR0 BIT(0) +#define RBBM_RBBM_CTL_RESET_PWR_CTR1 BIT(1) +#define RBBM_RBBM_CTL_ENABLE_PWR_CTR0 BIT(16) +#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 BIT(17) /* Various flags used by the context switch code */ @@ -537,7 +679,13 @@ #define RBBM_BLOCK_ID_MARB_3 0x2b /* RBBM_CLOCK_CTL default value */ -#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF +#define A305_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA +#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF +#define A330_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAE +#define A330v2_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA + +#define A330_RBBM_GPR0_CTL_DEFAULT 0x0AE2B8AE +#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x0AA2A8AA /* COUNTABLE FOR SP PERFCOUNTER */ #define SP_FS_FULL_ALU_INSTRUCTIONS 0x0E @@ -545,4 +693,20 @@ #define SP0_ICL1_MISSES 0x1A #define SP_FS_CFLOW_INSTRUCTIONS 0x0C +/* VBIF PERFCOUNTER ENA/CLR values */ +#define VBIF_PERF_CNT_0 BIT(0) +#define VBIF_PERF_CNT_1 BIT(1) +#define VBIF_PERF_PWR_CNT_0 BIT(2) +#define VBIF_PERF_PWR_CNT_1 BIT(3) +#define VBIF_PERF_PWR_CNT_2 BIT(4) + +/* VBIF PERFCOUNTER SEL values */ +#define VBIF_PERF_CNT_0_SEL 0 +#define VBIF_PERF_CNT_0_SEL_MASK 0x7f +#define VBIF_PERF_CNT_1_SEL 8 +#define VBIF_PERF_CNT_1_SEL_MASK 0x7f00 + +/* VBIF countables */ +#define VBIF_DDR_TOTAL_CYCLES 110 + #endif diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index 1ad44646fa7cb3a179e02c3d978505e82f4325d4..4eb982186e734fae63d9255f9e1ec19cb2dcbaf3 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -30,10 +30,10 @@ #include "kgsl_cffdump.h" #include "kgsl_sharedmem.h" #include "kgsl_iommu.h" -#include 
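The VBIF select macros added near the end of a3xx_reg.h are shift/mask pairs for packing a countable (for example VBIF_DDR_TOTAL_CYCLES) into the shared A3XX_VBIF_PERF_CNT_SEL register. Below is a hedged, standalone illustration of the read-modify-write that such shift/mask values imply; it mirrors the macro values for demonstration and is not the driver's programming sequence.

#include <stdint.h>
#include <stdio.h>

/* Values mirrored from a3xx_reg.h, for illustration only. */
#define VBIF_PERF_CNT_0_SEL       0
#define VBIF_PERF_CNT_0_SEL_MASK  0x7f
#define VBIF_PERF_CNT_1_SEL       8
#define VBIF_PERF_CNT_1_SEL_MASK  0x7f00
#define VBIF_DDR_TOTAL_CYCLES     110

/* Pack a countable into one of the select fields of the register value. */
static uint32_t vbif_set_sel(uint32_t reg, unsigned int shift,
                             uint32_t mask, unsigned int countable)
{
        reg &= ~mask;                        /* clear the old selection   */
        reg |= (countable << shift) & mask;  /* install the new countable */
        return reg;
}

int main(void)
{
        uint32_t sel = 0;

        sel = vbif_set_sel(sel, VBIF_PERF_CNT_0_SEL,
                           VBIF_PERF_CNT_0_SEL_MASK, VBIF_DDR_TOTAL_CYCLES);
        printf("VBIF_PERF_CNT_SEL = 0x%08x\n", (unsigned int)sel);
        return 0;
}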
"kgsl_trace.h" #include "adreno.h" #include "adreno_pm4types.h" +#include "adreno_trace.h" #include "a2xx_reg.h" #include "a3xx_reg.h" @@ -117,19 +117,10 @@ static struct adreno_device device_3d0 = { .ib_check_level = 0, }; -/* This set of registers are used for Hang detection - * If the values of these registers are same after - * KGSL_TIMEOUT_PART time, GPU hang is reported in - * kernel log. - * *****ALERT******ALERT********ALERT************* - * Order of registers below is important, registers - * from LONG_IB_DETECT_REG_INDEX_START to - * LONG_IB_DETECT_REG_INDEX_END are used in long ib detection. - */ #define LONG_IB_DETECT_REG_INDEX_START 1 #define LONG_IB_DETECT_REG_INDEX_END 5 -unsigned int ft_detect_regs[] = { +unsigned int ft_detect_regs[FT_DETECT_REGS_COUNT] = { A3XX_RBBM_STATUS, REG_CP_RB_RPTR, /* LONG_IB_DETECT_REG_INDEX_START */ REG_CP_IB1_BASE, @@ -144,8 +135,6 @@ unsigned int ft_detect_regs[] = { 0 }; -const unsigned int ft_detect_regs_count = ARRAY_SIZE(ft_detect_regs); - /* * This is the master list of all GPU cores that are supported by this * driver. @@ -206,31 +195,323 @@ static const struct { { ADRENO_REV_A320, 3, 2, ANY_ID, ANY_ID, "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev, 512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 }, - { ADRENO_REV_A330, 3, 3, 0, 0, + { ADRENO_REV_A330, 3, 3, 0, ANY_ID, "a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev, 512, 0, 2, SZ_1M, NO_VER, NO_VER }, }; -static irqreturn_t adreno_irq_handler(struct kgsl_device *device) +/** + * adreno_perfcounter_init: Reserve kernel performance counters + * @device: device to configure + * + * The kernel needs/wants a certain group of performance counters for + * its own activities. Reserve these performance counters at init time + * to ensure that they are always reserved for the kernel. The performance + * counters used by the kernel can be obtained by the user, but these + * performance counters will remain active as long as the device is alive. + */ + +static void adreno_perfcounter_init(struct kgsl_device *device) { - irqreturn_t result; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - result = adreno_dev->gpudev->irq_handler(adreno_dev); + if (adreno_dev->gpudev->perfcounter_init) + adreno_dev->gpudev->perfcounter_init(adreno_dev); +}; + +/** + * adreno_perfcounter_start: Enable performance counters + * @adreno_dev: Adreno device to configure + * + * Ensure all performance counters are enabled that are allocated. Since + * the device was most likely stopped, we can't trust that the counters + * are still valid so make it so. 
+ */ + +static void adreno_perfcounter_start(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters; + struct adreno_perfcount_group *group; + unsigned int i, j; + + /* group id iter */ + for (i = 0; i < counters->group_count; i++) { + group = &(counters->groups[i]); + + /* countable iter */ + for (j = 0; j < group->reg_count; j++) { + if (group->regs[j].countable == + KGSL_PERFCOUNTER_NOT_USED) + continue; - if (device->requested_state == KGSL_STATE_NONE) { - if (device->pwrctrl.nap_allowed == true) { - kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); - queue_work(device->work_queue, &device->idle_check_ws); - } else if (device->pwrscale.policy != NULL) { - queue_work(device->work_queue, &device->idle_check_ws); + if (adreno_dev->gpudev->perfcounter_enable) + adreno_dev->gpudev->perfcounter_enable( + adreno_dev, i, j, + group->regs[j].countable); } } +} - /* Reset the time-out in our idle timer */ - mod_timer_pending(&device->idle_timer, - jiffies + device->pwrctrl.interval_timeout); - return result; +/** + * adreno_perfcounter_read_group: Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @reads: List of kgsl_perfcounter_read_groups + * @count: Length of list + * + * Read the performance counters for the groupid/countable pairs and return + * the 64 bit result for each pair + */ + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group *reads, unsigned int count) +{ + struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters; + struct adreno_perfcount_group *group; + struct kgsl_perfcounter_read_group *list = NULL; + unsigned int i, j; + int ret = 0; + + /* perfcounter get/put/query/read not allowed on a2xx */ + if (adreno_is_a2xx(adreno_dev)) + return -EINVAL; + + /* sanity check for later */ + if (!adreno_dev->gpudev->perfcounter_read) + return -EINVAL; + + /* sanity check params passed in */ + if (reads == NULL || count == 0 || count > 100) + return -EINVAL; + + /* verify valid inputs group ids and countables */ + for (i = 0; i < count; i++) { + if (reads[i].groupid >= counters->group_count) + return -EINVAL; + } + + list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count, + GFP_KERNEL); + if (!list) + return -ENOMEM; + + if (copy_from_user(list, reads, + sizeof(struct kgsl_perfcounter_read_group) * count)) { + ret = -EFAULT; + goto done; + } + + /* list iterator */ + for (j = 0; j < count; j++) { + list[j].value = 0; + + group = &(counters->groups[list[j].groupid]); + + /* group/counter iterator */ + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == list[j].countable) { + list[j].value = + adreno_dev->gpudev->perfcounter_read( + adreno_dev, list[j].groupid, + i, group->regs[i].offset); + break; + } + } + } + + /* write the data */ + if (copy_to_user(reads, list, + sizeof(struct kgsl_perfcounter_read_group) * + count) != 0) + ret = -EFAULT; + +done: + kfree(list); + return ret; +} + +/** + * adreno_perfcounter_query_group: Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countables: Return list of all countables in the groups counters + * @count: Max length of the array + * @max_counters: max counters for the groupid + * + * Query the current state of counters for the group. 
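adreno_perfcounter_start above re-programs every counter slot that still has a countable assigned, because a power cycle wipes the hardware selections while the software bookkeeping survives. The sketch below models that restart walk with minimal, illustrative structures (not the driver's adreno_perfcount_group definitions) and a stand-in for the gpudev->perfcounter_enable hook.

#include <stddef.h>
#include <stdio.h>

#define NOT_USED 0xFFFFFFFF

struct counter_reg {
        unsigned int countable;   /* event selected, or NOT_USED      */
        unsigned int refcount;    /* how many clients hold this slot  */
};

struct counter_group {
        struct counter_reg *regs;
        size_t reg_count;
};

/* Hypothetical hardware hook standing in for perfcounter_enable(). */
static void enable_counter(size_t group, size_t reg, unsigned int countable)
{
        printf("enable group %zu reg %zu -> countable %u\n",
               group, reg, countable);
}

/*
 * After a power cycle the hardware selections are lost, so walk every
 * group and re-program any slot that still has a countable assigned.
 */
static void restart_counters(struct counter_group *groups, size_t ngroups)
{
        size_t i, j;

        for (i = 0; i < ngroups; i++)
                for (j = 0; j < groups[i].reg_count; j++)
                        if (groups[i].regs[j].countable != NOT_USED)
                                enable_counter(i, j,
                                        groups[i].regs[j].countable);
}

int main(void)
{
        struct counter_reg sp[2] = { { 7, 1 }, { NOT_USED, 0 } };
        struct counter_group g[1] = { { sp, 2 } };

        restart_counters(g, 1);
        return 0;
}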
+ */ + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int *countables, unsigned int count, + unsigned int *max_counters) +{ + struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters; + struct adreno_perfcount_group *group; + unsigned int i; + + *max_counters = 0; + + /* perfcounter get/put/query not allowed on a2xx */ + if (adreno_is_a2xx(adreno_dev)) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + *max_counters = group->reg_count; + + /* + * if NULL countable or *count of zero, return max reg_count in + * *max_counters and return success + */ + if (countables == NULL || count == 0) + return 0; + + /* + * Go through all available counters. Write upto *count * countable + * values. + */ + for (i = 0; i < group->reg_count && i < count; i++) { + if (copy_to_user(&countables[i], &(group->regs[i].countable), + sizeof(unsigned int)) != 0) + return -EFAULT; + } + + return 0; +} + +/** + * adreno_perfcounter_get: Try to put a countable in an available counter + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be in a counter + * @offset: Return offset of the countable + * @flags: Used to setup kernel perf counters + * + * Try to place a countable in an available counter. If the countable is + * already in a counter, reference count the counter/countable pair resource + * and return success + */ + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int flags) +{ + struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters; + struct adreno_perfcount_group *group; + unsigned int i, empty = -1; + + /* always clear return variables */ + if (offset) + *offset = 0; + + /* perfcounter get/put/query not allowed on a2xx */ + if (adreno_is_a2xx(adreno_dev)) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + /* + * Check if the countable is already associated with a counter. + * Refcount and return the offset, otherwise, try and find an empty + * counter and assign the countable to it. + */ + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == countable) { + /* Countable already associated with counter */ + group->regs[i].refcount++; + group->regs[i].flags |= flags; + if (offset) + *offset = group->regs[i].offset; + return 0; + } else if (group->regs[i].countable == + KGSL_PERFCOUNTER_NOT_USED) { + /* keep track of unused counter */ + empty = i; + } + } + + /* no available counters, so do nothing else */ + if (empty == -1) + return -EBUSY; + + /* initialize the new counter */ + group->regs[empty].countable = countable; + group->regs[empty].refcount = 1; + + /* enable the new counter */ + adreno_dev->gpudev->perfcounter_enable(adreno_dev, groupid, empty, + countable); + + group->regs[empty].flags = flags; + + if (offset) + *offset = group->regs[empty].offset; + + return 0; +} + + +/** + * adreno_perfcounter_put: Release a countable from counter resource + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be freed from a counter + * + * Put a performance counter/countable pair that was previously received. If + * noone else is using the countable, free up the counter for others. 
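adreno_perfcounter_get above either bumps the refcount of a slot that already carries the requested countable or claims the first unused slot, returning -EBUSY when the group is full. A compact standalone sketch of that allocate-or-share policy follows; the slot structure and helper name are illustrative, not the driver's.

#include <stddef.h>
#include <errno.h>
#include <stdio.h>

#define NOT_USED 0xFFFFFFFF

struct slot {
        unsigned int countable;
        unsigned int refcount;
};

/* Return the slot index used for 'countable', or -EBUSY if none is free. */
static int counter_get(struct slot *slots, size_t n, unsigned int countable)
{
        size_t i;
        int empty = -1;

        for (i = 0; i < n; i++) {
                if (slots[i].countable == countable) {
                        slots[i].refcount++;      /* share the existing slot */
                        return (int)i;
                }
                if (slots[i].countable == NOT_USED && empty < 0)
                        empty = (int)i;           /* remember first free slot */
        }

        if (empty < 0)
                return -EBUSY;                    /* every slot is taken */

        slots[empty].countable = countable;       /* claim and program it */
        slots[empty].refcount = 1;
        return empty;
}

int main(void)
{
        struct slot s[2] = { { 5, 1 }, { NOT_USED, 0 } };

        printf("slot for 9: %d\n", counter_get(s, 2, 9));
        printf("slot for 5: %d\n", counter_get(s, 2, 5));
        return 0;
}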
+ */ +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters; + struct adreno_perfcount_group *group; + + unsigned int i; + + /* perfcounter get/put/query not allowed on a2xx */ + if (adreno_is_a2xx(adreno_dev)) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == countable) { + if (group->regs[i].refcount > 0) { + group->regs[i].refcount--; + + /* + * book keeping to ensure we never free a + * perf counter used by kernel + */ + if (group->regs[i].flags && + group->regs[i].refcount == 0) + group->regs[i].refcount++; + + /* make available if not used */ + if (group->regs[i].refcount == 0) + group->regs[i].countable = + KGSL_PERFCOUNTER_NOT_USED; + } + + return 0; + } + } + + return -EINVAL; +} + +static irqreturn_t adreno_irq_handler(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + return adreno_dev->gpudev->irq_handler(adreno_dev); } static void adreno_cleanup_pt(struct kgsl_device *device, @@ -255,23 +536,19 @@ static int adreno_setup_pt(struct kgsl_device *device, struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc, - GSL_PT_PAGE_RV); + result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc); if (result) goto error; - result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc); if (result) goto unmap_buffer_desc; - result = kgsl_mmu_map_global(pagetable, &device->memstore, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + result = kgsl_mmu_map_global(pagetable, &device->memstore); if (result) goto unmap_memptrs_desc; - result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory); if (result) goto unmap_memstore_desc; @@ -297,7 +574,7 @@ error: return result; } -static void adreno_iommu_setstate(struct kgsl_device *device, +static int adreno_iommu_setstate(struct kgsl_device *device, unsigned int context_id, uint32_t flags) { @@ -309,13 +586,15 @@ static void adreno_iommu_setstate(struct kgsl_device *device, int num_iommu_units, i; struct kgsl_context *context; struct adreno_context *adreno_ctx = NULL; + int result = 0; /* * If we're idle and we don't need to use the GPU to save context * state, use the CPU instead of the GPU to reprogram the * iommu for simplicity's sake. 
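The matching put path above only frees a slot when its refcount drops to zero, and it deliberately re-increments the count for slots whose flags mark them as kernel-owned so they can never be released out from under the kernel. A hedged sketch of that release rule, again with illustrative structures rather than the real ones:

#include <stddef.h>
#include <errno.h>

#define NOT_USED     0xFFFFFFFF
#define FLAG_KERNEL  0x1

struct slot {
        unsigned int countable;
        unsigned int refcount;
        unsigned int flags;
};

static int counter_put(struct slot *slots, size_t n, unsigned int countable)
{
        size_t i;

        for (i = 0; i < n; i++) {
                if (slots[i].countable != countable)
                        continue;

                if (slots[i].refcount > 0) {
                        slots[i].refcount--;

                        /* Kernel-reserved slots are never released. */
                        if ((slots[i].flags & FLAG_KERNEL) &&
                            slots[i].refcount == 0)
                                slots[i].refcount++;

                        /* Last user gone: make the slot available again. */
                        if (slots[i].refcount == 0)
                                slots[i].countable = NOT_USED;
                }
                return 0;
        }

        return -EINVAL;   /* countable was not active in this group */
}

int main(void)
{
        struct slot s[1] = { { 7, 1, 0 } };
        return counter_put(s, 1, 7);
}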
*/ - if (!adreno_dev->drawctxt_active || device->ftbl->isidle(device)) + if (!adreno_dev->drawctxt_active || device->ftbl->isidle(device) || + !atomic_read(&device->active_cnt)) return kgsl_mmu_device_setstate(&device->mmu, flags); num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu); @@ -323,11 +602,13 @@ static void adreno_iommu_setstate(struct kgsl_device *device, context = kgsl_context_get(device, context_id); if (context == NULL) - return; - adreno_ctx = context->devctxt; + return 0; + + adreno_ctx = ADRENO_CONTEXT(context); + + result = kgsl_mmu_enable_clk(&device->mmu, KGSL_IOMMU_CONTEXT_USER); - if (kgsl_mmu_enable_clk(&device->mmu, - KGSL_IOMMU_CONTEXT_USER)) + if (result) goto done; cmds += __adreno_add_idle_indirect_cmds(cmds, @@ -429,17 +710,28 @@ static void adreno_iommu_setstate(struct kgsl_device *device, sizedwords += (cmds - &link[0]); if (sizedwords) { + /* invalidate all base pointers */ *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); *cmds++ = 0x7fff; sizedwords += 2; /* This returns the per context timestamp but we need to * use the global timestamp for iommu clock disablement */ - adreno_ringbuffer_issuecmds(device, adreno_ctx, + result = adreno_ringbuffer_issuecmds(device, adreno_ctx, KGSL_CMD_FLAGS_PMODE, &link[0], sizedwords); + + if (result) { + /* On error disable the IOMMU clock right away */ + kgsl_mmu_disable_clk(&device->mmu); + goto done; + } + kgsl_mmu_disable_clk_on_ts(&device->mmu, adreno_dev->ringbuffer.global_ts, true); + + if (result) + goto done; } if (sizedwords > (sizeof(link)/sizeof(unsigned int))) { @@ -448,9 +740,10 @@ static void adreno_iommu_setstate(struct kgsl_device *device, } done: kgsl_context_put(context); + return result; } -static void adreno_gpummu_setstate(struct kgsl_device *device, +static int adreno_gpummu_setstate(struct kgsl_device *device, unsigned int context_id, uint32_t flags) { @@ -461,6 +754,7 @@ static void adreno_gpummu_setstate(struct kgsl_device *device, unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */ struct kgsl_context *context; struct adreno_context *adreno_ctx = NULL; + int ret = 0; /* * Fix target freeze issue by adding TLB flush for each submit @@ -477,8 +771,9 @@ static void adreno_gpummu_setstate(struct kgsl_device *device, if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) { context = kgsl_context_get(device, context_id); if (context == NULL) - return; - adreno_ctx = context->devctxt; + return -EINVAL; + + adreno_ctx = ADRENO_CONTEXT(context); if (flags & KGSL_MMUFLAGS_PTUPDATE) { /* wait for graphics pipe to be idle */ @@ -552,7 +847,7 @@ static void adreno_gpummu_setstate(struct kgsl_device *device, sizedwords += 2; } - adreno_ringbuffer_issuecmds(device, adreno_ctx, + ret = adreno_ringbuffer_issuecmds(device, adreno_ctx, KGSL_CMD_FLAGS_PMODE, &link[0], sizedwords); @@ -560,9 +855,11 @@ static void adreno_gpummu_setstate(struct kgsl_device *device, } else { kgsl_mmu_device_setstate(&device->mmu, flags); } + + return ret; } -static void adreno_setstate(struct kgsl_device *device, +static int adreno_setstate(struct kgsl_device *device, unsigned int context_id, uint32_t flags) { @@ -571,6 +868,8 @@ static void adreno_setstate(struct kgsl_device *device, return adreno_gpummu_setstate(device, context_id, flags); else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) return adreno_iommu_setstate(device, context_id, flags); + + return 0; } static unsigned int @@ -1188,6 +1487,10 @@ adreno_probe(struct platform_device *pdev) if (status) goto error_close_rb; + status = 
adreno_dispatcher_init(adreno_dev); + if (status) + goto error_close_device; + adreno_debugfs_init(device); kgsl_pwrscale_init(device); @@ -1196,6 +1499,8 @@ adreno_probe(struct platform_device *pdev) device->flags &= ~KGSL_FLAGS_SOFT_RESET; return 0; +error_close_device: + kgsl_device_platform_remove(device); error_close_rb: adreno_ringbuffer_close(&adreno_dev->ringbuffer); error: @@ -1215,19 +1520,18 @@ static int __devexit adreno_remove(struct platform_device *pdev) kgsl_pwrscale_detach_policy(device); kgsl_pwrscale_close(device); + adreno_dispatcher_close(adreno_dev); adreno_ringbuffer_close(&adreno_dev->ringbuffer); kgsl_device_platform_remove(device); return 0; } -static int adreno_start(struct kgsl_device *device, unsigned int init_ram) +static int adreno_init(struct kgsl_device *device) { - int status = -EINVAL; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - if (KGSL_STATE_DUMP_AND_FT != device->state) - kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); /* Power up the device */ kgsl_pwrctrl_enable(device); @@ -1250,10 +1554,9 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) if (adreno_dev->gpurev == ADRENO_REV_UNKNOWN) { KGSL_DRV_ERR(device, "Unknown chip ID %x\n", adreno_dev->chip_id); - goto error_clk_off; + BUG_ON(1); } - /* * Check if firmware supports the sync lock PM4 packets needed * for IOMMUv1 @@ -1265,7 +1568,32 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pfp_ver)) device->mmu.flags |= KGSL_MMU_FLAGS_IOMMU_SYNC; - /* Set up the MMU */ + /* Assign correct RBBM status register to hang detect regs + */ + ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status; + + adreno_perfcounter_init(device); + + /* Power down the device */ + kgsl_pwrctrl_disable(device); + + return 0; +} + +static int adreno_start(struct kgsl_device *device) +{ + int status = -EINVAL; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int state = device->state; + + kgsl_cffdump_open(device); + + kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + + /* Power up the device */ + kgsl_pwrctrl_enable(device); + + /* Set up a2xx special case */ if (adreno_is_a2xx(adreno_dev)) { /* * the MH_CLNT_INTF_CTRL_CONFIG registers aren't present @@ -1279,20 +1607,6 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) kgsl_mh_start(device); } - /* Assign correct RBBM status register to hang detect regs - */ - ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status; - - /* Add A3XX specific registers for hang detection */ - if (adreno_is_a3xx(adreno_dev)) { - ft_detect_regs[6] = A3XX_RBBM_PERFCTR_SP_7_LO; - ft_detect_regs[7] = A3XX_RBBM_PERFCTR_SP_7_HI; - ft_detect_regs[8] = A3XX_RBBM_PERFCTR_SP_6_LO; - ft_detect_regs[9] = A3XX_RBBM_PERFCTR_SP_6_HI; - ft_detect_regs[10] = A3XX_RBBM_PERFCTR_SP_5_LO; - ft_detect_regs[11] = A3XX_RBBM_PERFCTR_SP_5_HI; - } - status = kgsl_mmu_start(device); if (status) goto error_clk_off; @@ -1309,17 +1623,14 @@ static int adreno_start(struct kgsl_device *device, unsigned int init_ram) kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); device->ftbl->irqctrl(device, 1); - status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram); + status = adreno_ringbuffer_start(&adreno_dev->ringbuffer); if (status) goto error_irq_off; - /* - * While recovery is on we do not want timer to - * fire and attempt to change any device state - */ + /* Start the dispatcher */ + 
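The probe change above slots the new dispatcher init into the usual kernel unwind pattern: each failure label tears down exactly what was set up before it, in reverse order (device teardown before ringbuffer close here). A small self-contained illustration of that goto-based unwind, with hypothetical setup_a/b/c stages in place of the real calls:

#include <stdio.h>

/* Hypothetical stages standing in for ringbuffer/device/dispatcher setup. */
static int setup_a(void) { return 0; }
static int setup_b(void) { return 0; }
static int setup_c(void) { return -1; }   /* pretend the last stage fails */

static void teardown_a(void) { puts("teardown_a"); }
static void teardown_b(void) { puts("teardown_b"); }

static int probe(void)
{
        int ret;

        ret = setup_a();
        if (ret)
                goto err;

        ret = setup_b();
        if (ret)
                goto err_a;

        ret = setup_c();
        if (ret)
                goto err_b;           /* unwinds b, then a, in that order */

        return 0;

err_b:
        teardown_b();
err_a:
        teardown_a();
err:
        return ret;
}

int main(void)
{
        return probe() ? 1 : 0;
}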
adreno_dispatcher_start(adreno_dev); - if (KGSL_STATE_DUMP_AND_FT != device->state) - mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); + adreno_perfcounter_start(adreno_dev); device->reset_counter++; @@ -1332,7 +1643,11 @@ error_mmu_off: kgsl_mmu_stop(&device->mmu); error_clk_off: - kgsl_pwrctrl_disable(device); + if (KGSL_STATE_DUMP_AND_FT != device->state) { + kgsl_pwrctrl_disable(device); + /* set the state back to original state */ + kgsl_pwrctrl_set_state(device, state); + } return status; } @@ -1341,8 +1656,12 @@ static int adreno_stop(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + if (adreno_dev->drawctxt_active) + kgsl_context_put(&adreno_dev->drawctxt_active->base); + adreno_dev->drawctxt_active = NULL; + adreno_dispatcher_stop(adreno_dev); adreno_ringbuffer_stop(&adreno_dev->ringbuffer); kgsl_mmu_stop(&device->mmu); @@ -1356,790 +1675,59 @@ static int adreno_stop(struct kgsl_device *device) /* Power down the device */ kgsl_pwrctrl_disable(device); + kgsl_cffdump_close(device->id); + return 0; } -static void adreno_mark_context_status(struct kgsl_device *device, - int ft_status) +/** + * adreno_reset() - Helper function to reset the GPU + * @device: Pointer to the KGSL device structure for the GPU + * + * Helper function to reset the GPU hardware by toggling the footswitch + */ +int adreno_reset(struct kgsl_device *device) { - struct kgsl_context *context; - int next = 0; - /* - * Set the reset status of all contexts to - * INNOCENT_CONTEXT_RESET_EXT except for the bad context - * since thats the guilty party, if fault tolerance failed then - * mark all as guilty - */ + int ret; - rcu_read_lock(); - while ((context = idr_get_next(&device->context_idr, &next))) { - struct adreno_context *adreno_context = context->devctxt; - if (ft_status) { - context->reset_status = - KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; - adreno_context->flags |= CTXT_FLAGS_GPU_HANG; - } else if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT != - context->reset_status) { - if (adreno_context->flags & (CTXT_FLAGS_GPU_HANG | - CTXT_FLAGS_GPU_HANG_FT)) - context->reset_status = - KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; - else - context->reset_status = - KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; - } - next = next + 1; - } - rcu_read_unlock(); -} + ret = adreno_stop(device); + if (ret) + return ret; -static void adreno_set_max_ts_for_bad_ctxs(struct kgsl_device *device) -{ - struct kgsl_context *context; - struct adreno_context *temp_adreno_context; - int next = 0; + ret = adreno_init(device); + if (ret) + return ret; - rcu_read_lock(); - while ((context = idr_get_next(&device->context_idr, &next))) { - temp_adreno_context = context->devctxt; - if (temp_adreno_context->flags & CTXT_FLAGS_GPU_HANG) { - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(context->id, - soptimestamp), - temp_adreno_context->timestamp); - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(context->id, - eoptimestamp), - temp_adreno_context->timestamp); - } - next = next + 1; - } - rcu_read_unlock(); -} + ret = adreno_start(device); -static void adreno_destroy_ft_data(struct adreno_ft_data *ft_data) -{ - vfree(ft_data->rb_buffer); - vfree(ft_data->bad_rb_buffer); - vfree(ft_data->good_rb_buffer); -} + if (ret == 0) { + /* + * If active_cnt is non-zero then the system was active before + * going into a reset - put it back in that state + */ -static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb, - unsigned int *ptr, - bool inc) -{ - int status = -EINVAL; - 
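adreno_reset above is simply stop, init, start run back to back, with one extra step: if anything held the device active before the reset (active_cnt non-zero), the power state is put back to ACTIVE afterwards. A minimal sketch of that shape, using hypothetical stop/init/start stubs rather than the driver functions:

#include <stdio.h>

enum state { STATE_INIT, STATE_ACTIVE };

static enum state power_state = STATE_ACTIVE;
static int active_cnt = 1;            /* pretend one user held the device */

static int gpu_stop(void)  { power_state = STATE_INIT; return 0; }
static int gpu_init(void)  { return 0; }
static int gpu_start(void) { return 0; }

static int gpu_reset(void)
{
        int ret;

        ret = gpu_stop();
        if (ret)
                return ret;

        ret = gpu_init();
        if (ret)
                return ret;

        ret = gpu_start();
        if (ret == 0 && active_cnt > 0)
                power_state = STATE_ACTIVE;   /* restore the pre-reset state */

        return ret;
}

int main(void)
{
        printf("reset: %d, state: %d\n", gpu_reset(), power_state);
        return 0;
}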
unsigned int val1; - unsigned int size = rb->buffer_desc.size; - unsigned int start_ptr = *ptr; - - while ((start_ptr / sizeof(unsigned int)) != rb->wptr) { - if (inc) - start_ptr = adreno_ringbuffer_inc_wrapped(start_ptr, - size); - else - start_ptr = adreno_ringbuffer_dec_wrapped(start_ptr, - size); - kgsl_sharedmem_readl(&rb->buffer_desc, &val1, start_ptr); - if (KGSL_CMD_IDENTIFIER == val1) { - if ((start_ptr / sizeof(unsigned int)) != rb->wptr) - start_ptr = adreno_ringbuffer_dec_wrapped( - start_ptr, size); - *ptr = start_ptr; - status = 0; - break; - } + if (atomic_read(&device->active_cnt)) + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); } - return status; + + return ret; } -static int _find_cmd_seq_after_eop_ts(struct adreno_ringbuffer *rb, - unsigned int *rb_rptr, - unsigned int global_eop, - bool inc) +static int adreno_getproperty(struct kgsl_device *device, + enum kgsl_property_type type, + void *value, + unsigned int sizebytes) { int status = -EINVAL; - unsigned int temp_rb_rptr = *rb_rptr; - unsigned int size = rb->buffer_desc.size; - unsigned int val[3]; - int i = 0; - bool check = false; - - if (inc && temp_rb_rptr / sizeof(unsigned int) != rb->wptr) - return status; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - do { - /* - * when decrementing we need to decrement first and - * then read make sure we cover all the data - */ - if (!inc) - temp_rb_rptr = adreno_ringbuffer_dec_wrapped( - temp_rb_rptr, size); - kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], - temp_rb_rptr); - - if (check && ((inc && val[i] == global_eop) || - (!inc && (val[i] == - cp_type3_packet(CP_MEM_WRITE, 2) || - val[i] == CACHE_FLUSH_TS)))) { - /* decrement i, i.e i = (i - 1 + 3) % 3 if - * we are going forward, else increment i */ - i = (i + 2) % 3; - if (val[i] == rb->device->memstore.gpuaddr + - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - eoptimestamp)) { - int j = ((i + 2) % 3); - if ((inc && (val[j] == CACHE_FLUSH_TS || - val[j] == cp_type3_packet( - CP_MEM_WRITE, 2))) || - (!inc && val[j] == global_eop)) { - /* Found the global eop */ - status = 0; - break; - } - } - /* if no match found then increment i again - * since we decremented before matching */ - i = (i + 1) % 3; - } - if (inc) - temp_rb_rptr = adreno_ringbuffer_inc_wrapped( - temp_rb_rptr, size); - - i = (i + 1) % 3; - if (2 == i) - check = true; - } while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr); - /* temp_rb_rptr points to the command stream after global eop, - * move backward till the start of command sequence */ - if (!status) { - status = _find_start_of_cmd_seq(rb, &temp_rb_rptr, false); - if (!status) { - *rb_rptr = temp_rb_rptr; - KGSL_FT_INFO(rb->device, - "Offset of cmd sequence after eop timestamp: 0x%x\n", - temp_rb_rptr / sizeof(unsigned int)); - } - } - if (status) - KGSL_FT_ERR(rb->device, - "Failed to find the command sequence after eop timestamp\n"); - return status; -} + switch (type) { + case KGSL_PROP_DEVICE_INFO: + { + struct kgsl_devinfo devinfo; -static int _find_hanging_ib_sequence(struct adreno_ringbuffer *rb, - unsigned int *rb_rptr, - unsigned int ib1) -{ - int status = -EINVAL; - unsigned int temp_rb_rptr = *rb_rptr; - unsigned int size = rb->buffer_desc.size; - unsigned int val[2]; - int i = 0; - bool check = false; - bool ctx_switch = false; - - while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) { - kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr); - - if (check && val[i] == ib1) { - /* decrement i, i.e i = (i - 1 + 2) % 2 */ - i = (i + 1) % 2; - if 
(adreno_cmd_is_ib(val[i])) { - /* go till start of command sequence */ - status = _find_start_of_cmd_seq(rb, - &temp_rb_rptr, false); - - KGSL_FT_INFO(rb->device, - "Found the hanging IB at offset 0x%x\n", - temp_rb_rptr / sizeof(unsigned int)); - break; - } - /* if no match the increment i since we decremented - * before checking */ - i = (i + 1) % 2; - } - /* Make sure you do not encounter a context switch twice, we can - * encounter it once for the bad context as the start of search - * can point to the context switch */ - if (val[i] == KGSL_CONTEXT_TO_MEM_IDENTIFIER) { - if (ctx_switch) { - KGSL_FT_ERR(rb->device, - "Context switch encountered before bad " - "IB found\n"); - break; - } - ctx_switch = true; - } - i = (i + 1) % 2; - if (1 == i) - check = true; - temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr, - size); - } - if (!status) - *rb_rptr = temp_rb_rptr; - return status; -} - -static int adreno_setup_ft_data(struct kgsl_device *device, - struct adreno_ft_data *ft_data) -{ - int ret = 0; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - struct kgsl_context *context; - struct adreno_context *adreno_context; - unsigned int rb_rptr = rb->wptr * sizeof(unsigned int); - - memset(ft_data, 0, sizeof(*ft_data)); - ft_data->start_of_replay_cmds = 0xFFFFFFFF; - ft_data->replay_for_snapshot = 0xFFFFFFFF; - - adreno_regread(device, REG_CP_IB1_BASE, &ft_data->ib1); - - kgsl_sharedmem_readl(&device->memstore, &ft_data->context_id, - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - current_context)); - - kgsl_sharedmem_readl(&device->memstore, - &ft_data->global_eop, - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - eoptimestamp)); - - ft_data->rb_buffer = vmalloc(rb->buffer_desc.size); - if (!ft_data->rb_buffer) { - KGSL_MEM_ERR(device, "vmalloc(%d) failed\n", - rb->buffer_desc.size); - return -ENOMEM; - } - - ft_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size); - if (!ft_data->bad_rb_buffer) { - KGSL_MEM_ERR(device, "vmalloc(%d) failed\n", - rb->buffer_desc.size); - ret = -ENOMEM; - goto done; - } - - ft_data->good_rb_buffer = vmalloc(rb->buffer_desc.size); - if (!ft_data->good_rb_buffer) { - KGSL_MEM_ERR(device, "vmalloc(%d) failed\n", - rb->buffer_desc.size); - ret = -ENOMEM; - goto done; - } - - ft_data->status = 0; - - /* find the start of bad command sequence in rb */ - context = idr_find(&device->context_idr, ft_data->context_id); - /* Look for the command stream that is right after the global eop */ - - if (!context) { - /* - * If there is no context then fault tolerance does not need to - * replay anything, just reset GPU and thats it - */ - goto done; - } - ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr, - ft_data->global_eop + 1, false); - if (ret) - goto done; - - ft_data->start_of_replay_cmds = rb_rptr; - - if (!adreno_dev->ft_policy) - adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY; - - ft_data->ft_policy = adreno_dev->ft_policy; - - - adreno_context = context->devctxt; - if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) { - if (ft_data->ib1) { - ret = _find_hanging_ib_sequence(rb, - &rb_rptr, ft_data->ib1); - if (ret) { - KGSL_FT_ERR(device, - "Start not found for replay IB sequence\n"); - ret = 0; - goto done; - } - ft_data->start_of_replay_cmds = rb_rptr; - ft_data->replay_for_snapshot = rb_rptr; - } - } - -done: - if (ret) { - vfree(ft_data->rb_buffer); - vfree(ft_data->bad_rb_buffer); - vfree(ft_data->good_rb_buffer); - } - return ret; -} - -static int -_adreno_check_long_ib(struct kgsl_device 
*device) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int curr_global_ts = 0; - - /* check if the global ts is still the same */ - kgsl_sharedmem_readl(&device->memstore, - &curr_global_ts, - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - eoptimestamp)); - - /* Mark long ib as handled */ - adreno_dev->long_ib = 0; - - if (curr_global_ts == adreno_dev->long_ib_ts) { - KGSL_FT_ERR(device, - "IB ran too long, invalidate ctxt\n"); - return 1; - } else { - /* Do nothing GPU has gone ahead */ - KGSL_FT_INFO(device, "false long ib detection return\n"); - return 0; - } -} - -static int -_adreno_ft_restart_device(struct kgsl_device *device, - struct kgsl_context *context, - struct adreno_ft_data *ft_data) -{ - - struct adreno_context *adreno_context = context->devctxt; - - /* restart device */ - if (adreno_stop(device)) { - KGSL_FT_ERR(device, "Device stop failed\n"); - return 1; - } - - if (adreno_start(device, true)) { - KGSL_FT_ERR(device, "Device start failed\n"); - return 1; - } - - if (context) - kgsl_mmu_setstate(&device->mmu, adreno_context->pagetable, - KGSL_MEMSTORE_GLOBAL); - - /* If iommu is used then we need to make sure that the iommu clocks - * are on since there could be commands in pipeline that touch iommu */ - if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) { - if (kgsl_mmu_enable_clk(&device->mmu, - KGSL_IOMMU_CONTEXT_USER)) - return 1; - } - - return 0; -} - -static inline void -_adreno_debug_ft_info(struct kgsl_device *device, - struct adreno_ft_data *ft_data) -{ - - /* - * Dumping rb is a very useful tool to debug FT. - * It will tell us if we are extracting the rb correctly - * NOP'ing the right IB, skipping the EOF correctly etc. - */ - if (device->ft_log >= 7) { - - /* Print fault tolerance data here */ - KGSL_FT_INFO(device, "Temp RB buffer size 0x%X\n", - ft_data->rb_size); - adreno_dump_rb(device, ft_data->rb_buffer, - ft_data->rb_size<<2, 0, ft_data->rb_size); - - KGSL_FT_INFO(device, "Bad RB buffer size 0x%X\n", - ft_data->bad_rb_size); - adreno_dump_rb(device, ft_data->bad_rb_buffer, - ft_data->bad_rb_size<<2, 0, ft_data->bad_rb_size); - - KGSL_FT_INFO(device, "Good RB buffer size 0x%X\n", - ft_data->good_rb_size); - adreno_dump_rb(device, ft_data->good_rb_buffer, - ft_data->good_rb_size<<2, 0, ft_data->good_rb_size); - - } -} - -static int -_adreno_ft_resubmit_rb(struct kgsl_device *device, - struct adreno_ringbuffer *rb, - struct kgsl_context *context, - struct adreno_ft_data *ft_data, - unsigned int *buff, unsigned int size) -{ - unsigned int ret = 0; - - _adreno_debug_ft_info(device, ft_data); - - if (_adreno_ft_restart_device(device, context, ft_data)) - return 1; - - if (size) { - - /* submit commands and wait for them to pass */ - adreno_ringbuffer_restore(rb, buff, size); - - ret = adreno_idle(device); - } - - return ret; -} - - -static int -_adreno_ft(struct kgsl_device *device, - struct adreno_ft_data *ft_data) -{ - int ret = 0, i; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - struct kgsl_context *context; - struct adreno_context *adreno_context = NULL; - struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active; - - context = kgsl_context_get(device, ft_data->context_id); - - if (context == NULL) { - KGSL_FT_CRIT(device, "Last context unknown id:%d\n", - ft_data->context_id); - } else { - adreno_context = context->devctxt; - adreno_context->flags |= CTXT_FLAGS_GPU_HANG; - /* - * set the invalid ts flag to 0 for this context since we have - * 
detected a hang for it - */ - context->wait_on_invalid_ts = false; - - /* - * This flag will be set by userspace for contexts - * that do not want to be fault tolerant (ex: OPENCL) - */ - if (adreno_context->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE) { - KGSL_FT_ERR(device, - "No FT set for this context play good cmds\n"); - goto play_good_cmds; - } - - } - - /* - * Extract valid contents from rb which can still be executed after - * hang - */ - adreno_ringbuffer_extract(rb, ft_data); - - /* Check if we detected a long running IB, - * if true do not attempt replay of bad cmds */ - if (adreno_dev->long_ib) { - if (_adreno_check_long_ib(device)) { - ft_data->status = 1; - _adreno_debug_ft_info(device, ft_data); - goto play_good_cmds; - } else { - adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG; - return 0; - } - } - - /* Do not try the bad commands if hang is due to a fault */ - if (device->mmu.fault) { - KGSL_FT_ERR(device, "MMU fault skipping bad cmds\n"); - device->mmu.fault = 0; - goto play_good_cmds; - } - - if (ft_data->ft_policy & KGSL_FT_DISABLE) { - KGSL_FT_ERR(device, "NO FT policy play only good cmds\n"); - goto play_good_cmds; - } - - if (ft_data->ft_policy & KGSL_FT_REPLAY) { - - ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data, - ft_data->bad_rb_buffer, ft_data->bad_rb_size); - - if (ret) { - KGSL_FT_ERR(device, "Replay unsuccessful\n"); - ft_data->status = 1; - } else - goto play_good_cmds; - } - - if (ft_data->ft_policy & KGSL_FT_SKIPIB) { - - for (i = 0; i < ft_data->bad_rb_size; i++) { - if ((ft_data->bad_rb_buffer[i] == - CP_HDR_INDIRECT_BUFFER_PFD) && - (ft_data->bad_rb_buffer[i+1] == ft_data->ib1)) { - - ft_data->bad_rb_buffer[i] = cp_nop_packet(2); - ft_data->bad_rb_buffer[i+1] = - KGSL_NOP_IB_IDENTIFIER; - ft_data->bad_rb_buffer[i+2] = - KGSL_NOP_IB_IDENTIFIER; - break; - } - } - - if ((i == (ft_data->bad_rb_size)) || (!ft_data->ib1)) { - KGSL_FT_ERR(device, "Bad IB to NOP not found\n"); - ft_data->status = 1; - goto play_good_cmds; - } - - ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data, - ft_data->bad_rb_buffer, ft_data->bad_rb_size); - - if (ret) { - KGSL_FT_ERR(device, "NOP faulty IB unsuccessful\n"); - ft_data->status = 1; - } else { - ft_data->status = 0; - goto play_good_cmds; - } - } - - if (ft_data->ft_policy & KGSL_FT_SKIPFRAME) { - - for (i = 0; i < ft_data->bad_rb_size; i++) { - if (ft_data->bad_rb_buffer[i] == - KGSL_END_OF_FRAME_IDENTIFIER) { - ft_data->bad_rb_buffer[0] = cp_nop_packet(i); - break; - } - } - - /* EOF not found in RB, discard till EOF in - next IB submission */ - if (i == ft_data->bad_rb_size) { - adreno_context->flags |= CTXT_FLAGS_SKIP_EOF; - KGSL_FT_INFO(device, - "EOF not found in RB, skip next issueib till EOF\n"); - ft_data->bad_rb_buffer[0] = cp_nop_packet(i); - } - - ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data, - ft_data->bad_rb_buffer, ft_data->bad_rb_size); - - if (ret) { - KGSL_FT_ERR(device, "Skip EOF unsuccessful\n"); - ft_data->status = 1; - } else { - ft_data->status = 0; - goto play_good_cmds; - } - } - -play_good_cmds: - - if (ft_data->status) - KGSL_FT_ERR(device, "Bad context commands failed\n"); - else { - KGSL_FT_INFO(device, "Bad context commands success\n"); - - if (adreno_context) { - adreno_context->flags = (adreno_context->flags & - ~CTXT_FLAGS_GPU_HANG) | CTXT_FLAGS_GPU_HANG_FT; - } - adreno_dev->drawctxt_active = last_active_ctx; - } - - ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data, - ft_data->good_rb_buffer, ft_data->good_rb_size); - - if (ret) { - /* If we fail here we 
can try to invalidate another - * context and try fault tolerance again */ - ret = -EAGAIN; - KGSL_FT_ERR(device, "Playing good commands unsuccessful\n"); - goto done; - } else - KGSL_FT_INFO(device, "Playing good commands successful\n"); - - /* ringbuffer now has data from the last valid context id, - * so restore the active_ctx to the last valid context */ - if (ft_data->last_valid_ctx_id) { - struct kgsl_context *last_ctx = kgsl_context_get(device, - ft_data->last_valid_ctx_id); - - if (last_ctx) - adreno_dev->drawctxt_active = last_ctx->devctxt; - - kgsl_context_put(last_ctx); - } - -done: - /* Turn off iommu clocks */ - if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) - kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false); - - kgsl_context_put(context); - return ret; -} - -static int -adreno_ft(struct kgsl_device *device, - struct adreno_ft_data *ft_data) -{ - int ret = 0; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - - KGSL_FT_INFO(device, - "Start Parameters: IB1: 0x%X, " - "Bad context_id: %u, global_eop: 0x%x\n", - ft_data->ib1, ft_data->context_id, ft_data->global_eop); - - KGSL_FT_INFO(device, "Last issued global timestamp: %x\n", - rb->global_ts); - - /* We may need to replay commands multiple times based on whether - * multiple contexts hang the GPU */ - while (true) { - - ret = _adreno_ft(device, ft_data); - - if (-EAGAIN == ret) { - /* setup new fault tolerance parameters and retry, this - * means more than 1 contexts are causing hang */ - adreno_destroy_ft_data(ft_data); - ret = adreno_setup_ft_data(device, ft_data); - if (ret) - goto done; - KGSL_FT_INFO(device, - "Retry. Parameters: " - "IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n", - ft_data->ib1, ft_data->context_id, - ft_data->global_eop); - } else { - break; - } - } - - if (ret) - goto done; - - /* Restore correct states after fault tolerance */ - if (adreno_dev->drawctxt_active) - device->mmu.hwpagetable = - adreno_dev->drawctxt_active->pagetable; - else - device->mmu.hwpagetable = device->mmu.defaultpagetable; - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - eoptimestamp), rb->global_ts); - - /* switch to NULL ctxt */ - if (adreno_dev->drawctxt_active != NULL) - adreno_drawctxt_switch(adreno_dev, NULL, 0); - -done: - adreno_set_max_ts_for_bad_ctxs(device); - adreno_mark_context_status(device, ret); - KGSL_FT_ERR(device, "policy 0x%X status 0x%x\n", - ft_data->ft_policy, ret); - return ret; -} - -int -adreno_dump_and_exec_ft(struct kgsl_device *device) -{ - int result = -ETIMEDOUT; - struct adreno_ft_data ft_data; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - unsigned int curr_pwrlevel; - - if (device->state == KGSL_STATE_HUNG) - goto done; - if (device->state == KGSL_STATE_DUMP_AND_FT) { - mutex_unlock(&device->mutex); - wait_for_completion(&device->ft_gate); - mutex_lock(&device->mutex); - if (device->state != KGSL_STATE_HUNG) - result = 0; - } else { - kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_FT); - INIT_COMPLETION(device->ft_gate); - /* Detected a hang */ - - /* Run fault tolerance at max power level */ - curr_pwrlevel = pwr->active_pwrlevel; - kgsl_pwrctrl_pwrlevel_change(device, pwr->max_pwrlevel); - - /* Get the fault tolerance data as soon as hang is detected */ - result = adreno_setup_ft_data(device, &ft_data); - - /* - * If long ib is detected, do not attempt postmortem or - * snapshot, if GPU is still executing 
commands - * we will get errors - */ - if (!adreno_dev->long_ib) { - /* - * Trigger an automatic dump of the state to - * the console - */ - kgsl_postmortem_dump(device, 0); - - /* - * Make a GPU snapshot. For now, do it after the - * PM dump so we can at least be sure the PM dump - * will work as it always has - */ - kgsl_device_snapshot(device, 1); - } - - if (!result) { - result = adreno_ft(device, &ft_data); - adreno_destroy_ft_data(&ft_data); - } - - /* restore power level */ - kgsl_pwrctrl_pwrlevel_change(device, curr_pwrlevel); - - if (result) { - kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG); - } else { - kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); - mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); - } - complete_all(&device->ft_gate); - } -done: - return result; -} -EXPORT_SYMBOL(adreno_dump_and_exec_ft); - -static int adreno_getproperty(struct kgsl_device *device, - enum kgsl_property_type type, - void *value, - unsigned int sizebytes) -{ - int status = -EINVAL; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - - switch (type) { - case KGSL_PROP_DEVICE_INFO: - { - struct kgsl_devinfo devinfo; - - if (sizebytes != sizeof(devinfo)) { - status = -EINVAL; + if (sizebytes != sizeof(devinfo)) { + status = -EINVAL; break; } @@ -2260,199 +1848,121 @@ static int adreno_setproperty(struct kgsl_device *device, status = 0; } break; - case KGSL_PROP_FAULT_TOLERANCE: { - struct kgsl_ft_config ftd; - - if (adreno_dev->ft_user_control == 0) - break; - - if (sizebytes != sizeof(ftd)) - break; + default: + break; + } - if (copy_from_user(&ftd, (void __user *) value, - sizeof(ftd))) { - status = -EFAULT; - break; - } + return status; +} - if (ftd.ft_policy) - adreno_dev->ft_policy = ftd.ft_policy; - else - adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY; +/** + * adreno_hw_isidle() - Check if the GPU core is idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Return true if the RBBM status register for the GPU type indicates that the + * hardware is idle + */ +static bool adreno_hw_isidle(struct kgsl_device *device) +{ + unsigned int reg_rbbm_status; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - if (ftd.ft_pf_policy) - adreno_dev->ft_pf_policy = ftd.ft_policy; - else - adreno_dev->ft_pf_policy = - KGSL_FT_PAGEFAULT_DEFAULT_POLICY; + /* Don't consider ourselves idle if there is an IRQ pending */ + if (adreno_dev->gpudev->irq_pending(adreno_dev)) + return false; - if (ftd.ft_pm_dump) - device->pm_dump_enable = 1; - else - device->pm_dump_enable = 0; + /* Read the correct RBBM status for the GPU type */ + adreno_regread(device, + adreno_dev->gpudev->reg_rbbm_status, + ®_rbbm_status); - } - break; - default: - break; + if (adreno_is_a2xx(adreno_dev)) { + if (reg_rbbm_status == 0x110) + return true; + } else if (adreno_is_a3xx(adreno_dev)) { + if (!(reg_rbbm_status & 0x80000000)) + return true; + } else { + BUG(); } - return status; + return false; } -static int adreno_ringbuffer_drain(struct kgsl_device *device, - unsigned int *regs) +/** + * adreno_isidle() - return true if the GPU hardware is idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Return true if the GPU hardware is idle and there are no commands pending in + * the ringbuffer + */ +static bool adreno_isidle(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - unsigned long wait; - unsigned long timeout = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); - if 
(!(rb->flags & KGSL_FLAGS_STARTED)) - return 0; - - /* - * The first time into the loop, wait for 100 msecs and kick wptr again - * to ensure that the hardware has updated correctly. After that, kick - * it periodically every KGSL_TIMEOUT_PART msecs until the timeout - * expires - */ - - wait = jiffies + msecs_to_jiffies(100); - - do { - if (time_after(jiffies, wait)) { - /* Check to see if the core is hung */ - if (adreno_ft_detect(device, regs)) - return -ETIMEDOUT; + /* If the device isn't active, don't force it on. */ + if (device->state != KGSL_STATE_ACTIVE) + return true; - wait = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART); - } - GSL_RB_GET_READPTR(rb, &rb->rptr); + GSL_RB_GET_READPTR(rb, &rb->rptr); - if (time_after(jiffies, timeout)) { - KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n", - rb->rptr, rb->wptr); - return -ETIMEDOUT; - } - } while (rb->rptr != rb->wptr); + if (rb->rptr == rb->wptr) + return adreno_hw_isidle(device); - return 0; + return false; } -/* Caller must hold the device mutex. */ +/** + * adreno_idle() - wait for the GPU hardware to go idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet. + */ + int adreno_idle(struct kgsl_device *device) { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int rbbm_status; - unsigned long wait_time; - unsigned long wait_time_part; - unsigned int prev_reg_val[ft_detect_regs_count]; - - memset(prev_reg_val, 0, sizeof(prev_reg_val)); + unsigned long wait = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); - kgsl_cffdump_regpoll(device->id, - adreno_dev->gpudev->reg_rbbm_status << 2, - 0x00000000, 0x80000000); - -retry: - /* First, wait for the ringbuffer to drain */ - if (adreno_ringbuffer_drain(device, prev_reg_val)) - goto err; - - /* now, wait for the GPU to finish its operations */ - wait_time = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); - wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART); + /* + * Make sure the device mutex is held so the dispatcher can't send any + * more commands to the hardware + */ - while (time_before(jiffies, wait_time)) { - adreno_regread(device, adreno_dev->gpudev->reg_rbbm_status, - &rbbm_status); - if (adreno_is_a2xx(adreno_dev)) { - if (rbbm_status == 0x110) - return 0; - } else { - if (!(rbbm_status & 0x80000000)) - return 0; - } + BUG_ON(!mutex_is_locked(&device->mutex)); - /* Dont wait for timeout, detect hang faster. 
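The new adreno_idle above polls adreno_isidle() until either the hardware reports idle or ADRENO_IDLE_TIMEOUT expires, then dumps state and returns -ETIMEDOUT. The standalone sketch below mirrors that wait-with-deadline structure using wall-clock time in place of jiffies; is_idle() and the timeout value are stand-ins, not the driver's.

#include <stdbool.h>
#include <errno.h>
#include <time.h>
#include <stdio.h>

#define IDLE_TIMEOUT_MS 80        /* illustrative, not the real constant */

/* Stand-in for adreno_isidle(): ring empty and RBBM status quiet. */
static bool is_idle(void)
{
        return true;              /* pretend the GPU drained immediately */
}

static long elapsed_ms(const struct timespec *start)
{
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        return (now.tv_sec - start->tv_sec) * 1000 +
               (now.tv_nsec - start->tv_nsec) / 1000000;
}

/* Poll until idle or the deadline passes, like adreno_idle(). */
static int wait_for_idle(void)
{
        struct timespec start;

        clock_gettime(CLOCK_MONOTONIC, &start);

        while (elapsed_ms(&start) < IDLE_TIMEOUT_MS) {
                if (is_idle())
                        return 0;
        }

        return -ETIMEDOUT;
}

int main(void)
{
        printf("wait_for_idle: %d\n", wait_for_idle());
        return 0;
}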
- */ - if (time_after(jiffies, wait_time_part)) { - wait_time_part = jiffies + - msecs_to_jiffies(KGSL_TIMEOUT_PART); - if ((adreno_ft_detect(device, prev_reg_val))) - goto err; - } + if (adreno_is_a2xx(adreno_dev)) + kgsl_cffdump_regpoll(device, + adreno_dev->gpudev->reg_rbbm_status << 2, 0x110, 0x110); + else + kgsl_cffdump_regpoll(device, + adreno_dev->gpudev->reg_rbbm_status << 2, 0, + 0x80000000); + while (time_before(jiffies, wait)) { + if (adreno_isidle(device)) + return 0; } -err: - KGSL_DRV_ERR(device, "spun too long waiting for RB to idle\n"); - if (KGSL_STATE_DUMP_AND_FT != device->state && - !adreno_dump_and_exec_ft(device)) { - wait_time = jiffies + ADRENO_IDLE_TIMEOUT; - goto retry; - } + kgsl_postmortem_dump(device, 0); + return -ETIMEDOUT; } /** - * is_adreno_rbbm_status_idle - Check if GPU core is idle by probing - * rbbm_status register - * @device - Pointer to the GPU device whose idle status is to be - * checked - * @returns - Returns whether the core is idle (based on rbbm_status) - * false if the core is active, true if the core is idle + * adreno_drain() - Drain the dispatch queue + * @device: Pointer to the KGSL device structure for the GPU + * + * Tell the dispatcher to pause - this has the effect of draining the inflight + * command batches */ -static bool is_adreno_rbbm_status_idle(struct kgsl_device *device) -{ - unsigned int reg_rbbm_status; - bool status = false; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - - /* Is the core idle? */ - adreno_regread(device, - adreno_dev->gpudev->reg_rbbm_status, - ®_rbbm_status); - - if (adreno_is_a2xx(adreno_dev)) { - if (reg_rbbm_status == 0x110) - status = true; - } else { - if (!(reg_rbbm_status & 0x80000000)) - status = true; - } - return status; -} - -static unsigned int adreno_isidle(struct kgsl_device *device) +static int adreno_drain(struct kgsl_device *device) { - int status = false; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - WARN_ON(device->state == KGSL_STATE_INIT); - /* If the device isn't active, don't force it on. */ - if (device->state == KGSL_STATE_ACTIVE) { - /* Is the ring buffer is empty? */ - GSL_RB_GET_READPTR(rb, &rb->rptr); - if (!device->active_cnt && (rb->rptr == rb->wptr)) { - /* - * Are there interrupts pending? If so then pretend we - * are not idle - this avoids the possiblity that we go - * to a lower power state without handling interrupts - * first. - */ - - if (!adreno_dev->gpudev->irq_pending(adreno_dev)) { - /* Is the core idle? */ - status = is_adreno_rbbm_status_idle(device); - } - } - } else { - status = true; - } - return status; + adreno_dispatcher_pause(adreno_dev); + return 0; } /* Caller must hold the device mutex. 
*/ @@ -2476,20 +1986,20 @@ struct kgsl_memdesc *adreno_find_ctxtmem(struct kgsl_device *device, unsigned int pt_base, unsigned int gpuaddr, unsigned int size) { struct kgsl_context *context; - struct adreno_context *adreno_context = NULL; int next = 0; struct kgsl_memdesc *desc = NULL; - rcu_read_lock(); + read_lock(&device->context_lock); while (1) { context = idr_get_next(&device->context_idr, &next); if (context == NULL) break; - adreno_context = (struct adreno_context *)context->devctxt; - - if (kgsl_mmu_pt_equal(&device->mmu, adreno_context->pagetable, + if (kgsl_mmu_pt_equal(&device->mmu, context->pagetable, pt_base)) { + struct adreno_context *adreno_context; + + adreno_context = ADRENO_CONTEXT(context); desc = &adreno_context->gpustate; if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size)) break; @@ -2501,7 +2011,7 @@ struct kgsl_memdesc *adreno_find_ctxtmem(struct kgsl_device *device, next = next + 1; desc = NULL; } - rcu_read_unlock(); + read_unlock(&device->context_lock); return desc; } @@ -2571,7 +2081,7 @@ void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, if (!in_interrupt()) kgsl_pre_hwaccess(device); - trace_kgsl_regwrite(device, offsetwords, value); + kgsl_trace_regwrite(device, offsetwords, value); kgsl_cffdump_regwrite(device->id, offsetwords << 2, value); reg = (unsigned int *)(device->reg_virt + (offsetwords << 2)); @@ -2582,366 +2092,6 @@ void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, __raw_writel(value, reg); } -static unsigned int _get_context_id(struct kgsl_context *k_ctxt) -{ - unsigned int context_id = KGSL_MEMSTORE_GLOBAL; - if (k_ctxt != NULL) { - struct adreno_context *a_ctxt = k_ctxt->devctxt; - if (k_ctxt->id == KGSL_CONTEXT_INVALID || a_ctxt == NULL) - context_id = KGSL_CONTEXT_INVALID; - else if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS) - context_id = k_ctxt->id; - } - - return context_id; -} - -static unsigned int adreno_check_hw_ts(struct kgsl_device *device, - struct kgsl_context *context, unsigned int timestamp) -{ - int status = 0; - unsigned int ref_ts, enableflag; - unsigned int context_id = _get_context_id(context); - - /* - * If the context ID is invalid, we are in a race with - * the context being destroyed by userspace so bail. 
- */ - if (context_id == KGSL_CONTEXT_INVALID) { - KGSL_DRV_WARN(device, "context was detached"); - return -EINVAL; - } - - status = kgsl_check_timestamp(device, context, timestamp); - if (status) - return status; - - kgsl_sharedmem_readl(&device->memstore, &enableflag, - KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable)); - /* - * Barrier is needed here to make sure the read from memstore - * has posted - */ - - mb(); - - if (enableflag) { - kgsl_sharedmem_readl(&device->memstore, &ref_ts, - KGSL_MEMSTORE_OFFSET(context_id, - ref_wait_ts)); - - /* Make sure the memstore read has posted */ - mb(); - if (timestamp_cmp(ref_ts, timestamp) >= 0) { - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(context_id, - ref_wait_ts), timestamp); - /* Make sure the memstore write is posted */ - wmb(); - } - } else { - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(context_id, - ref_wait_ts), timestamp); - enableflag = 1; - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(context_id, - ts_cmp_enable), enableflag); - /* Make sure the memstore write gets posted */ - wmb(); - - /* - * submit a dummy packet so that even if all - * commands upto timestamp get executed we will still - * get an interrupt - */ - - if (context && device->state != KGSL_STATE_SLUMBER) - adreno_ringbuffer_issuecmds(device, context->devctxt, - KGSL_CMD_FLAGS_GET_INT, NULL, 0); - } - - return 0; -} - -/* Return 1 if the event timestmp has already passed, 0 if it was marked */ -static int adreno_next_event(struct kgsl_device *device, - struct kgsl_event *event) -{ - return adreno_check_hw_ts(device, event->context, event->timestamp); -} - -static int adreno_check_interrupt_timestamp(struct kgsl_device *device, - struct kgsl_context *context, unsigned int timestamp) -{ - int status; - - mutex_lock(&device->mutex); - status = adreno_check_hw_ts(device, context, timestamp); - mutex_unlock(&device->mutex); - - return status; -} - -/* - wait_event_interruptible_timeout checks for the exit condition before - placing a process in wait q. For conditional interrupts we expect the - process to already be in its wait q when its exit condition checking - function is called. 
-*/ -#define kgsl_wait_event_interruptible_timeout(wq, condition, timeout, io)\ -({ \ - long __ret = timeout; \ - if (io) \ - __wait_io_event_interruptible_timeout(wq, condition, __ret);\ - else \ - __wait_event_interruptible_timeout(wq, condition, __ret);\ - __ret; \ -}) - - - -unsigned int adreno_ft_detect(struct kgsl_device *device, - unsigned int *prev_reg_val) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int curr_reg_val[ft_detect_regs_count]; - unsigned int fast_hang_detected = 1; - unsigned int long_ib_detected = 1; - unsigned int i; - static unsigned long next_hang_detect_time; - static unsigned int prev_global_ts; - unsigned int curr_global_ts = 0; - unsigned int curr_context_id = 0; - static struct adreno_context *curr_context; - static struct kgsl_context *context; - - if (!adreno_dev->fast_hang_detect) - fast_hang_detected = 0; - - if (!adreno_dev->long_ib_detect) - long_ib_detected = 0; - - if (is_adreno_rbbm_status_idle(device)) { - - /* - * On A20X if the RPTR != WPTR and the device is idle, then - * the last write to WPTR probably failed to latch so write it - * again - */ - - if (adreno_is_a2xx(adreno_dev)) { - unsigned int rptr; - adreno_regread(device, REG_CP_RB_RPTR, &rptr); - if (rptr != adreno_dev->ringbuffer.wptr) - adreno_regwrite(device, REG_CP_RB_WPTR, - adreno_dev->ringbuffer.wptr); - } - - return 0; - } - - /* - * Time interval between hang detection should be KGSL_TIMEOUT_PART - * or more, if next hang detection is requested < KGSL_TIMEOUT_PART - * from the last time do nothing. - */ - if ((next_hang_detect_time) && - (time_before(jiffies, next_hang_detect_time))) - return 0; - else - next_hang_detect_time = (jiffies + - msecs_to_jiffies(KGSL_TIMEOUT_PART-1)); - - /* Read the current Hang detect reg values here */ - for (i = 0; i < ft_detect_regs_count; i++) { - if (ft_detect_regs[i] == 0) - continue; - adreno_regread(device, ft_detect_regs[i], - &curr_reg_val[i]); - } - - /* Read the current global timestamp here */ - kgsl_sharedmem_readl(&device->memstore, - &curr_global_ts, - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - eoptimestamp)); - - mb(); - - if (curr_global_ts == prev_global_ts) { - - /* Get the current context here */ - if (context == NULL) { - kgsl_sharedmem_readl(&device->memstore, - &curr_context_id, - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - current_context)); - context = idr_find(&device->context_idr, - curr_context_id); - if (context != NULL) { - curr_context = context->devctxt; - curr_context->ib_gpu_time_used = 0; - } else { - KGSL_DRV_ERR(device, - "Fault tolerance no context found\n"); - } - } - - mb(); - - if (curr_context != NULL) { - - curr_context->ib_gpu_time_used += KGSL_TIMEOUT_PART; - KGSL_FT_INFO(device, - "Proc %s used GPU Time %d ms on timestamp 0x%X\n", - curr_context->pid_name, curr_context->ib_gpu_time_used, - curr_global_ts+1); - - for (i = 0; i < ft_detect_regs_count; i++) { - if (curr_reg_val[i] != prev_reg_val[i]) { - fast_hang_detected = 0; - - /* Check for long IB here */ - if ((i >= - LONG_IB_DETECT_REG_INDEX_START) - && - (i <= - LONG_IB_DETECT_REG_INDEX_END)) - long_ib_detected = 0; - } - } - - if (fast_hang_detected) { - KGSL_FT_ERR(device, - "Proc %s, ctxt_id %d ts %d triggered fault tolerance" - " on global ts %d\n", - curr_context->pid_name, curr_context->id - , (kgsl_readtimestamp(device, context, - KGSL_TIMESTAMP_RETIRED)+1), - curr_global_ts+1); - return 1; - } - - if ((long_ib_detected) && - (!(curr_context->flags & - CTXT_FLAGS_NO_FAULT_TOLERANCE))) { - 
curr_context->ib_gpu_time_used += - KGSL_TIMEOUT_PART; - if (curr_context->ib_gpu_time_used > - KGSL_TIMEOUT_LONG_IB_DETECTION) { - if (adreno_dev->long_ib_ts != - curr_global_ts) { - KGSL_FT_ERR(device, - "Proc %s, ctxt_id %d ts %d" - "used GPU for %d ms long ib " - "detected on global ts %d\n", - curr_context->pid_name, - curr_context->id, - (kgsl_readtimestamp(device, - context, - KGSL_TIMESTAMP_RETIRED)+1), - curr_context->ib_gpu_time_used, - curr_global_ts+1); - adreno_dev->long_ib = 1; - adreno_dev->long_ib_ts = - curr_global_ts; - curr_context->ib_gpu_time_used = - 0; - return 1; - } - } - } - } else { - KGSL_FT_ERR(device, - "Last context unknown id:%d\n", - curr_context_id); - } - } else { - /* GPU is moving forward */ - prev_global_ts = curr_global_ts; - context = NULL; - curr_context = NULL; - adreno_dev->long_ib = 0; - adreno_dev->long_ib_ts = 0; - } - - - /* If hangs are not detected copy the current reg values - * to previous values and return no hang */ - for (i = 0; i < ft_detect_regs_count; i++) - prev_reg_val[i] = curr_reg_val[i]; - return 0; -} - -/** - * adreno_handle_hang - Process a hang detected in adreno_waittimestamp - * @device - pointer to a KGSL device structure - * @context - pointer to the active KGSL context - * @timestamp - the timestamp that the process was waiting for - * - * Process a possible GPU hang and try fault tolerance from it - * cleanly - */ -static int adreno_handle_hang(struct kgsl_device *device, - struct kgsl_context *context, unsigned int timestamp) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int context_id = _get_context_id(context); - unsigned int ts_issued; - unsigned int rptr; - - /* Do one last check to see if we somehow made it through */ - if (kgsl_check_timestamp(device, context, timestamp)) - return 0; - - ts_issued = adreno_context_timestamp(context, &adreno_dev->ringbuffer); - - adreno_regread(device, REG_CP_RB_RPTR, &rptr); - mb(); - - KGSL_DRV_WARN(device, - "Device hang detected while waiting for timestamp: " - "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, " - "retired timestamp: <%d:0x%x>, wptr: 0x%x, rptr: 0x%x\n", - context_id, timestamp, context_id, ts_issued, context_id, - kgsl_readtimestamp(device, context, - KGSL_TIMESTAMP_RETIRED), - adreno_dev->ringbuffer.wptr, rptr); - - /* Return 0 after a successful fault tolerance */ - if (!adreno_dump_and_exec_ft(device)) - return 0; - - return -ETIMEDOUT; -} - -static int _check_pending_timestamp(struct kgsl_device *device, - struct kgsl_context *context, unsigned int timestamp) -{ - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int context_id = _get_context_id(context); - unsigned int ts_issued; - - if (context_id == KGSL_CONTEXT_INVALID) - return -EINVAL; - - ts_issued = adreno_context_timestamp(context, &adreno_dev->ringbuffer); - - if (timestamp_cmp(timestamp, ts_issued) <= 0) - return 0; - - if (context && !context->wait_on_invalid_ts) { - KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, last issued ts <%d:0x%x>\n", - context_id, timestamp, context_id, ts_issued); - - /* Only print this message once */ - context->wait_on_invalid_ts = true; - } - - return -EINVAL; -} - /** * adreno_waittimestamp - sleep while waiting for the specified timestamp * @device - pointer to a KGSL device structure @@ -2949,155 +2099,35 @@ static int _check_pending_timestamp(struct kgsl_device *device, * @timestamp - GPU timestamp to wait for * @msecs - amount of time to wait (in milliseconds) * - * Wait 'msecs' milliseconds for the 
specified timestamp to expire. Wake up - * every KGSL_TIMEOUT_PART milliseconds to check for a device hang and process - * one if it happened. Otherwise, spend most of our time in an interruptible - * wait for the timestamp interrupt to be processed. This function must be - * called with the mutex already held. + * Wait up to 'msecs' milliseconds for the specified timestamp to expire. */ static int adreno_waittimestamp(struct kgsl_device *device, - struct kgsl_context *context, - unsigned int timestamp, - unsigned int msecs) + struct kgsl_context *context, + unsigned int timestamp, + unsigned int msecs) { - static unsigned int io_cnt; - struct adreno_context *adreno_ctx = context ? context->devctxt : NULL; - struct kgsl_pwrctrl *pwr = &device->pwrctrl; - unsigned int context_id = _get_context_id(context); - unsigned int prev_reg_val[ft_detect_regs_count]; - unsigned int time_elapsed = 0; - unsigned int wait; - int ts_compare = 1; - int io, ret = -ETIMEDOUT; - - if (context_id == KGSL_CONTEXT_INVALID) { - KGSL_DRV_WARN(device, "context was detached"); - return -EINVAL; - } - - /* - * Check to see if the requested timestamp is "newer" then the last - * timestamp issued. If it is complain once and return error. Only - * print the message once per context so that badly behaving - * applications don't spam the logs - */ + int ret; + struct adreno_context *drawctxt; - if (adreno_ctx && !(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS)) { - if (_check_pending_timestamp(device, context, timestamp)) - return -EINVAL; - - /* Reset the invalid timestamp flag on a valid wait */ - context->wait_on_invalid_ts = false; + if (context == NULL) { + /* If they are doing then complain once */ + dev_WARN_ONCE(device->dev, 1, + "IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n"); + return -EINVAL; } + /* Return -EINVAL if the context has been detached */ + if (kgsl_context_detached(context)) + return -EINVAL; - /* Clear the registers used for hang detection */ - memset(prev_reg_val, 0, sizeof(prev_reg_val)); - - /* - * On the first time through the loop only wait 100ms. - * this gives enough time for the engine to start moving and oddly - * provides better hang detection results than just going the full - * KGSL_TIMEOUT_PART right off the bat. The exception to this rule - * is if msecs happens to be < 100ms then just use the full timeout - */ - - wait = 100; - - do { - long status; - - /* - * if the timestamp happens while we're not - * waiting, there's a chance that an interrupt - * will not be generated and thus the timestamp - * work needs to be queued. - */ - - if (kgsl_check_timestamp(device, context, timestamp)) { - queue_work(device->work_queue, &device->ts_expired_ws); - ret = 0; - break; - } - - /* Check to see if the GPU is hung */ - if (adreno_ft_detect(device, prev_reg_val)) { - ret = adreno_handle_hang(device, context, timestamp); - break; - } - - /* - * For proper power accounting sometimes we need to call - * io_wait_interruptible_timeout and sometimes we need to call - * plain old wait_interruptible_timeout. We call the regular - * timeout N times out of 100, where N is a number specified by - * the current power level - */ - - io_cnt = (io_cnt + 1) % 100; - io = (io_cnt < pwr->pwrlevels[pwr->active_pwrlevel].io_fraction) - ? 
0 : 1; - - mutex_unlock(&device->mutex); - - /* Wait for a timestamp event */ - status = kgsl_wait_event_interruptible_timeout( - device->wait_queue, - adreno_check_interrupt_timestamp(device, context, - timestamp), msecs_to_jiffies(wait), io); - - mutex_lock(&device->mutex); - - /* - * If status is non zero then either the condition was satisfied - * or there was an error. In either event, this is the end of - * the line for us - */ - - if (status != 0) { - ret = (status > 0) ? 0 : (int) status; - break; - } - time_elapsed += wait; - - - /* If user specified timestamps are being used, wait at least - * KGSL_SYNCOBJ_SERVER_TIMEOUT msecs for the user driver to - * issue a IB for a timestamp before checking to see if the - * current timestamp we are waiting for is valid or not - */ - - if (ts_compare && (adreno_ctx && - (adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS))) { - if (time_elapsed > KGSL_SYNCOBJ_SERVER_TIMEOUT) { - ret = _check_pending_timestamp(device, context, - timestamp); - if (ret) - break; - - /* Don't do this check again */ - ts_compare = 0; - - /* - * Reset the invalid timestamp flag on a valid - * wait - */ - - context->wait_on_invalid_ts = false; - } - } - - /* - * We want to wait the floor of KGSL_TIMEOUT_PART - * and (msecs - time_elapsed). - */ + ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context, + timestamp, msecs_to_jiffies(msecs)); - if (KGSL_TIMEOUT_PART < (msecs - time_elapsed)) - wait = KGSL_TIMEOUT_PART; - else - wait = (msecs - time_elapsed); + /* If the context got invalidated then return a specific error */ + drawctxt = ADRENO_CONTEXT(context); - } while (!msecs || time_elapsed < msecs); + if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) + ret = -EDEADLK; return ret; } @@ -3106,13 +2136,13 @@ static unsigned int adreno_readtimestamp(struct kgsl_device *device, struct kgsl_context *context, enum kgsl_timestamp_type type) { unsigned int timestamp = 0; - unsigned int context_id = _get_context_id(context); + unsigned int id = context ? context->id : KGSL_MEMSTORE_GLOBAL; /* - * If the context ID is invalid, we are in a race with + * If the context is detached we are in a race with * the context being destroyed by userspace so bail. 
*/ - if (context_id == KGSL_CONTEXT_INVALID) { + if (context && kgsl_context_detached(context)) { KGSL_DRV_WARN(device, "context was detached"); return timestamp; } @@ -3126,11 +2156,11 @@ static unsigned int adreno_readtimestamp(struct kgsl_device *device, } case KGSL_TIMESTAMP_CONSUMED: kgsl_sharedmem_readl(&device->memstore, ×tamp, - KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)); + KGSL_MEMSTORE_OFFSET(id, soptimestamp)); break; case KGSL_TIMESTAMP_RETIRED: kgsl_sharedmem_readl(&device->memstore, ×tamp, - KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)); + KGSL_MEMSTORE_OFFSET(id, eoptimestamp)); break; } @@ -3142,30 +2172,58 @@ static unsigned int adreno_readtimestamp(struct kgsl_device *device, static long adreno_ioctl(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); int result = 0; - struct kgsl_drawctxt_set_bin_base_offset *binbase; - struct kgsl_context *context; switch (cmd) { - case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET: + case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET: { + struct kgsl_drawctxt_set_bin_base_offset *binbase = data; + struct kgsl_context *context; + binbase = data; context = kgsl_context_get_owner(dev_priv, binbase->drawctxt_id); if (context) { adreno_drawctxt_set_bin_base_offset( - dev_priv->device, context, binbase->offset); + device, context, binbase->offset); } else { result = -EINVAL; - KGSL_DRV_ERR(dev_priv->device, + KGSL_DRV_ERR(device, "invalid drawctxt drawctxt_id %d " "device_id=%d\n", - binbase->drawctxt_id, dev_priv->device->id); + binbase->drawctxt_id, device->id); } kgsl_context_put(context); break; - + } + case IOCTL_KGSL_PERFCOUNTER_GET: { + struct kgsl_perfcounter_get *get = data; + result = adreno_perfcounter_get(adreno_dev, get->groupid, + get->countable, &get->offset, PERFCOUNTER_FLAG_NONE); + break; + } + case IOCTL_KGSL_PERFCOUNTER_PUT: { + struct kgsl_perfcounter_put *put = data; + result = adreno_perfcounter_put(adreno_dev, put->groupid, + put->countable); + break; + } + case IOCTL_KGSL_PERFCOUNTER_QUERY: { + struct kgsl_perfcounter_query *query = data; + result = adreno_perfcounter_query_group(adreno_dev, + query->groupid, query->countables, + query->count, &query->max_counters); + break; + } + case IOCTL_KGSL_PERFCOUNTER_READ: { + struct kgsl_perfcounter_read *read = data; + result = adreno_perfcounter_read_group(adreno_dev, + read->reads, read->count); + break; + } default: KGSL_DRV_INFO(dev_priv->device, "invalid ioctl code %08x\n", cmd); @@ -3187,15 +2245,20 @@ static void adreno_power_stats(struct kgsl_device *device, { struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct kgsl_pwrctrl *pwr = &device->pwrctrl; - unsigned int cycles; - - /* Get the busy cycles counted since the counter was last reset */ - /* Calling this function also resets and restarts the counter */ + unsigned int cycles = 0; - cycles = adreno_dev->gpudev->busy_cycles(adreno_dev); + /* + * Get the busy cycles counted since the counter was last reset. + * If we're not currently active, there shouldn't have been + * any cycles since the last time this function was called. + */ + if (device->state == KGSL_STATE_ACTIVE) + cycles = adreno_dev->gpudev->busy_cycles(adreno_dev); - /* In order to calculate idle you have to have run the algorithm * - * at least once to get a start time. */ + /* + * In order to calculate idle you have to have run the algorithm + * at least once to get a start time. 
+ */ if (pwr->time != 0) { s64 tmp = ktime_to_us(ktime_get()); stats->total_time = tmp - pwr->time; @@ -3242,6 +2305,7 @@ static const struct kgsl_functable adreno_functable = { .idle = adreno_idle, .isidle = adreno_isidle, .suspend_context = adreno_suspend_context, + .init = adreno_init, .start = adreno_start, .stop = adreno_stop, .getproperty = adreno_getproperty, @@ -3256,13 +2320,15 @@ static const struct kgsl_functable adreno_functable = { .gpuid = adreno_gpuid, .snapshot = adreno_snapshot, .irq_handler = adreno_irq_handler, + .drain = adreno_drain, /* Optional functions */ .setstate = adreno_setstate, .drawctxt_create = adreno_drawctxt_create, + .drawctxt_detach = adreno_drawctxt_detach, .drawctxt_destroy = adreno_drawctxt_destroy, .setproperty = adreno_setproperty, .postmortem_dump = adreno_dump, - .next_event = adreno_next_event, + .drawctxt_sched = adreno_drawctxt_sched, }; static struct platform_driver adreno_platform_driver = { diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index e7ad20c20bbb8f43fa7916e12d7a60482d5329f6..25d1fdd3ad3803b66348cdef44ec27450be0b548 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -25,17 +25,20 @@ #define ADRENO_DEVICE(device) \ KGSL_CONTAINER_OF(device, struct adreno_device, dev) +#define ADRENO_CONTEXT(device) \ + KGSL_CONTAINER_OF(device, struct adreno_context, base) + #define ADRENO_CHIPID_CORE(_id) (((_id) >> 24) & 0xFF) #define ADRENO_CHIPID_MAJOR(_id) (((_id) >> 16) & 0xFF) #define ADRENO_CHIPID_MINOR(_id) (((_id) >> 8) & 0xFF) #define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF) /* Flags to control command packet settings */ -#define KGSL_CMD_FLAGS_NONE 0x00000000 -#define KGSL_CMD_FLAGS_PMODE 0x00000001 -#define KGSL_CMD_FLAGS_INTERNAL_ISSUE 0x00000002 -#define KGSL_CMD_FLAGS_GET_INT 0x00000004 -#define KGSL_CMD_FLAGS_EOF 0x00000100 +#define KGSL_CMD_FLAGS_NONE 0 +#define KGSL_CMD_FLAGS_PMODE BIT(0) +#define KGSL_CMD_FLAGS_INTERNAL_ISSUE BIT(1) +#define KGSL_CMD_FLAGS_GET_INT BIT(2) +#define KGSL_CMD_FLAGS_WFI BIT(3) /* Command identifiers */ #define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF @@ -78,6 +81,47 @@ enum adreno_gpurev { ADRENO_REV_A330 = 330, }; +/* + * Maximum size of the dispatcher ringbuffer - the actual inflight size will be + * smaller than this but this size will allow for a larger range of inflight + * sizes that can be chosen at runtime + */ + +#define ADRENO_DISPATCH_CMDQUEUE_SIZE 128 + +/** + * struct adreno_dispatcher - container for the adreno GPU dispatcher + * @mutex: Mutex to protect the structure + * @state: Current state of the dispatcher (active or paused) + * @timer: Timer to monitor the progress of the command batches + * @inflight: Number of command batch operations pending in the ringbuffer + * @fault: True if a HW fault was detected + * @pending: Priority list of contexts waiting to submit command batches + * @plist_lock: Spin lock to protect the pending queue + * @cmdqueue: Queue of command batches currently in flight + * @head: pointer to the head of the cmdqueue. This is the oldest pending + * operation + * @tail: pointer to the tail of the cmdqueue. 
This is the most recently + * submitted operation + * @work: work_struct to put the dispatcher in a work queue + * @kobj: kobject for the dispatcher directory in the device sysfs node + */ +struct adreno_dispatcher { + struct mutex mutex; + unsigned int state; + struct timer_list timer; + struct timer_list fault_timer; + unsigned int inflight; + int fault; + struct plist_head pending; + spinlock_t plist_lock; + struct kgsl_cmdbatch *cmdqueue[ADRENO_DISPATCH_CMDQUEUE_SIZE]; + unsigned int head; + unsigned int tail; + struct work_struct work; + struct kobject kobj; +}; + struct adreno_gpudev; struct adreno_device { @@ -105,7 +149,6 @@ struct adreno_device { unsigned int ib_check_level; unsigned int fast_hang_detect; unsigned int ft_policy; - unsigned int ft_user_control; unsigned int long_ib_detect; unsigned int long_ib; unsigned int long_ib_ts; @@ -113,6 +156,46 @@ struct adreno_device { unsigned int gpulist_index; struct ocmem_buf *ocmem_hdl; unsigned int ocmem_base; + unsigned int gpu_cycles; + struct adreno_dispatcher dispatcher; +}; + +#define PERFCOUNTER_FLAG_NONE 0x0 +#define PERFCOUNTER_FLAG_KERNEL 0x1 + +/* Structs to maintain the list of active performance counters */ + +/** + * struct adreno_perfcount_register: register state + * @countable: countable the register holds + * @refcount: number of users of the register + * @offset: register hardware offset + */ +struct adreno_perfcount_register { + unsigned int countable; + unsigned int refcount; + unsigned int offset; + unsigned int flags; +}; + +/** + * struct adreno_perfcount_group: registers for a hardware group + * @regs: available registers for this group + * @reg_count: total registers for this group + */ +struct adreno_perfcount_group { + struct adreno_perfcount_register *regs; + unsigned int reg_count; +}; + +/** + * adreno_perfcounts: all available perfcounter groups + * @groups: available groups for this device + * @group_count: total groups for this device + */ +struct adreno_perfcounters { + struct adreno_perfcount_group *groups; + unsigned int group_count; }; struct adreno_gpudev { @@ -126,60 +209,50 @@ struct adreno_gpudev { /* keeps track of when we need to execute the draw workaround code */ int ctx_switches_since_last_draw; + struct adreno_perfcounters *perfcounters; + /* GPU specific function hooks */ int (*ctxt_create)(struct adreno_device *, struct adreno_context *); - void (*ctxt_save)(struct adreno_device *, struct adreno_context *); - void (*ctxt_restore)(struct adreno_device *, struct adreno_context *); - void (*ctxt_draw_workaround)(struct adreno_device *, + int (*ctxt_save)(struct adreno_device *, struct adreno_context *); + int (*ctxt_restore)(struct adreno_device *, struct adreno_context *); + int (*ctxt_draw_workaround)(struct adreno_device *, struct adreno_context *); irqreturn_t (*irq_handler)(struct adreno_device *); void (*irq_control)(struct adreno_device *, int); unsigned int (*irq_pending)(struct adreno_device *); void * (*snapshot)(struct adreno_device *, void *, int *, int); - void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + void (*perfcounter_init)(struct adreno_device *); void (*start)(struct adreno_device *); unsigned int (*busy_cycles)(struct adreno_device *); + void (*perfcounter_enable)(struct adreno_device *, unsigned int group, + unsigned int counter, unsigned int countable); + uint64_t (*perfcounter_read)(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, + unsigned int 
offset); }; -/* - * struct adreno_ft_data - Structure that contains all information to - * perform gpu fault tolerance - * @ib1 - IB1 that the GPU was executing when hang happened - * @context_id - Context which caused the hang - * @global_eop - eoptimestamp at time of hang - * @rb_buffer - Buffer that holds the commands from good contexts - * @rb_size - Number of valid dwords in rb_buffer - * @bad_rb_buffer - Buffer that holds commands from the hanging context - * bad_rb_size - Number of valid dwords in bad_rb_buffer - * @good_rb_buffer - Buffer that holds commands from good contexts - * good_rb_size - Number of valid dwords in good_rb_buffer - * @last_valid_ctx_id - The last context from which commands were placed in - * ringbuffer before the GPU hung - * @step - Current fault tolerance step being executed - * @err_code - Fault tolerance error code - * @fault - Indicates whether the hang was caused due to a pagefault - * @start_of_replay_cmds - Offset in ringbuffer from where commands can be - * replayed during fault tolerance - * @replay_for_snapshot - Offset in ringbuffer where IB's can be saved for - * replaying with snapshot - */ -struct adreno_ft_data { - unsigned int ib1; - unsigned int context_id; - unsigned int global_eop; - unsigned int *rb_buffer; - unsigned int rb_size; - unsigned int *bad_rb_buffer; - unsigned int bad_rb_size; - unsigned int *good_rb_buffer; - unsigned int good_rb_size; - unsigned int last_valid_ctx_id; - unsigned int status; - unsigned int ft_policy; - unsigned int err_code; - unsigned int start_of_replay_cmds; - unsigned int replay_for_snapshot; -}; +#define FT_DETECT_REGS_COUNT 12 + +/* Fault Tolerance policy flags */ +#define KGSL_FT_OFF BIT(0) +#define KGSL_FT_REPLAY BIT(1) +#define KGSL_FT_SKIPIB BIT(2) +#define KGSL_FT_SKIPFRAME BIT(3) +#define KGSL_FT_DISABLE BIT(4) +#define KGSL_FT_TEMP_DISABLE BIT(5) +#define KGSL_FT_DEFAULT_POLICY (KGSL_FT_REPLAY + KGSL_FT_SKIPIB) + +/* This internal bit is used to skip the PM dump on replayed command batches */ +#define KGSL_FT_SKIP_PMDUMP BIT(31) + +/* Pagefault policy flags */ +#define KGSL_FT_PAGEFAULT_INT_ENABLE BIT(0) +#define KGSL_FT_PAGEFAULT_GPUHALT_ENABLE BIT(1) +#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE BIT(2) +#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT BIT(3) +#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY (KGSL_FT_PAGEFAULT_INT_ENABLE + \ + KGSL_FT_PAGEFAULT_GPUHALT_ENABLE) extern struct adreno_gpudev adreno_a2xx_gpudev; extern struct adreno_gpudev adreno_a3xx_gpudev; @@ -203,7 +276,6 @@ extern const unsigned int a330_registers[]; extern const unsigned int a330_registers_count; extern unsigned int ft_detect_regs[]; -extern const unsigned int ft_detect_regs_count; int adreno_idle(struct kgsl_device *device); @@ -213,6 +285,8 @@ void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords, unsigned int value); int adreno_dump(struct kgsl_device *device, int manual); +unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device + *adreno_dev); struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device, unsigned int pt_base, @@ -228,13 +302,30 @@ struct kgsl_memdesc *adreno_find_ctxtmem(struct kgsl_device *device, void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain, int hang); -int adreno_dump_and_exec_ft(struct kgsl_device *device); +void adreno_dispatcher_start(struct adreno_device *adreno_dev); +int adreno_dispatcher_init(struct adreno_device *adreno_dev); +void adreno_dispatcher_close(struct adreno_device *adreno_dev); +int adreno_dispatcher_idle(struct 
adreno_device *adreno_dev, + unsigned int timeout); +void adreno_dispatcher_irq_fault(struct kgsl_device *device); +void adreno_dispatcher_stop(struct adreno_device *adreno_dev); + +int adreno_context_queue_cmd(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp); + +void adreno_dispatcher_schedule(struct kgsl_device *device); +void adreno_dispatcher_pause(struct adreno_device *adreno_dev); +void adreno_dispatcher_queue_context(struct kgsl_device *device, + struct adreno_context *drawctxt); +int adreno_reset(struct kgsl_device *device); -void adreno_dump_rb(struct kgsl_device *device, const void *buf, - size_t len, int start, int size); +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int flags); -unsigned int adreno_ft_detect(struct kgsl_device *device, - unsigned int *prev_reg_val); +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable); static inline int adreno_is_a200(struct adreno_device *adreno_dev) { @@ -297,23 +388,33 @@ static inline int adreno_is_a330(struct adreno_device *adreno_dev) { return (adreno_dev->gpurev == ADRENO_REV_A330); } +static inline int adreno_is_a330v2(struct adreno_device *adreno_dev) +{ + return ((adreno_dev->gpurev == ADRENO_REV_A330) && + (ADRENO_CHIPID_PATCH(adreno_dev->chip_id) > 0)); +} + static inline int adreno_rb_ctxtswitch(unsigned int *cmd) { return (cmd[0] == cp_nop_packet(1) && cmd[1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER); } +/** + * adreno_context_timestamp() - Return the last queued timestamp for the context + * @k_ctxt: Pointer to the KGSL context to query + * @rb: Pointer to the ringbuffer structure for the GPU + * + * Return the last queued timestamp for the given context. This is used to verify + * that incoming requests are not using an invalid (unsubmitted) timestamp + */ static inline int adreno_context_timestamp(struct kgsl_context *k_ctxt, struct adreno_ringbuffer *rb) { - struct adreno_context *a_ctxt = NULL; - - if (k_ctxt) - a_ctxt = k_ctxt->devctxt; - - if (a_ctxt && a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS) + if (k_ctxt) { + struct adreno_context *a_ctxt = ADRENO_CONTEXT(k_ctxt); return a_ctxt->timestamp; - + } return rb->global_ts; } diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c index f3ebe0158f564f8302801907e7087d6d35723088..93068c05c86fcdb50a2fff07a39d4eef63134684 100644 --- a/drivers/gpu/msm/adreno_a2xx.c +++ b/drivers/gpu/msm/adreno_a2xx.c @@ -1355,7 +1355,7 @@ static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev, tmp_ctx.gmem_base = adreno_dev->gmem_base; result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, - drawctxt->pagetable, drawctxt->context_gmem_shadow.size); + drawctxt->base.pagetable, drawctxt->context_gmem_shadow.size); if (result) return result; @@ -1365,7 +1365,7 @@ static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev, /* blank out gmem shadow. 
*/ kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0, - drawctxt->context_gmem_shadow.size); + drawctxt->context_gmem_shadow.size); /* build quad vertex buffer */ build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow, @@ -1409,13 +1409,13 @@ static int a2xx_drawctxt_create(struct adreno_device *adreno_dev, */ ret = kgsl_allocate(&drawctxt->gpustate, - drawctxt->pagetable, _context_size(adreno_dev)); + drawctxt->base.pagetable, _context_size(adreno_dev)); if (ret) return ret; - kgsl_sharedmem_set(&drawctxt->gpustate, 0, 0, - _context_size(adreno_dev)); + kgsl_sharedmem_set(&drawctxt->gpustate, + 0, 0, _context_size(adreno_dev)); tmp_ctx.cmd = tmp_ctx.start = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET); @@ -1439,8 +1439,8 @@ static int a2xx_drawctxt_create(struct adreno_device *adreno_dev, kgsl_cache_range_op(&drawctxt->gpustate, KGSL_CACHE_OP_FLUSH); - kgsl_cffdump_syncmem(NULL, &drawctxt->gpustate, - drawctxt->gpustate.gpuaddr, + kgsl_cffdump_syncmem(NULL, + &drawctxt->gpustate, drawctxt->gpustate.gpuaddr, drawctxt->gpustate.size, false); done: @@ -1450,7 +1450,7 @@ done: return ret; } -static void a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev, +static int a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; @@ -1467,7 +1467,7 @@ static void a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev, ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW) adreno_dev->gpudev->ctx_switches_since_last_draw = 0; else - return; + return 0; /* * Issue an empty draw call to avoid possible hangs due to * repeated idles without intervening draw calls. @@ -1498,138 +1498,201 @@ static void a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev, | adreno_dev->pix_shader_start; } - adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, - &cmd[0], cmds - cmd); + return adreno_ringbuffer_issuecmds(device, context, + KGSL_CMD_FLAGS_PMODE, &cmd[0], cmds - cmd); } -static void a2xx_drawctxt_save(struct adreno_device *adreno_dev, +static int a2xx_drawctxt_save(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; + int ret; if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED)) - return; + return 0; - if (context->flags & CTXT_FLAGS_GPU_HANG) - KGSL_CTXT_WARN(device, - "Current active context has caused gpu hang\n"); + if (context->state == ADRENO_CONTEXT_STATE_INVALID) + return 0; if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { - + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->reg_save[1], + context->reg_save[2] << 2, true); /* save registers and constants. */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->reg_save, 3); + if (ret) + return ret; + if (context->flags & CTXT_FLAGS_SHADER_SAVE) { + kgsl_cffdump_syncmem(NULL, + &context->gpustate, + context->shader_save[1], + context->shader_save[2] << 2, true); /* save shader partitioning and instructions. */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); + kgsl_cffdump_syncmem(NULL, + &context->gpustate, + context->shader_fixup[1], + context->shader_fixup[2] << 2, true); /* * fixup shader partitioning parameter for * SET_SHADER_BASES. 
*/ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->shader_fixup, 3); + if (ret) + return ret; + context->flags |= CTXT_FLAGS_SHADER_RESTORE; } } if ((context->flags & CTXT_FLAGS_GMEM_SAVE) && (context->flags & CTXT_FLAGS_GMEM_SHADOW)) { + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->context_gmem_shadow.gmem_save[1], + context->context_gmem_shadow.gmem_save[2] << 2, true); /* save gmem. * (note: changes shader. shader must already be saved.) */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_save, 3); + if (ret) + return ret; + + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->chicken_restore[1], + context->chicken_restore[2] << 2, true); + /* Restore TP0_CHICKEN */ if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + + if (ret) + return ret; } adreno_dev->gpudev->ctx_switches_since_last_draw = 0; context->flags |= CTXT_FLAGS_GMEM_RESTORE; } else if (adreno_is_a2xx(adreno_dev)) - a2xx_drawctxt_draw_workaround(adreno_dev, context); + return a2xx_drawctxt_draw_workaround(adreno_dev, context); + + return 0; } -static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, +static int a2xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; unsigned int cmds[5]; + int ret = 0; if (context == NULL) { - /* No context - set the default apgetable and thats it */ + /* No context - set the default pagetable and thats it */ + unsigned int id; + /* + * If there isn't a current context, the kgsl_mmu_setstate + * will use the CPU path so we don't need to give + * it a valid context id. + */ + id = (adreno_dev->drawctxt_active != NULL) + ? adreno_dev->drawctxt_active->base.id + : KGSL_CONTEXT_INVALID; kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable, - adreno_dev->drawctxt_active->id); - return; + id); + return 0; } - KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); - cmds[0] = cp_nop_packet(1); cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); - cmds[4] = context->id; - adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, + cmds[4] = context->base.id; + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, cmds, 5); - kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id); + if (ret) + return ret; -#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP - kgsl_cffdump_syncmem(NULL, &context->gpustate, - context->gpustate.gpuaddr, LCC_SHADOW_SIZE + - REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false); -#endif + kgsl_mmu_setstate(&device->mmu, context->base.pagetable, + context->base.id); /* restore gmem. * (note: changes shader. shader must not already be restored.) 
*/ if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { - adreno_ringbuffer_issuecmds(device, context, + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->context_gmem_shadow.gmem_restore[1], + context->context_gmem_shadow.gmem_restore[2] << 2, + true); + + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_restore, 3); + if (ret) + return ret; if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->chicken_restore[1], + context->chicken_restore[2] << 2, true); + /* Restore TP0_CHICKEN */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); + if (ret) + return ret; } context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->reg_restore[1], + context->reg_restore[2] << 2, true); /* restore registers and constants. */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); + if (ret) + return ret; /* restore shader instructions & partitioning. */ if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { - adreno_ringbuffer_issuecmds(device, context, + kgsl_cffdump_syncmem(NULL, &context->gpustate, + context->shader_restore[1], + context->shader_restore[2] << 2, true); + + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); + if (ret) + return ret; } } if (adreno_is_a20x(adreno_dev)) { cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); cmds[1] = context->bin_base_offset; - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, cmds, 2); } + + return ret; } /* @@ -1696,13 +1759,14 @@ static void a2xx_cp_intrcallback(struct kgsl_device *device) if (!status) { if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) { - /* This indicates that we could not read CP_INT_STAT. - * As a precaution just wake up processes so - * they can check their timestamps. Since, we - * did not ack any interrupts this interrupt will - * be generated again */ + /* + * This indicates that we could not read CP_INT_STAT. + * As a precaution schedule the dispatcher to check + * things out. Since we did not ack any interrupts this + * interrupt will be generated again + */ KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n"); - wake_up_interruptible_all(&device->wait_queue); + adreno_dispatcher_schedule(device); } else KGSL_DRV_WARN(device, "Spurious interrput detected\n"); return; @@ -1727,9 +1791,8 @@ static void a2xx_cp_intrcallback(struct kgsl_device *device) adreno_regwrite(device, REG_CP_INT_ACK, status); if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) { - KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n"); queue_work(device->work_queue, &device->ts_expired_ws); - wake_up_interruptible_all(&device->wait_queue); + adreno_dispatcher_schedule(device); } } @@ -1828,13 +1891,16 @@ static unsigned int a2xx_irq_pending(struct adreno_device *adreno_dev) (mh & kgsl_mmu_get_int_mask())) ? 
1 : 0; } -static void a2xx_rb_init(struct adreno_device *adreno_dev, +static int a2xx_rb_init(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb) { unsigned int *cmds, cmds_gpu; /* ME_INIT */ cmds = adreno_ringbuffer_allocspace(rb, NULL, 19); + if (cmds == NULL) + return -ENOMEM; + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19); GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18)); @@ -1887,6 +1953,8 @@ static void a2xx_rb_init(struct adreno_device *adreno_dev, GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); adreno_ringbuffer_submit(rb); + + return 0; } static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev) diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c index 019a6c78a8eca4201524b6614e2a635e8a8296be..1e61279e10f70fb9ceca583d23926f4fb6f02fa7 100644 --- a/drivers/gpu/msm/adreno_a3xx.c +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -445,6 +445,21 @@ static void build_regconstantsave_cmds(struct adreno_device *adreno_dev, tmp_ctx.cmd = cmd; } +unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device + *adreno_dev) +{ + if (adreno_is_a305(adreno_dev)) + return A305_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a320(adreno_dev)) + return A320_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a330v2(adreno_dev)) + return A330v2_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a330(adreno_dev)) + return A330_RBBM_CLOCK_CTL_DEFAULT; + + BUG_ON(1); +} + /* Copy GMEM contents to system memory shadow. */ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, @@ -454,7 +469,7 @@ static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, unsigned int *start = cmds; *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1); - *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT; + *cmds++ = adreno_a3xx_rbbm_clock_ctl_default(adreno_dev); *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(A3XX_RB_MODE_CONTROL); @@ -1250,7 +1265,7 @@ static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, unsigned int *start = cmds; *cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1); - *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT; + *cmds++ = adreno_a3xx_rbbm_clock_ctl_default(adreno_dev); *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5); *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); @@ -2302,7 +2317,7 @@ static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev, tmp_ctx.gmem_base = adreno_dev->gmem_base; result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow, - drawctxt->pagetable, drawctxt->context_gmem_shadow.size); + drawctxt->base.pagetable, drawctxt->context_gmem_shadow.size); if (result) return result; @@ -2336,7 +2351,7 @@ static int a3xx_drawctxt_create(struct adreno_device *adreno_dev, */ ret = kgsl_allocate(&drawctxt->gpustate, - drawctxt->pagetable, CONTEXT_SIZE); + drawctxt->base.pagetable, CONTEXT_SIZE); if (ret) return ret; @@ -2362,32 +2377,38 @@ done: return ret; } -static void a3xx_drawctxt_save(struct adreno_device *adreno_dev, +static int a3xx_drawctxt_save(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; + int ret; if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED)) - return; + return 0; - if (context->flags & CTXT_FLAGS_GPU_HANG) - KGSL_CTXT_WARN(device, - "Current active context has caused gpu hang\n"); + if (context->state == ADRENO_CONTEXT_STATE_INVALID) + return 0; if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { /* Fixup self modifying IBs for save operations */ - 
adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->save_fixup, 3); + if (ret) + return ret; /* save registers and constants. */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->regconstant_save, 3); + if (ret) + return ret; if (context->flags & CTXT_FLAGS_SHADER_SAVE) { /* Save shader instructions */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->shader_save, 3); + if (ret) + return ret; context->flags |= CTXT_FLAGS_SHADER_RESTORE; } @@ -2400,38 +2421,60 @@ static void a3xx_drawctxt_save(struct adreno_device *adreno_dev, * already be saved.) */ - adreno_ringbuffer_issuecmds(device, context, + kgsl_cffdump_syncmem(context->base.device, + &context->gpustate, + context->context_gmem_shadow.gmem_save[1], + context->context_gmem_shadow.gmem_save[2] << 2, true); + + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow. gmem_save, 3); + if (ret) + return ret; + context->flags |= CTXT_FLAGS_GMEM_RESTORE; } + + return 0; } -static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, +static int a3xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; unsigned int cmds[5]; + int ret = 0; if (context == NULL) { /* No context - set the default pagetable and thats it */ + unsigned int id; + /* + * If there isn't a current context, the kgsl_mmu_setstate + * will use the CPU path so we don't need to give + * it a valid context id. + */ + id = (adreno_dev->drawctxt_active != NULL) + ? adreno_dev->drawctxt_active->base.id + : KGSL_CONTEXT_INVALID; kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable, - adreno_dev->drawctxt_active->id); - return; + id); + return 0; } - KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); - cmds[0] = cp_nop_packet(1); cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); - cmds[4] = context->id; - adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, + cmds[4] = context->base.id; + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, cmds, 5); - kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id); + if (ret) + return ret; + + kgsl_mmu_setstate(&device->mmu, context->base.pagetable, + context->base.id); /* * Restore GMEM. (note: changes shader. @@ -2439,43 +2482,63 @@ static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev, */ if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { - adreno_ringbuffer_issuecmds(device, context, + kgsl_cffdump_syncmem(NULL, + &context->gpustate, + context->context_gmem_shadow.gmem_restore[1], + context->context_gmem_shadow.gmem_restore[2] << 2, + true); + + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow. 
gmem_restore, 3); + if (ret) + return ret; context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); + if (ret) + return ret; /* Fixup self modifying IBs for restore operations */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->restore_fixup, 3); + if (ret) + return ret; - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->constant_restore, 3); + if (ret) + return ret; if (context->flags & CTXT_FLAGS_SHADER_RESTORE) - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); - + if (ret) + return ret; /* Restore HLSQ_CONTROL_0 register */ - adreno_ringbuffer_issuecmds(device, context, + ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->hlsqcontrol_restore, 3); } + + return ret; } -static void a3xx_rb_init(struct adreno_device *adreno_dev, +static int a3xx_rb_init(struct adreno_device *adreno_dev, struct adreno_ringbuffer *rb) { unsigned int *cmds, cmds_gpu; cmds = adreno_ringbuffer_allocspace(rb, NULL, 18); + if (cmds == NULL) + return -ENOMEM; + cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18); GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17)); @@ -2499,6 +2562,8 @@ static void a3xx_rb_init(struct adreno_device *adreno_dev, GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000); adreno_ringbuffer_submit(rb); + + return 0; } static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) @@ -2525,6 +2590,9 @@ static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) /* Clear the error */ adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + + /* Trigger a fault in the interrupt handler */ + adreno_dispatcher_irq_fault(device); return; } case A3XX_INT_RBBM_REG_TIMEOUT: @@ -2566,8 +2634,13 @@ static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) case A3XX_INT_UCHE_OOB_ACCESS: err = "UCHE: Out of bounds access"; break; + default: + return; } + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_dispatcher_irq_fault(device); + KGSL_DRV_CRIT(device, "%s\n", err); kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); } @@ -2576,11 +2649,276 @@ static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq) { struct kgsl_device *device = &adreno_dev->dev; - /* Wake up everybody waiting for the interrupt */ - wake_up_interruptible_all(&device->wait_queue); - - /* Schedule work to free mem and issue ibs */ + /* Schedule the event queue */ queue_work(device->work_queue, &device->ts_expired_ws); + + adreno_dispatcher_schedule(device); +} + +/** + * struct a3xx_perfcounter_register - Define a performance counter register + * @load_bit: the bit to set in RBBM_LOAD_CMD0/RBBM_LOAD_CMD1 to force the RBBM + * to load the reset value into the appropriate counter + * @select: The dword offset of the register to write the selected + * countable into + */ + +struct a3xx_perfcounter_register { + unsigned int load_bit; + unsigned int select; +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_cp[] = { + { 0, A3XX_CP_PERFCOUNTER_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rbbm[] = { + { 1, A3XX_RBBM_PERFCOUNTER0_SELECT }, + { 
2, A3XX_RBBM_PERFCOUNTER1_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_pc[] = { + { 3, A3XX_PC_PERFCOUNTER0_SELECT }, + { 4, A3XX_PC_PERFCOUNTER1_SELECT }, + { 5, A3XX_PC_PERFCOUNTER2_SELECT }, + { 6, A3XX_PC_PERFCOUNTER3_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vfd[] = { + { 7, A3XX_VFD_PERFCOUNTER0_SELECT }, + { 8, A3XX_VFD_PERFCOUNTER1_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_hlsq[] = { + { 9, A3XX_HLSQ_PERFCOUNTER0_SELECT }, + { 10, A3XX_HLSQ_PERFCOUNTER1_SELECT }, + { 11, A3XX_HLSQ_PERFCOUNTER2_SELECT }, + { 12, A3XX_HLSQ_PERFCOUNTER3_SELECT }, + { 13, A3XX_HLSQ_PERFCOUNTER4_SELECT }, + { 14, A3XX_HLSQ_PERFCOUNTER5_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vpc[] = { + { 15, A3XX_VPC_PERFCOUNTER0_SELECT }, + { 16, A3XX_VPC_PERFCOUNTER1_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tse[] = { + { 17, A3XX_GRAS_PERFCOUNTER0_SELECT }, + { 18, A3XX_GRAS_PERFCOUNTER1_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_ras[] = { + { 19, A3XX_GRAS_PERFCOUNTER2_SELECT }, + { 20, A3XX_GRAS_PERFCOUNTER3_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_uche[] = { + { 21, A3XX_UCHE_PERFCOUNTER0_SELECT }, + { 22, A3XX_UCHE_PERFCOUNTER1_SELECT }, + { 23, A3XX_UCHE_PERFCOUNTER2_SELECT }, + { 24, A3XX_UCHE_PERFCOUNTER3_SELECT }, + { 25, A3XX_UCHE_PERFCOUNTER4_SELECT }, + { 26, A3XX_UCHE_PERFCOUNTER5_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tp[] = { + { 27, A3XX_TP_PERFCOUNTER0_SELECT }, + { 28, A3XX_TP_PERFCOUNTER1_SELECT }, + { 29, A3XX_TP_PERFCOUNTER2_SELECT }, + { 30, A3XX_TP_PERFCOUNTER3_SELECT }, + { 31, A3XX_TP_PERFCOUNTER4_SELECT }, + { 32, A3XX_TP_PERFCOUNTER5_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_sp[] = { + { 33, A3XX_SP_PERFCOUNTER0_SELECT }, + { 34, A3XX_SP_PERFCOUNTER1_SELECT }, + { 35, A3XX_SP_PERFCOUNTER2_SELECT }, + { 36, A3XX_SP_PERFCOUNTER3_SELECT }, + { 37, A3XX_SP_PERFCOUNTER4_SELECT }, + { 38, A3XX_SP_PERFCOUNTER5_SELECT }, + { 39, A3XX_SP_PERFCOUNTER6_SELECT }, + { 40, A3XX_SP_PERFCOUNTER7_SELECT }, +}; + +static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rb[] = { + { 41, A3XX_RB_PERFCOUNTER0_SELECT }, + { 42, A3XX_RB_PERFCOUNTER1_SELECT }, +}; + +#define REGCOUNTER_GROUP(_x) { (_x), ARRAY_SIZE((_x)) } + +static struct { + struct a3xx_perfcounter_register *regs; + int count; +} a3xx_perfcounter_reglist[] = { + REGCOUNTER_GROUP(a3xx_perfcounter_reg_cp), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_rbbm), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_pc), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_vfd), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_hlsq), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_vpc), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_tse), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_ras), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_uche), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_tp), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_sp), + REGCOUNTER_GROUP(a3xx_perfcounter_reg_rb), +}; + +static void a3xx_perfcounter_enable_pwr(struct kgsl_device *device, + unsigned int countable) +{ + unsigned int in, out; + + adreno_regread(device, A3XX_RBBM_RBBM_CTL, &in); + + if (countable == 0) + out = in | RBBM_RBBM_CTL_RESET_PWR_CTR0; + else + out = in | RBBM_RBBM_CTL_RESET_PWR_CTR1; + + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out); + + if (countable == 0) + out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR0; + else + 
out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR1; + + adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out); + + return; +} + +static void a3xx_perfcounter_enable_vbif(struct kgsl_device *device, + unsigned int counter, + unsigned int countable) +{ + unsigned int in, out, bit, sel; + + if (counter > 1 || countable > 0x7f) + return; + + adreno_regread(device, A3XX_VBIF_PERF_CNT_EN, &in); + adreno_regread(device, A3XX_VBIF_PERF_CNT_SEL, &sel); + + if (counter == 0) { + bit = VBIF_PERF_CNT_0; + sel = (sel & ~VBIF_PERF_CNT_0_SEL_MASK) | countable; + } else { + bit = VBIF_PERF_CNT_1; + sel = (sel & ~VBIF_PERF_CNT_1_SEL_MASK) + | (countable << VBIF_PERF_CNT_1_SEL); + } + + out = in | bit; + + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_SEL, sel); + + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_CLR, bit); + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_CLR, 0); + + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_EN, out); +} + +static void a3xx_perfcounter_enable_vbif_pwr(struct kgsl_device *device, + unsigned int countable) +{ + unsigned int in, out, bit; + + adreno_regread(device, A3XX_VBIF_PERF_CNT_EN, &in); + if (countable == 0) + bit = VBIF_PERF_PWR_CNT_0; + else if (countable == 1) + bit = VBIF_PERF_PWR_CNT_1; + else + bit = VBIF_PERF_PWR_CNT_2; + + out = in | bit; + + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_CLR, bit); + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_CLR, 0); + + adreno_regwrite(device, A3XX_VBIF_PERF_CNT_EN, out); +} + +/* + * a3xx_perfcounter_enable - Configure a performance counter for a countable + * @adreno_dev - Adreno device to configure + * @group - Desired performance counter group + * @counter - Desired performance counter in the group + * @countable - Desired countable + * + * Physically set up a counter within a group with the desired countable + */ + +static void a3xx_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int val = 0; + struct a3xx_perfcounter_register *reg; + + if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist)) + return; + + if (counter >= a3xx_perfcounter_reglist[group].count) + return; + + /* Special cases */ + if (group == KGSL_PERFCOUNTER_GROUP_PWR) + return a3xx_perfcounter_enable_pwr(device, countable); + else if (group == KGSL_PERFCOUNTER_GROUP_VBIF) + return a3xx_perfcounter_enable_vbif(device, counter, countable); + else if (group == KGSL_PERFCOUNTER_GROUP_VBIF_PWR) + return a3xx_perfcounter_enable_vbif_pwr(device, countable); + + reg = &(a3xx_perfcounter_reglist[group].regs[counter]); + + /* Select the desired perfcounter */ + adreno_regwrite(device, reg->select, countable); + + if (reg->load_bit < 32) { + val = 1 << reg->load_bit; + adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, val); + } else { + val = 1 << (reg->load_bit - 32); + adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, val); + } +} + +static uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, + unsigned int offset) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct a3xx_perfcounter_register *reg = NULL; + unsigned int lo = 0, hi = 0; + unsigned int val; + + if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist)) + return 0; + + if (counter >= a3xx_perfcounter_reglist[group].count) + return 0; + + reg = &(a3xx_perfcounter_reglist[group].regs[counter]); + + /* Freeze the counter */ + adreno_regread(device, A3XX_RBBM_PERFCTR_CTL, &val); + val &= ~reg->load_bit; + adreno_regwrite(device, 
A3XX_RBBM_PERFCTR_CTL, val); + + /* Read the values */ + adreno_regread(device, offset, &lo); + adreno_regread(device, offset + 1, &hi); + + /* Re-Enable the counter */ + val |= reg->load_bit; + adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val); + + return (((uint64_t) hi) << 32) | lo; } #define A3XX_IRQ_CALLBACK(_c) { .func = _c } @@ -2684,26 +3022,22 @@ static unsigned int a3xx_irq_pending(struct adreno_device *adreno_dev) static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev) { struct kgsl_device *device = &adreno_dev->dev; - unsigned int reg, val; - - /* Freeze the counter */ - adreno_regread(device, A3XX_RBBM_RBBM_CTL, ®); - reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1; - adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + unsigned int val; + unsigned int ret = 0; /* Read the value */ adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val); - /* Reset the counter */ - reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1; - adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); - - /* Re-enable the counter */ - reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1; - reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1; - adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg); + /* Return 0 for the first read */ + if (adreno_dev->gpu_cycles != 0) { + if (val < adreno_dev->gpu_cycles) + ret = (0xFFFFFFFF - adreno_dev->gpu_cycles) + val; + else + ret = val - adreno_dev->gpu_cycles; + } - return val; + adreno_dev->gpu_cycles = val; + return ret; } struct a3xx_vbif_data { @@ -2781,17 +3115,83 @@ static struct a3xx_vbif_data a330_vbif[] = { {0, 0}, }; +/* + * Most of the VBIF registers on 8974v2 have the correct values at power on, so + * we won't modify those if we don't need to + */ +static struct a3xx_vbif_data a330v2_vbif[] = { + /* Enable 1k sort */ + { A3XX_VBIF_ABIT_SORT, 0x0001003F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + /* Disable VBIF clock gating. This is to enable AXI running + * higher frequency than GPU. + */ + { A3XX_VBIF_CLKON, 1 }, + {0, 0}, +}; + +static struct { + int(*devfunc)(struct adreno_device *); + struct a3xx_vbif_data *vbif; +} a3xx_vbif_platforms[] = { + { adreno_is_a305, a305_vbif }, + { adreno_is_a320, a320_vbif }, + /* A330v2 needs to be ahead of A330 so the right device matches */ + { adreno_is_a330v2, a330v2_vbif }, + { adreno_is_a330, a330_vbif }, +}; + +static void a3xx_perfcounter_init(struct adreno_device *adreno_dev) +{ + /* + * Set SP to count SP_ALU_ACTIVE_CYCLES, it includes + * all ALU instruction execution regardless precision or shader ID. + * Set SP to count SP0_ICL1_MISSES, It counts + * USP L1 instruction miss request. + * Set SP to count SP_FS_FULL_ALU_INSTRUCTIONS, it + * counts USP flow control instruction execution. 
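The enable and read paths above come down to three steps: write the chosen countable into the group's select register, pulse the counter's load bit in LOAD_CMD0 (or LOAD_CMD1 once the bit index passes 31), and later combine the _LO/_HI register pair into a 64-bit value. Below is a minimal userspace sketch of that arithmetic; the register offsets, the fake register file, and the helper names are all stand-ins, not driver API:

#include <stdint.h>
#include <stdio.h>

/*
 * Fake register file and offsets standing in for the GPU MMIO space and the
 * real A3XX register map; only the arithmetic mirrors the driver code.
 */
#define REG_LOAD_CMD0   0x01
#define REG_LOAD_CMD1   0x02
#define REG_SELECT      0x10
#define REG_COUNTER_LO  0x20   /* the _HI half is assumed to sit at _LO + 1 */

static uint32_t regs[0x40];

static void reg_write(uint32_t off, uint32_t val) { regs[off] = val; }
static uint32_t reg_read(uint32_t off) { return regs[off]; }

/* Mirror of the load-bit split in a3xx_perfcounter_enable() */
static void perfcounter_enable(uint32_t select_reg, uint32_t load_bit,
        uint32_t countable)
{
        reg_write(select_reg, countable);       /* pick the countable */
        if (load_bit < 32)
                reg_write(REG_LOAD_CMD0, 1u << load_bit);
        else
                reg_write(REG_LOAD_CMD1, 1u << (load_bit - 32));
}

/* Mirror of the LO/HI combine in a3xx_perfcounter_read() */
static uint64_t perfcounter_read(uint32_t lo_reg)
{
        uint32_t lo = reg_read(lo_reg);
        uint32_t hi = reg_read(lo_reg + 1);

        return (((uint64_t) hi) << 32) | lo;
}

int main(void)
{
        regs[REG_COUNTER_LO] = 0x12345678;
        regs[REG_COUNTER_LO + 1] = 0x1;

        perfcounter_enable(REG_SELECT, 33, 5);  /* bit 33 lands in LOAD_CMD1 */
        printf("0x%llx\n",
               (unsigned long long) perfcounter_read(REG_COUNTER_LO));
        return 0;
}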
+ * we will use this to augment our hang detection + */ + if (adreno_dev->fast_hang_detect) { + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, + SP_ALU_ACTIVE_CYCLES, &ft_detect_regs[6], + PERFCOUNTER_FLAG_KERNEL); + ft_detect_regs[7] = ft_detect_regs[6] + 1; + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, + SP0_ICL1_MISSES, &ft_detect_regs[8], + PERFCOUNTER_FLAG_KERNEL); + ft_detect_regs[9] = ft_detect_regs[8] + 1; + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, + SP_FS_CFLOW_INSTRUCTIONS, &ft_detect_regs[10], + PERFCOUNTER_FLAG_KERNEL); + ft_detect_regs[11] = ft_detect_regs[10] + 1; + } + + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP, + SP_FS_FULL_ALU_INSTRUCTIONS, NULL, PERFCOUNTER_FLAG_KERNEL); + + /* Reserve and start countable 1 in the PWR perfcounter group */ + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + NULL, PERFCOUNTER_FLAG_KERNEL); +} + static void a3xx_start(struct adreno_device *adreno_dev) { struct kgsl_device *device = &adreno_dev->dev; struct a3xx_vbif_data *vbif = NULL; + int i; - if (adreno_is_a305(adreno_dev)) - vbif = a305_vbif; - else if (adreno_is_a320(adreno_dev)) - vbif = a320_vbif; - else if (adreno_is_a330(adreno_dev)) - vbif = a330_vbif; + for (i = 0; i < ARRAY_SIZE(a3xx_vbif_platforms); i++) { + if (a3xx_vbif_platforms[i].devfunc(adreno_dev)) { + vbif = a3xx_vbif_platforms[i].vbif; + break; + } + } BUG_ON(vbif == NULL); @@ -2829,7 +3229,14 @@ static void a3xx_start(struct adreno_device *adreno_dev) /* Enable Clock gating */ adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL, - A3XX_RBBM_CLOCK_CTL_DEFAULT); + adreno_a3xx_rbbm_clock_ctl_default(adreno_dev)); + + if (adreno_is_a330v2(adreno_dev)) + adreno_regwrite(device, A3XX_RBBM_GPR0_CTL, + A330v2_RBBM_GPR0_CTL_DEFAULT); + else if (adreno_is_a330(adreno_dev)) + adreno_regwrite(device, A3XX_RBBM_GPR0_CTL, + A330_RBBM_GPR0_CTL_DEFAULT); /* Set the OCMEM base address for A330 */ if (adreno_is_a330(adreno_dev)) { @@ -2840,25 +3247,133 @@ static void a3xx_start(struct adreno_device *adreno_dev) /* Turn on performance counters */ adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01); - /* - * Set SP perfcounter 5 to count SP_ALU_ACTIVE_CYCLES, it includes - * all ALU instruction execution regardless precision or shader ID. - * Set SP perfcounter 6 to count SP0_ICL1_MISSES, It counts - * USP L1 instruction miss request. - * Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS, it - * counts USP flow control instruction execution. 
- * we will use this to augment our hang detection - */ - if (adreno_dev->fast_hang_detect) { - adreno_regwrite(device, A3XX_SP_PERFCOUNTER5_SELECT, - SP_ALU_ACTIVE_CYCLES); - adreno_regwrite(device, A3XX_SP_PERFCOUNTER6_SELECT, - SP0_ICL1_MISSES); - adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT, - SP_FS_CFLOW_INSTRUCTIONS); - } + /* Turn on the GPU busy counter and let it run free */ + + adreno_dev->gpu_cycles = 0; } +/* + * Define the available perfcounter groups - these get used by + * adreno_perfcounter_get and adreno_perfcounter_put + */ + +static struct adreno_perfcount_register a3xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_CP_0_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_1_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_2_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_3_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_1_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_2_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_3_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_4_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_5_LO, 0 }, +}; + +static struct 
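The reworked a3xx_busy_cycles() no longer resets the PWR counter on every read; it lets the counter run free and returns the delta since the previous sample, allowing for one 32-bit wraparound and returning 0 on the first read. A self-contained sketch of that delta computation (gpu_cycles mirrors adreno_dev->gpu_cycles):

#include <stdint.h>
#include <stdio.h>

static uint32_t gpu_cycles;     /* last sampled counter value */

/* Return cycles elapsed since the previous call, as in a3xx_busy_cycles() */
static uint32_t busy_delta(uint32_t val)
{
        uint32_t ret = 0;

        if (gpu_cycles != 0) {
                if (val < gpu_cycles)           /* counter wrapped past 2^32 */
                        ret = (0xFFFFFFFF - gpu_cycles) + val;
                else
                        ret = val - gpu_cycles;
        }

        gpu_cycles = val;
        return ret;
}

int main(void)
{
        printf("%u\n", busy_delta(1000));       /* first read -> 0 */
        printf("%u\n", busy_delta(5000));       /* -> 4000 */
        printf("%u\n", busy_delta(100));        /* wrapped -> 4294962395 */
        return 0;
}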
adreno_perfcount_register a3xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_1_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_2_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_3_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_4_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_5_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_6_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_7_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_1_LO, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_VBIF_PERF_CNT0_LO }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_VBIF_PERF_CNT1_LO }, +}; +static struct adreno_perfcount_register a3xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_VBIF_PERF_PWR_CNT0_LO }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_VBIF_PERF_PWR_CNT1_LO }, + { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_VBIF_PERF_PWR_CNT2_LO }, +}; + +static struct adreno_perfcount_group a3xx_perfcounter_groups[] = { + { a3xx_perfcounters_cp, ARRAY_SIZE(a3xx_perfcounters_cp) }, + { a3xx_perfcounters_rbbm, ARRAY_SIZE(a3xx_perfcounters_rbbm) }, + { a3xx_perfcounters_pc, ARRAY_SIZE(a3xx_perfcounters_pc) }, + { a3xx_perfcounters_vfd, ARRAY_SIZE(a3xx_perfcounters_vfd) }, + { a3xx_perfcounters_hlsq, ARRAY_SIZE(a3xx_perfcounters_hlsq) }, + { a3xx_perfcounters_vpc, ARRAY_SIZE(a3xx_perfcounters_vpc) }, + { a3xx_perfcounters_tse, ARRAY_SIZE(a3xx_perfcounters_tse) }, + { a3xx_perfcounters_ras, ARRAY_SIZE(a3xx_perfcounters_ras) }, + { a3xx_perfcounters_uche, ARRAY_SIZE(a3xx_perfcounters_uche) }, + { a3xx_perfcounters_tp, ARRAY_SIZE(a3xx_perfcounters_tp) }, + { a3xx_perfcounters_sp, ARRAY_SIZE(a3xx_perfcounters_sp) }, + { a3xx_perfcounters_rb, ARRAY_SIZE(a3xx_perfcounters_rb) }, + { a3xx_perfcounters_pwr, ARRAY_SIZE(a3xx_perfcounters_pwr) }, + { a3xx_perfcounters_vbif, ARRAY_SIZE(a3xx_perfcounters_vbif) }, + { a3xx_perfcounters_vbif_pwr, ARRAY_SIZE(a3xx_perfcounters_vbif_pwr) }, +}; + +static struct adreno_perfcounters a3xx_perfcounters = { + a3xx_perfcounter_groups, + ARRAY_SIZE(a3xx_perfcounter_groups), +}; + /* Defined in adreno_a3xx_snapshot.c */ void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot, int *remain, int hang); @@ -2867,16 +3382,20 @@ struct adreno_gpudev adreno_a3xx_gpudev = { .reg_rbbm_status = A3XX_RBBM_STATUS, .reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR, .reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA, + .perfcounters = &a3xx_perfcounters, .ctxt_create = a3xx_drawctxt_create, .ctxt_save = a3xx_drawctxt_save, .ctxt_restore = a3xx_drawctxt_restore, .ctxt_draw_workaround = NULL, .rb_init = a3xx_rb_init, + .perfcounter_init = a3xx_perfcounter_init, .irq_control = a3xx_irq_control, .irq_handler = a3xx_irq_handler, .irq_pending = a3xx_irq_pending, .busy_cycles = a3xx_busy_cycles, .start = a3xx_start, .snapshot = a3xx_snapshot, + .perfcounter_enable = a3xx_perfcounter_enable, + .perfcounter_read = a3xx_perfcounter_read, }; diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c 
b/drivers/gpu/msm/adreno_a3xx_snapshot.c index d9d5ec8286d8c1dbd65fc58427e5ee7eb6bc7626..34cac7944479aad4c2fdd9fa928e15fccb52c754 100644 --- a/drivers/gpu/msm/adreno_a3xx_snapshot.c +++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c @@ -21,6 +21,22 @@ #define SHADER_MEMORY_SIZE 0x4000 +/** + * _rbbm_debug_bus_read - Helper function to read data from the RBBM + * debug bus. + * @device - GPU device to read/write registers + * @block_id - Debug bus block to read from + * @index - Index in the debug bus block to read + * @ret - Value of the register read + */ +static void _rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int block = (block_id << 8) | 1 << 16; + adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); + adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); +} + static int a3xx_snapshot_shader_memory(struct kgsl_device *device, void *snapshot, int remain, void *priv) { @@ -243,11 +259,8 @@ static int a3xx_snapshot_debugbus_block(struct kgsl_device *device, header->id = id; header->count = DEBUGFS_BLOCK_SIZE; - for (i = 0; i < DEBUGFS_BLOCK_SIZE; i++) { - adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, val | i); - adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, - &data[i]); - } + for (i = 0; i < DEBUGFS_BLOCK_SIZE; i++) + _rbbm_debug_bus_read(device, id, i, &data[i]); return size; } @@ -309,18 +322,58 @@ static void _snapshot_hlsq_regs(struct kgsl_snapshot_registers *regs, struct kgsl_snapshot_registers_list *list, struct adreno_device *adreno_dev) { - /* HLSQ specific registers */ + struct kgsl_device *device = &adreno_dev->dev; + /* - * Don't dump any a3xx HLSQ registers just yet. Reading the HLSQ - * registers can cause the device to hang if the HLSQ block is - * busy. Add specific checks for each a3xx core as the requirements - * are discovered. Disable by default for now. + * Trying to read HLSQ registers when the HLSQ block is busy + * will cause the device to hang. The RBBM_DEBUG_BUS has information + * that will tell us if the HLSQ block is busy or not. Read values + * from the debug bus to ensure the HLSQ block is not busy (this + * is hardware dependent). If the HLSQ block is busy do not + * dump the registers, otherwise dump the HLSQ registers. 
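The _rbbm_debug_bus_read() helper above packs everything into one RBBM_DEBUG_BUS_CTL write: the index in the low bits, the block id shifted up by 8, and bit 16 set to enable the read. A one-function sketch of that encoding (the block id used below is a placeholder, not a real RBBM_BLOCK_ID_* value):

#include <stdint.h>
#include <stdio.h>

/* Encode a debug bus control word the way _rbbm_debug_bus_read() does */
static uint32_t debug_bus_ctl(uint32_t block_id, uint32_t index)
{
        return (block_id << 8) | (1u << 16) | index;
}

int main(void)
{
        /* placeholder block id, index 49 as used by the HLSQ busy check */
        printf("0x%08x\n", debug_bus_ctl(0x10, 49));
        return 0;
}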
*/ - if (!adreno_is_a3xx(adreno_dev)) { - regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers; - regs[list->count].count = a3xx_hlsq_registers_count; - list->count++; + + if (adreno_is_a330(adreno_dev)) { + /* + * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27] + * + * if (!stall_context_full) + * then dump HLSQ registers + */ + unsigned int stall_context_full = 0; + + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49, + &stall_context_full); + stall_context_full &= 0x08000000; + + if (stall_context_full) + return; + } else { + /* + * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] + * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] + * + * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) + * then dump HLSQ registers + */ + unsigned int next_pif = 0; + + /* check tpif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); + next_pif &= 0x1f; + if (next_pif != 0 && next_pif != 1 && next_pif != 28) + return; + + /* check spif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); + next_pif &= 0x3f; + if (next_pif != 0 && next_pif != 1 && next_pif != 10) + return; } + + regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers; + regs[list->count].count = a3xx_hlsq_registers_count; + list->count++; } static void _snapshot_a330_regs(struct kgsl_snapshot_registers *regs, @@ -414,7 +467,7 @@ void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot, /* Enable Clock gating */ adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL, - A3XX_RBBM_CLOCK_CTL_DEFAULT); + adreno_a3xx_rbbm_clock_ctl_default(adreno_dev)); return snapshot; } diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c index 890c8a1805919018a3c8097c07c3dfb21785f86f..e6e4d769d7962bcfdb2fa2de5dd3c8b7b7abf496 100644 --- a/drivers/gpu/msm/adreno_debugfs.c +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -43,6 +43,17 @@ static int kgsl_cff_dump_enable_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(kgsl_cff_dump_enable_fops, kgsl_cff_dump_enable_get, kgsl_cff_dump_enable_set, "%llu\n"); +static int _active_count_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + unsigned int i = atomic_read(&device->active_cnt); + + *val = (u64) i; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_active_count_fops, _active_count_get, NULL, "%llu\n"); + typedef void (*reg_read_init_t)(struct kgsl_device *device); typedef void (*reg_read_fill_t)(struct kgsl_device *device, int i, unsigned int *vals, int linec); @@ -64,23 +75,18 @@ void adreno_debugfs_init(struct kgsl_device *device) adreno_dev->fast_hang_detect = 1; debugfs_create_u32("fast_hang_detect", 0644, device->d_debugfs, &adreno_dev->fast_hang_detect); - - /* Top level switch to enable/disable userspace FT control */ - adreno_dev->ft_user_control = 0; - debugfs_create_u32("ft_user_control", 0644, device->d_debugfs, - &adreno_dev->ft_user_control); /* * FT policy can be set to any of the options below. 
- * KGSL_FT_DISABLE -> BIT(0) Set to disable FT + * KGSL_FT_OFF -> BIT(0) Set to turn off FT * KGSL_FT_REPLAY -> BIT(1) Set to enable replay * KGSL_FT_SKIPIB -> BIT(2) Set to skip IB * KGSL_FT_SKIPFRAME -> BIT(3) Set to skip frame + * KGSL_FT_DISABLE -> BIT(4) Set to disable FT for faulting context * by default set FT policy to KGSL_FT_DEFAULT_POLICY */ adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY; debugfs_create_u32("ft_policy", 0644, device->d_debugfs, &adreno_dev->ft_policy); - /* By default enable long IB detection */ adreno_dev->long_ib_detect = 1; debugfs_create_u32("long_ib_detect", 0644, device->d_debugfs, @@ -96,7 +102,10 @@ void adreno_debugfs_init(struct kgsl_device *device) * KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT -> BIT(3) Set to log only one * pagefault per INT. */ - adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY; - debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs, - &adreno_dev->ft_pf_policy); + adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY; + debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs, + &adreno_dev->ft_pf_policy); + + debugfs_create_file("active_cnt", 0644, device->d_debugfs, device, + &_active_count_fops); } diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c new file mode 100644 index 0000000000000000000000000000000000000000..72b73b6fa784235716184a3b34e82edcf4537325 --- /dev/null +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -0,0 +1,1415 @@ +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/jiffies.h> +#include <linux/err.h> + +#include "kgsl.h" +#include "adreno.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" + +#define ADRENO_DISPATCHER_ACTIVE 0 +#define ADRENO_DISPATCHER_PAUSE 1 + +#define ADRENO_DISPATCHER_SOFT_FAULT 1 +#define ADRENO_DISPATCHER_HARD_FAULT 2 +#define ADRENO_DISPATCHER_TIMEOUT_FAULT 3 + +#define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s)) + +/* Number of commands that can be queued in a context before it sleeps */ +static unsigned int _context_cmdqueue_size = 50; + +/* Number of milliseconds to wait for the context queue to clear */ +static unsigned int _context_queue_wait = 10000; + +/* Number of command batches sent at a time from a single context */ +static unsigned int _context_cmdbatch_burst = 5; + +/* Number of command batches inflight in the ringbuffer at any time */ +static unsigned int _dispatcher_inflight = 15; + +/* Command batch timeout (in milliseconds) */ +static unsigned int _cmdbatch_timeout = 2000; + +/* Interval for reading and comparing fault detection registers */ +static unsigned int _fault_timer_interval = 100; + +/* Local array for the current set of fault detect registers */ +static unsigned int *fault_detect_regs; + +/** + * fault_detect_read() - Read the set of fault detect registers + * @device: Pointer to the KGSL device struct + * + * Read the set of fault detect registers and store them in the local array. 
+ * This is for the initial values that are compared later with + * fault_detect_read_compare + */ +static void fault_detect_read(struct kgsl_device *device) +{ + int i; + + for (i = 0; i < FT_DETECT_REGS_COUNT; i++) { + if (ft_detect_regs[i] == 0) + continue; + adreno_regread(device, ft_detect_regs[i], + &fault_detect_regs[i]); + } +} + +/** + * fault_detect_read_compare() - Read the fault detect registers and compare + * them to the current value + * @device: Pointer to the KGSL device struct + * + * Read the set of fault detect registers and compare them to the current set + * of registers. Return 1 if any of the register values changed + */ +static int fault_detect_read_compare(struct kgsl_device *device) +{ + int i, ret = 0; + + for (i = 0; i < FT_DETECT_REGS_COUNT; i++) { + unsigned int val; + + if (ft_detect_regs[i] == 0) + continue; + adreno_regread(device, ft_detect_regs[i], &val); + if (val != fault_detect_regs[i]) + ret = 1; + fault_detect_regs[i] = val; + } + + return ret; +} + +/** + * adreno_context_get_cmdbatch() - Get a new command from a context queue + * @drawctxt: Pointer to the adreno draw context + * + * Dequeue a new command batch from the context list + */ +static inline struct kgsl_cmdbatch *adreno_context_get_cmdbatch( + struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch = NULL; + + mutex_lock(&drawctxt->mutex); + if (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + cmdbatch = drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + /* + * Don't dequeue a cmdbatch that is still waiting for other + * events + */ + if (kgsl_cmdbatch_sync_pending(cmdbatch)) { + cmdbatch = ERR_PTR(-EAGAIN); + goto done; + } + + drawctxt->cmdqueue_head = + CMDQUEUE_NEXT(drawctxt->cmdqueue_head, + ADRENO_CONTEXT_CMDQUEUE_SIZE); + drawctxt->queued--; + } + +done: + mutex_unlock(&drawctxt->mutex); + + return cmdbatch; +} + +/** + * adreno_context_requeue_cmdbatch() - Put a command back on the context queue + * @drawctxt: Pointer to the adreno draw context + * @cmdbatch: Pointer to the KGSL cmdbatch to requeue + * + * Failure to submit a command to the ringbuffer isn't the fault of the command + * being submitted so if a failure happens, push it back on the head of the the + * context queue to be reconsidered again + */ +static inline void adreno_context_requeue_cmdbatch( + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch) +{ + unsigned int prev; + mutex_lock(&drawctxt->mutex); + + prev = drawctxt->cmdqueue_head - 1; + + if (prev < 0) + prev = ADRENO_CONTEXT_CMDQUEUE_SIZE - 1; + + /* + * The maximum queue size always needs to be one less then the size of + * the ringbuffer queue so there is "room" to put the cmdbatch back in + */ + + BUG_ON(prev == drawctxt->cmdqueue_tail); + + drawctxt->cmdqueue[prev] = cmdbatch; + drawctxt->queued++; + + /* Reset the command queue head to reflect the newly requeued change */ + drawctxt->cmdqueue_head = prev; + mutex_unlock(&drawctxt->mutex); +} + +/** + * dispatcher_queue_context() - Queue a context in the dispatcher pending list + * @dispatcher: Pointer to the adreno dispatcher struct + * @drawctxt: Pointer to the adreno draw context + * + * Add a context to the dispatcher pending list. 
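fault_detect_read() primes a snapshot of the watched registers and fault_detect_read_compare() reports whether any of them moved since the last tick; no movement is treated as a possible hang. A minimal sketch of the compare step with a plain array standing in for adreno_regread() (the zero-entry skip in ft_detect_regs is not modeled):

#include <stdio.h>

#define NREGS 4

static unsigned int saved[NREGS];       /* last snapshot */

/* Return 1 if any register changed since the last call, 0 otherwise */
static int read_compare(const unsigned int *current)
{
        int i, changed = 0;

        for (i = 0; i < NREGS; i++) {
                if (current[i] != saved[i])
                        changed = 1;
                saved[i] = current[i];  /* refresh the snapshot either way */
        }

        return changed;
}

int main(void)
{
        unsigned int a[NREGS] = { 1, 2, 3, 4 };
        unsigned int b[NREGS] = { 1, 2, 3, 4 };
        unsigned int c[NREGS] = { 1, 2, 9, 4 };

        read_compare(a);                 /* prime the snapshot */
        printf("%d\n", read_compare(b)); /* 0: no progress, possible hang */
        printf("%d\n", read_compare(c)); /* 1: the GPU is still ticking */
        return 0;
}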
+ */ +static void dispatcher_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + spin_lock(&dispatcher->plist_lock); + + + if (plist_node_empty(&drawctxt->pending)) { + /* Get a reference to the context while it sits on the list */ + _kgsl_context_get(&drawctxt->base); + trace_dispatch_queue_context(drawctxt); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + + spin_unlock(&dispatcher->plist_lock); +} + +/** + * sendcmd() - Send a command batch to the GPU hardware + * @dispatcher: Pointer to the adreno dispatcher struct + * @cmdbatch: Pointer to the KGSL cmdbatch being sent + * + * Send a KGSL command batch to the GPU hardware + */ +static int sendcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + dispatcher->inflight++; + + mutex_lock(&device->mutex); + + if (dispatcher->inflight == 1) { + /* Time to make the donuts. Turn on the GPU */ + ret = kgsl_active_count_get(device); + if (ret) { + dispatcher->inflight--; + mutex_unlock(&device->mutex); + return ret; + } + } + + ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdbatch); + + /* + * On the first command, if the submission was successful, then read the + * fault registers. If it failed then turn off the GPU. Sad face. + */ + + if (dispatcher->inflight == 1) { + if (ret == 0) + fault_detect_read(device); + else + kgsl_active_count_put(device); + } + + mutex_unlock(&device->mutex); + + if (ret) { + dispatcher->inflight--; + KGSL_DRV_ERR(device, + "Unable to submit command to the ringbuffer\n"); + return ret; + } + + trace_adreno_cmdbatch_submitted(cmdbatch, dispatcher->inflight); + + dispatcher->cmdqueue[dispatcher->tail] = cmdbatch; + dispatcher->tail = (dispatcher->tail + 1) % + ADRENO_DISPATCH_CMDQUEUE_SIZE; + + /* + * If this is the first command in the pipe then the GPU will + * immediately start executing it so we can start the expiry timeout on + * the command batch here. Subsequent command batches will have their + * timer started when the previous command batch is retired + */ + if (dispatcher->inflight == 1) { + cmdbatch->expires = jiffies + + msecs_to_jiffies(_cmdbatch_timeout); + mod_timer(&dispatcher->timer, cmdbatch->expires); + + /* Start the fault detection timer */ + if (adreno_dev->fast_hang_detect) + mod_timer(&dispatcher->fault_timer, + jiffies + + msecs_to_jiffies(_fault_timer_interval)); + } + + return 0; +} + +/** + * dispatcher_context_sendcmds() - Send commands from a context to the GPU + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno context to dispatch commands from + * + * Dequeue and send a burst of commands from the specified context to the GPU + */ +static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int count = 0; + + /* + * Each context can send a specific number of command batches per cycle + */ + for ( ; count < _context_cmdbatch_burst && + dispatcher->inflight < _dispatcher_inflight; count++) { + int ret; + struct kgsl_cmdbatch *cmdbatch = + adreno_context_get_cmdbatch(drawctxt); + + if (cmdbatch == NULL) + break; + + /* + * adreno_context_get_cmdbatch returns -EAGAIN if the current + * cmdbatch has pending sync points so no more to do here. 
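Both the per-context command queue and the dispatcher queue are fixed-size rings indexed with CMDQUEUE_NEXT(); head == tail means empty, so one slot always stays unused, which is what lets the requeue path step head back by one slot. A small index-arithmetic sketch (the queue size is arbitrary; note that the sketch tests for zero before subtracting, since an unsigned index cannot go negative):

#include <stdio.h>

#define QSIZE 8                         /* stand-in for the real queue size */
#define NEXT(i) (((i) + 1) % QSIZE)     /* CMDQUEUE_NEXT() */

int main(void)
{
        unsigned int head = 0, tail = 0, queued = 0;

        /* enqueue three items: store at tail, then advance tail */
        for (int i = 0; i < 3; i++) {
                tail = NEXT(tail);
                queued++;
        }

        /* dequeue one, as adreno_context_get_cmdbatch() does */
        head = NEXT(head);
        queued--;

        /*
         * Put it back on the front, as adreno_context_requeue_cmdbatch()
         * does: step head back one slot, wrapping below zero.
         */
        unsigned int prev = (head == 0) ? QSIZE - 1 : head - 1;
        head = prev;
        queued++;

        printf("head=%u tail=%u queued=%u\n", head, tail, queued);
        return 0;
}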
+ * When the sync points are satisfied then the context will get + * reqeueued + */ + + if (IS_ERR(cmdbatch)) + return count; + + /* + * If this is a synchronization submission then there are no + * commands to submit. Discard it and get the next item from + * the queue. Decrement count so this packet doesn't count + * against the burst for the context + */ + + if (cmdbatch->flags & KGSL_CONTEXT_SYNC) { + count--; + kgsl_cmdbatch_destroy(cmdbatch); + continue; + } + + ret = sendcmd(adreno_dev, cmdbatch); + + /* + * There are various reasons why we can't submit a command (no + * memory for the commands, full ringbuffer, etc) but none of + * these are actually the current command's fault. Requeue it + * back on the context and let it come back around again if + * conditions improve + */ + if (ret) { + adreno_context_requeue_cmdbatch(drawctxt, cmdbatch); + break; + } + } + + /* + * If the context successfully submitted commands, then + * unconditionally put it back on the queue to be considered the + * next time around. This might seem a little wasteful but it is + * reasonable to think that a busy context will stay busy. + */ + + if (count) { + dispatcher_queue_context(adreno_dev, drawctxt); + + /* + * If we submitted something there will be room in the + * context queue so ping the context wait queue on the + * chance that the context is snoozing + */ + + wake_up_interruptible_all(&drawctxt->wq); + } + + return count; +} + +/** + * _adreno_dispatcher_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Issue as many commands as possible (up to inflight) from the pending contexts + * This function assumes the dispatcher mutex has been locked. + */ +static int _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* Don't do anything if the dispatcher is paused */ + if (dispatcher->state != ADRENO_DISPATCHER_ACTIVE) + return 0; + + while (dispatcher->inflight < _dispatcher_inflight) { + struct adreno_context *drawctxt = NULL; + + spin_lock(&dispatcher->plist_lock); + + if (!plist_head_empty(&dispatcher->pending)) { + drawctxt = plist_first_entry(&dispatcher->pending, + struct adreno_context, pending); + + plist_del(&drawctxt->pending, &dispatcher->pending); + } + + spin_unlock(&dispatcher->plist_lock); + + if (drawctxt == NULL) + break; + + if (kgsl_context_detached(&drawctxt->base) || + drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) { + kgsl_context_put(&drawctxt->base); + continue; + } + + dispatcher_context_sendcmds(adreno_dev, drawctxt); + kgsl_context_put(&drawctxt->base); + } + + return 0; +} + +/** + * adreno_dispatcher_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Lock the dispatcher and call _adreno_dispatcher_issueibcmds + */ +int adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + mutex_lock(&dispatcher->mutex); + ret = _adreno_dispatcher_issuecmds(adreno_dev); + mutex_unlock(&dispatcher->mutex); + + return ret; +} + +static int _check_context_queue(struct adreno_context *drawctxt) +{ + int ret; + + mutex_lock(&drawctxt->mutex); + + /* + * Wake up if there is room in the context or if the whole thing got + * invalidated while we were asleep + */ + + if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) + ret = 1; + else + ret = drawctxt->queued < _context_cmdqueue_size ? 
1 : 0; + + mutex_unlock(&drawctxt->mutex); + + return ret; +} + +/** + * get_timestamp() - Return the next timestamp for the context + * @drawctxt - Pointer to an adreno draw context struct + * @cmdbatch - Pointer to a command batch + * @timestamp - Pointer to a timestamp value possibly passed from the user + * + * Assign a timestamp based on the settings of the draw context and the command + * batch. + */ +static int get_timestamp(struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch, unsigned int *timestamp) +{ + /* Synchronization commands don't get a timestamp */ + if (cmdbatch->flags & KGSL_CONTEXT_SYNC) { + *timestamp = 0; + return 0; + } + + if (drawctxt->flags & CTXT_FLAGS_USER_GENERATED_TS) { + /* + * User specified timestamps need to be greater than the last + * issued timestamp in the context + */ + if (timestamp_cmp(drawctxt->timestamp, *timestamp) >= 0) + return -ERANGE; + + drawctxt->timestamp = *timestamp; + } else + drawctxt->timestamp++; + + *timestamp = drawctxt->timestamp; + return 0; +} + +/** + * adreno_context_queue_cmd() - Queue a new command in the context + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno draw context + * @cmdbatch: Pointer to the command batch being submitted + * @timestamp: Pointer to the requested timestamp + * + * Queue a command in the context - if there isn't any room in the queue, then + * block until there is + */ +int adreno_context_queue_cmd(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) +{ + int ret; + + mutex_lock(&drawctxt->mutex); + + if (drawctxt->flags & CTXT_FLAGS_BEING_DESTROYED) { + mutex_unlock(&drawctxt->mutex); + return -EINVAL; + } + + /* + * After skipping to the end of the frame we need to force the preamble + * to run (if it exists) regardless of the context state. + */ + + if (drawctxt->flags & CTXT_FLAGS_FORCE_PREAMBLE) { + cmdbatch->priv |= CMDBATCH_FLAG_FORCE_PREAMBLE; + drawctxt->flags &= ~CTXT_FLAGS_FORCE_PREAMBLE; + } + + /* + * If we are waiting for the end of frame and it hasn't appeared yet, + * then mark the command batch as skipped. It will still progress + * through the pipeline but it won't actually send any commands + */ + + if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) { + cmdbatch->priv |= CMDBATCH_FLAG_SKIP; + + /* + * If this command batch represents the EOF then clear the way + * for the dispatcher to continue submitting + */ + + if (cmdbatch->flags & KGSL_CONTEXT_END_OF_FRAME) { + drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF; + + /* + * Force the preamble on the next command to ensure that + * the state is correct + */ + + drawctxt->flags |= CTXT_FLAGS_FORCE_PREAMBLE; + } + } + + /* Wait for room in the context queue */ + + while (drawctxt->queued >= _context_cmdqueue_size) { + trace_adreno_context_sleep(drawctxt); + mutex_unlock(&drawctxt->mutex); + + ret = wait_event_interruptible_timeout(drawctxt->wq, + _check_context_queue(drawctxt), + msecs_to_jiffies(_context_queue_wait)); + + mutex_lock(&drawctxt->mutex); + trace_adreno_context_wake(drawctxt); + + if (ret <= 0) { + mutex_unlock(&drawctxt->mutex); + return (ret == 0) ? 
-ETIMEDOUT : (int) ret; + } + + /* + * Account for the possiblity that the context got invalidated + * while we were sleeping + */ + + if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) { + mutex_unlock(&drawctxt->mutex); + return -EDEADLK; + } + } + + ret = get_timestamp(drawctxt, cmdbatch, timestamp); + if (ret) { + mutex_unlock(&drawctxt->mutex); + return ret; + } + + cmdbatch->timestamp = *timestamp; + + /* The batch fault policy is the current system fault policy */ + cmdbatch->fault_policy = adreno_dev->ft_policy; + + /* Put the command into the queue */ + drawctxt->cmdqueue[drawctxt->cmdqueue_tail] = cmdbatch; + drawctxt->cmdqueue_tail = (drawctxt->cmdqueue_tail + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + drawctxt->queued++; + trace_adreno_cmdbatch_queued(cmdbatch, drawctxt->queued); + + + mutex_unlock(&drawctxt->mutex); + + /* Add the context to the dispatcher pending list */ + dispatcher_queue_context(adreno_dev, drawctxt); + + /* + * Only issue commands if inflight is less than burst -this prevents us + * from sitting around waiting for the mutex on a busy system - the work + * loop will schedule it for us. Inflight is mutex protected but the + * worse that can happen is that it will go to 0 after we check and if + * it goes to 0 it is because the work loop decremented it and the work + * queue will try to schedule new commands anyway. + */ + + if (adreno_dev->dispatcher.inflight < _context_cmdbatch_burst) + adreno_dispatcher_issuecmds(adreno_dev); + + return 0; +} + +/* + * If an IB inside of the command batch has a gpuaddr that matches the base + * passed in then zero the size which effectively skips it when it is submitted + * in the ringbuffer. + */ +static void cmdbatch_skip_ib(struct kgsl_cmdbatch *cmdbatch, unsigned int base) +{ + int i; + + for (i = 0; i < cmdbatch->ibcount; i++) { + if (cmdbatch->ibdesc[i].gpuaddr == base) { + cmdbatch->ibdesc[i].sizedwords = 0; + return; + } + } +} + +static void cmdbatch_skip_frame(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmdbatch **replay, int count) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + int skip = 1; + int i; + + for (i = 0; i < count; i++) { + + /* + * Only operate on command batches that belong to the + * faulting context + */ + + if (replay[i]->context->id != cmdbatch->context->id) + continue; + + /* + * Skip all the command batches in this context until + * the EOF flag is seen. If the EOF flag is seen then + * force the preamble for the next command. 
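get_timestamp() above lets the context auto-increment its timestamp unless the user supplied one, in which case the value must be strictly greater than the last issued timestamp or the submission fails with -ERANGE. A small sketch of that rule, using a plain comparison in place of the driver's wraparound-aware timestamp_cmp():

#include <stdio.h>

static unsigned int ctx_timestamp;      /* last issued timestamp */

/* Return 0 and set *out on success, -1 if the user value does not advance */
static int assign_timestamp(int user_generated, unsigned int requested,
        unsigned int *out)
{
        if (user_generated) {
                if (requested <= ctx_timestamp)
                        return -1;      /* -ERANGE in the driver */
                ctx_timestamp = requested;
        } else {
                ctx_timestamp++;
        }

        *out = ctx_timestamp;
        return 0;
}

int main(void)
{
        unsigned int ts;

        assign_timestamp(0, 0, &ts);                    /* kernel-generated */
        printf("%u\n", ts);                             /* 1 */
        printf("%d\n", assign_timestamp(1, 1, &ts));    /* -1: not > 1 */
        printf("%d\n", assign_timestamp(1, 10, &ts));   /* 0: accepted */
        printf("%u\n", ts);                             /* 10 */
        return 0;
}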
+ */ + + if (skip) { + replay[i]->priv |= CMDBATCH_FLAG_SKIP; + + if (replay[i]->flags & KGSL_CONTEXT_END_OF_FRAME) + skip = 0; + } else { + replay[i]->priv |= CMDBATCH_FLAG_FORCE_PREAMBLE; + return; + } + } + + /* + * If the EOF flag hasn't been seen yet then set the flag in the + * drawctxt to keep looking for it + */ + + if (skip && drawctxt) + drawctxt->flags |= CTXT_FLAGS_SKIP_EOF; + + /* + * If we did see the EOF flag then force the preamble on for the + * next command issued on this context + */ + + if (!skip && drawctxt) + drawctxt->flags |= CTXT_FLAGS_FORCE_PREAMBLE; +} + +static void remove_invalidated_cmdbatches(struct kgsl_device *device, + struct kgsl_cmdbatch **replay, int count) +{ + int i; + + for (i = 0; i < count; i++) { + struct kgsl_cmdbatch *cmd = replay[i]; + struct adreno_context *drawctxt; + + if (cmd == NULL) + continue; + + drawctxt = ADRENO_CONTEXT(cmd->context); + + if (kgsl_context_detached(cmd->context) || + drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) { + replay[i] = NULL; + + mutex_lock(&device->mutex); + kgsl_cancel_events_timestamp(device, cmd->context, + cmd->timestamp); + mutex_unlock(&device->mutex); + + kgsl_cmdbatch_destroy(cmd); + } + } +} + +static void dispatcher_do_fault(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + unsigned int ptr; + unsigned int reg, base; + struct kgsl_cmdbatch **replay = NULL; + struct kgsl_cmdbatch *cmdbatch; + int ret, i, count = 0; + + BUG_ON(dispatcher->inflight == 0); + + /* Turn off all the timers */ + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + + mutex_lock(&device->mutex); + + cmdbatch = dispatcher->cmdqueue[dispatcher->head]; + + /* + * If the fault was due to a timeout then stop the CP to ensure we don't + * get activity while we are trying to dump the state of the system + */ + + if (dispatcher->fault == ADRENO_DISPATCHER_TIMEOUT_FAULT) { + kgsl_regread(device, REG_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + kgsl_regwrite(device, REG_CP_ME_CNTL, reg); + + /* Skip the PM dump for a timeout because it confuses people */ + cmdbatch->fault_policy |= KGSL_FT_SKIP_PMDUMP; + } + + kgsl_regread(device, REG_CP_IB1_BASE, &base); + + /* + * Dump the postmortem and snapshot information if this is the first + * detected fault for the oldest active command batch + */ + + if (!(cmdbatch->fault_policy & KGSL_FT_SKIP_PMDUMP)) { + kgsl_postmortem_dump(device, 0); + kgsl_device_snapshot(device, 1); + } + + mutex_unlock(&device->mutex); + + /* Allocate memory to store the inflight commands */ + replay = kzalloc(sizeof(*replay) * dispatcher->inflight, GFP_KERNEL); + + if (replay == NULL) { + unsigned int ptr = dispatcher->head; + + while (ptr != dispatcher->tail) { + struct kgsl_context *context = + dispatcher->cmdqueue[ptr]->context; + + adreno_drawctxt_invalidate(device, context); + kgsl_cmdbatch_destroy(dispatcher->cmdqueue[ptr]); + + ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE); + } + + /* + * Set the replay count to zero - this will ensure that the + * hardware gets reset but nothing else goes played + */ + + count = 0; + goto replay; + } + + /* Copy the inflight command batches into the temporary storage */ + ptr = dispatcher->head; + + while (ptr != dispatcher->tail) { + replay[count++] = dispatcher->cmdqueue[ptr]; + ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE); + } + + /* + * For the purposes of replay, we assume that the oldest command batch + * 
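cmdbatch_skip_frame() marks every remaining batch from the faulting context as skipped until one carrying the end-of-frame flag is seen, then forces a preamble on the batch that follows. A compact sketch of that walk over an array of flag words (all flag values below are illustrative stand-ins):

#include <stdio.h>

#define EOF_FLAG   0x1   /* stands in for KGSL_CONTEXT_END_OF_FRAME */
#define SKIP       0x2   /* stands in for CMDBATCH_FLAG_SKIP */
#define PREAMBLE   0x4   /* stands in for CMDBATCH_FLAG_FORCE_PREAMBLE */

static void skip_frame(unsigned int *batch, int count)
{
        int skip = 1;

        for (int i = 0; i < count; i++) {
                if (skip) {
                        batch[i] |= SKIP;
                        if (batch[i] & EOF_FLAG)
                                skip = 0;       /* end of frame reached */
                } else {
                        batch[i] |= PREAMBLE;   /* first batch of next frame */
                        return;
                }
        }
}

int main(void)
{
        unsigned int batches[4] = { 0, 0, EOF_FLAG, 0 };

        skip_frame(batches, 4);
        for (int i = 0; i < 4; i++)
                printf("batch %d: 0x%x\n", i, batches[i]);
        return 0;
}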
that hasn't retired a timestamp is "hung". + */ + + cmdbatch = replay[0]; + + /* + * Set a flag so we don't print another PM dump if the cmdbatch fails + * again on replay + */ + + cmdbatch->fault_policy |= KGSL_FT_SKIP_PMDUMP; + + /* + * A hardware fault generally means something was deterministically + * wrong with the command batch - no point in trying to replay it + * Clear the replay bit and move on to the next policy level + */ + + if (dispatcher->fault == ADRENO_DISPATCHER_HARD_FAULT) + cmdbatch->fault_policy &= ~KGSL_FT_REPLAY; + + /* + * A timeout fault means the IB timed out - don't be silly and replay + * it, because it will probably timeout again + */ + + if (dispatcher->fault == ADRENO_DISPATCHER_TIMEOUT_FAULT) + cmdbatch->fault_policy &= ~KGSL_FT_REPLAY; + + /* + * Execute the fault tolerance policy. Each command batch stores the + * current fault policy that was set when it was queued. + * As the options are tried in descending priority + * (REPLAY -> SKIPIBS -> SKIPFRAME -> NOTHING) the bits are cleared + * from the cmdbatch policy so the next thing can be tried if the + * change comes around again + */ + + /* Replay the hanging command batch again */ + if (cmdbatch->fault_policy & KGSL_FT_REPLAY) { + cmdbatch->fault_policy &= ~KGSL_FT_REPLAY; + goto replay; + } + + /* + * Skip the last IB1 that was played but replay everything else. + * Note that the last IB1 might not be in the "hung" command batch + * because the CP may have caused a page-fault while it was prefetching + * the next IB1/IB2. walk all outstanding commands and zap the + * supposedly bad IB1 where ever it lurks. + */ + + if (cmdbatch->fault_policy & KGSL_FT_SKIPIB) { + cmdbatch->fault_policy &= ~KGSL_FT_SKIPIB; + + for (i = 0; i < count; i++) { + if (replay[i] != NULL) + cmdbatch_skip_ib(replay[i], base); + } + + goto replay; + } + + if (cmdbatch->fault_policy & KGSL_FT_SKIPFRAME) { + + cmdbatch->fault_policy &= ~KGSL_FT_SKIPFRAME; + + /* + * Skip all the pending command batches for this context until + * the EOF frame is seen + */ + cmdbatch_skip_frame(cmdbatch, replay, count); + goto replay; + } + + /* If we get here then all the policies failed or FT is disabled */ + + /* Invalidate the context */ + adreno_drawctxt_invalidate(device, cmdbatch->context); + + /* Remove any pending command batches that have been invalidated */ + remove_invalidated_cmdbatches(device, replay, count); + +replay: + /* Reset the dispatcher queue */ + dispatcher->inflight = 0; + dispatcher->head = dispatcher->tail = 0; + + /* Reset the GPU */ + mutex_lock(&device->mutex); + ret = adreno_reset(device); + mutex_unlock(&device->mutex); + + /* If adreno_reset() fails then what hope do we have for the future? 
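The recovery path above consumes cmdbatch->fault_policy one bit at a time, so repeated faults on the same batch step down from replay, to skipping the bad IB, to skipping the rest of the frame, and finally to invalidating the context. A hedged sketch of that cascade (the bit values are illustrative, not the KGSL_FT_* constants):

#include <stdio.h>

#define FT_REPLAY     0x2   /* illustrative stand-ins for the KGSL_FT_* bits */
#define FT_SKIPIB     0x4
#define FT_SKIPFRAME  0x8

/* Pick the next action for a faulting batch and consume that policy bit */
static const char *next_action(unsigned int *policy)
{
        if (*policy & FT_REPLAY) {
                *policy &= ~FT_REPLAY;
                return "replay";
        }
        if (*policy & FT_SKIPIB) {
                *policy &= ~FT_SKIPIB;
                return "skip the bad IB";
        }
        if (*policy & FT_SKIPFRAME) {
                *policy &= ~FT_SKIPFRAME;
                return "skip to end of frame";
        }
        return "invalidate the context";
}

int main(void)
{
        unsigned int policy = FT_REPLAY | FT_SKIPIB | FT_SKIPFRAME;

        /* four consecutive faults on the same batch walk down the ladder */
        for (int i = 0; i < 4; i++)
                printf("fault %d: %s\n", i, next_action(&policy));
        return 0;
}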
*/ + BUG_ON(ret); + + /* + * Force the preamble on the first command (if applicable) to avoid any + * strange stage issues + */ + + if (replay[0]) + replay[0]->priv |= CMDBATCH_FLAG_FORCE_PREAMBLE; + + /* Replay the pending command buffers */ + for (i = 0; i < count; i++) { + + int ret; + + if (replay[i] == NULL) + continue; + + /* + * Force each command batch to wait for idle - this avoids wierd + * CP parse issues + */ + + replay[i]->flags |= KGSL_CMD_FLAGS_WFI; + + ret = sendcmd(adreno_dev, replay[i]); + + /* + * If sending the command fails, then try to recover by + * invalidating the context + */ + + if (ret) { + adreno_drawctxt_invalidate(device, cmdbatch->context); + + remove_invalidated_cmdbatches(device, &replay[i], + count - i); + } + } + + mutex_lock(&device->mutex); + kgsl_active_count_put(device); + mutex_unlock(&device->mutex); + + kfree(replay); +} + +static inline int cmdbatch_consumed(struct kgsl_cmdbatch *cmdbatch, + unsigned int consumed, unsigned int retired) +{ + return ((timestamp_cmp(cmdbatch->timestamp, consumed) >= 0) && + (timestamp_cmp(retired, cmdbatch->timestamp) < 0)); +} + +/** + * adreno_dispatcher_work() - Master work handler for the dispatcher + * @work: Pointer to the work struct for the current work queue + * + * Process expired commands and send new ones. + */ +static void adreno_dispatcher_work(struct work_struct *work) +{ + struct adreno_dispatcher *dispatcher = + container_of(work, struct adreno_dispatcher, work); + struct adreno_device *adreno_dev = + container_of(dispatcher, struct adreno_device, dispatcher); + struct kgsl_device *device = &adreno_dev->dev; + int count = 0; + + mutex_lock(&dispatcher->mutex); + + while (dispatcher->head != dispatcher->tail) { + uint32_t consumed, retired = 0; + struct kgsl_cmdbatch *cmdbatch = + dispatcher->cmdqueue[dispatcher->head]; + struct adreno_context *drawctxt; + BUG_ON(cmdbatch == NULL); + + drawctxt = ADRENO_CONTEXT(cmdbatch->context); + + /* + * First try to expire the timestamp. This happens if the + * context is valid and the timestamp expired normally or if the + * context was destroyed before the command batch was finished + * in the GPU. Either way retire the command batch advance the + * pointers and continue processing the queue + */ + + if (!kgsl_context_detached(cmdbatch->context)) + retired = kgsl_readtimestamp(device, cmdbatch->context, + KGSL_TIMESTAMP_RETIRED); + + if (kgsl_context_detached(cmdbatch->context) || + (timestamp_cmp(cmdbatch->timestamp, retired) <= 0)) { + + trace_adreno_cmdbatch_retired(cmdbatch, + dispatcher->inflight - 1); + + /* Reduce the number of inflight command batches */ + dispatcher->inflight--; + + /* Zero the old entry*/ + dispatcher->cmdqueue[dispatcher->head] = NULL; + + /* Advance the buffer head */ + dispatcher->head = CMDQUEUE_NEXT(dispatcher->head, + ADRENO_DISPATCH_CMDQUEUE_SIZE); + + /* Destroy the retired command batch */ + kgsl_cmdbatch_destroy(cmdbatch); + + /* Update the expire time for the next command batch */ + + if (dispatcher->inflight > 0) { + cmdbatch = + dispatcher->cmdqueue[dispatcher->head]; + cmdbatch->expires = jiffies + + msecs_to_jiffies(_cmdbatch_timeout); + } + + count++; + continue; + } + + /* + * If we got a fault from the interrupt handler, this command + * is to blame. 
Invalidate it, reset and replay + */ + + if (dispatcher->fault) { + dispatcher_do_fault(device); + dispatcher->fault = 0; + goto done; + } + + /* Get the last consumed timestamp */ + consumed = kgsl_readtimestamp(device, cmdbatch->context, + KGSL_TIMESTAMP_CONSUMED); + + /* + * Break here if fault detection is dsiabled for the context or + * if the long running IB detection is disabled device wide + * Long running command buffers will be allowed to run to + * completion - but badly behaving command buffers (infinite + * shaders etc) can end up running forever. + */ + + if (!adreno_dev->long_ib_detect || + drawctxt->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE) + break; + + /* + * The last line of defense is to check if the command batch has + * timed out. If we get this far but the timeout hasn't expired + * yet then the GPU is still ticking away + */ + + if (time_is_after_jiffies(cmdbatch->expires)) + break; + + /* Boom goes the dynamite */ + + KGSL_DRV_ERR(device, + "Context %d, timestamp %d ran too long\n", + drawctxt->base.id, drawctxt->timestamp); + + dispatcher->fault = ADRENO_DISPATCHER_TIMEOUT_FAULT; + + dispatcher_do_fault(device); + break; + } + + /* + * Decrement the active count to 0 - this will allow the system to go + * into suspend even if there are queued command batches + */ + + if (count && dispatcher->inflight == 0) { + mutex_lock(&device->mutex); + kgsl_active_count_put(device); + mutex_unlock(&device->mutex); + } + + /* Dispatch new commands if we have the room */ + if (dispatcher->inflight < _dispatcher_inflight) + _adreno_dispatcher_issuecmds(adreno_dev); + +done: + /* Either update the timer for the next command batch or disable it */ + if (dispatcher->inflight) { + struct kgsl_cmdbatch *cmdbatch + = dispatcher->cmdqueue[dispatcher->head]; + + /* Update the timeout timer for the next command batch */ + mod_timer(&dispatcher->timer, cmdbatch->expires); + } else { + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + } + + /* Before leaving update the pwrscale information */ + mutex_lock(&device->mutex); + kgsl_pwrscale_idle(device); + mutex_unlock(&device->mutex); + + mutex_unlock(&dispatcher->mutex); +} + +void adreno_dispatcher_schedule(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + queue_work(device->work_queue, &dispatcher->work); +} + +/** + * adreno_dispatcher_queue_context() - schedule a drawctxt in the dispatcher + * device: pointer to the KGSL device + * drawctxt: pointer to the drawctxt to schedule + * + * Put a draw context on the dispatcher pending queue and schedule the + * dispatcher. This is used to reschedule changes that might have been blocked + * for sync points or other concerns + */ +void adreno_dispatcher_queue_context(struct kgsl_device *device, + struct adreno_context *drawctxt) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + dispatcher_queue_context(adreno_dev, drawctxt); + adreno_dispatcher_schedule(device); +} + +/* + * This is called on a regular basis while command batches are inflight. Fault + * detection registers are read and compared to the existing values - if they + * changed then the GPU is still running. 
If they are the same between + * subsequent calls then the GPU may have faulted + */ + +void adreno_dispatcher_fault_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* Leave if the user decided to turn off fast hang detection */ + if (adreno_dev->fast_hang_detect == 0) + return; + + /* Don't do anything if the dispatcher is idle or faulted */ + if (dispatcher->inflight == 0 || dispatcher->fault) + return; + + /* Make sure the device is active before trying a read */ + if (device->state != KGSL_STATE_ACTIVE) + return; + + /* + * Read the fault registers - if it returns 0 then they haven't changed + * so mark the dispatcher as faulted and schedule the work loop. + */ + + if (!fault_detect_read_compare(device)) { + dispatcher->fault = ADRENO_DISPATCHER_SOFT_FAULT; + adreno_dispatcher_schedule(device); + } else { + mod_timer(&dispatcher->fault_timer, + jiffies + msecs_to_jiffies(_fault_timer_interval)); + } +} + +/* + * This is called when the timer expires - it either means the GPU is hung or + * the IB is taking too long to execute + */ +void adreno_dispatcher_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &adreno_dev->dev; + + adreno_dispatcher_schedule(device); +} +/** + * adreno_dispatcher_irq_fault() - Trigger a fault in the dispatcher + * @device: Pointer to the KGSL device + * + * Called from an interrupt context this will trigger a fault in the + * dispatcher for the oldest pending command batch + */ +void adreno_dispatcher_irq_fault(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + dispatcher->fault = ADRENO_DISPATCHER_HARD_FAULT; + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_pause() - stop the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Pause the dispather so it doesn't accept any new commands + */ +void adreno_dispatcher_pause(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* + * This will probably get called while holding other mutexes so don't + * take the dispatcher mutex. 
The biggest penalty is that another + * command might be submitted while we are in here but thats okay + * because whoever is waiting for the drain will just have another + * command batch to wait for + */ + + dispatcher->state = ADRENO_DISPATCHER_PAUSE; +} + +/** + * adreno_dispatcher_start() - activate the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Set the disaptcher active and start the loop once to get things going + */ +void adreno_dispatcher_start(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + dispatcher->state = ADRENO_DISPATCHER_ACTIVE; + + /* Schedule the work loop to get things going */ + adreno_dispatcher_schedule(&adreno_dev->dev); +} + +/** + * adreno_dispatcher_stop() - stop the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Stop the dispatcher and close all the timers + */ +void adreno_dispatcher_stop(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); +} + +/** + * adreno_dispatcher_close() - close the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Close the dispatcher and free all the oustanding commands and memory + */ +void adreno_dispatcher_close(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + mutex_lock(&dispatcher->mutex); + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + + while (dispatcher->head != dispatcher->tail) { + kgsl_cmdbatch_destroy(dispatcher->cmdqueue[dispatcher->head]); + dispatcher->head = (dispatcher->head + 1) + % ADRENO_DISPATCH_CMDQUEUE_SIZE; + } + + kfree(fault_detect_regs); + fault_detect_regs = NULL; + + mutex_unlock(&dispatcher->mutex); + + kobject_put(&dispatcher->kobj); +} + +struct dispatcher_attribute { + struct attribute attr; + ssize_t (*show)(struct adreno_dispatcher *, + struct dispatcher_attribute *, char *); + ssize_t (*store)(struct adreno_dispatcher *, + struct dispatcher_attribute *, const char *buf, + size_t count); + unsigned int max; + unsigned int *value; +}; + +#define DISPATCHER_UINT_ATTR(_name, _mode, _max, _value) \ + struct dispatcher_attribute dispatcher_attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show_uint, \ + .store = _store_uint, \ + .max = _max, \ + .value = &(_value), \ + } + +#define to_dispatcher_attr(_a) \ + container_of((_a), struct dispatcher_attribute, attr) +#define to_dispatcher(k) container_of(k, struct adreno_dispatcher, kobj) + +static ssize_t _store_uint(struct adreno_dispatcher *dispatcher, + struct dispatcher_attribute *attr, + const char *buf, size_t size) +{ + unsigned long val; + int ret = kstrtoul(buf, 0, &val); + + if (ret) + return ret; + + if (!val || (attr->max && (val > attr->max))) + return -EINVAL; + + *((unsigned int *) attr->value) = val; + return size; +} + +static ssize_t _show_uint(struct adreno_dispatcher *dispatcher, + struct dispatcher_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", + *((unsigned int *) attr->value)); +} + +static DISPATCHER_UINT_ATTR(inflight, 0644, ADRENO_DISPATCH_CMDQUEUE_SIZE, + _dispatcher_inflight); +/* + * Our code that "puts back" a command from the context is much cleaner + * if we are sure that there will always be enough room in the + * ringbuffer so restrict the maximum size of the context queue to + * 
ADRENO_CONTEXT_CMDQUEUE_SIZE - 1 + */ +static DISPATCHER_UINT_ATTR(context_cmdqueue_size, 0644, + ADRENO_CONTEXT_CMDQUEUE_SIZE - 1, _context_cmdqueue_size); +static DISPATCHER_UINT_ATTR(context_burst_count, 0644, 0, + _context_cmdbatch_burst); +static DISPATCHER_UINT_ATTR(cmdbatch_timeout, 0644, 0, _cmdbatch_timeout); +static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait); +static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0, + _fault_timer_interval); + +static struct attribute *dispatcher_attrs[] = { + &dispatcher_attr_inflight.attr, + &dispatcher_attr_context_cmdqueue_size.attr, + &dispatcher_attr_context_burst_count.attr, + &dispatcher_attr_cmdbatch_timeout.attr, + &dispatcher_attr_context_queue_wait.attr, + &dispatcher_attr_fault_detect_interval.attr, + NULL, +}; + +static ssize_t dispatcher_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct adreno_dispatcher *dispatcher = to_dispatcher(kobj); + struct dispatcher_attribute *pattr = to_dispatcher_attr(attr); + ssize_t ret = -EIO; + + if (pattr->show) + ret = pattr->show(dispatcher, pattr, buf); + + return ret; +} + +static ssize_t dispatcher_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct adreno_dispatcher *dispatcher = to_dispatcher(kobj); + struct dispatcher_attribute *pattr = to_dispatcher_attr(attr); + ssize_t ret = -EIO; + + if (pattr->store) + ret = pattr->store(dispatcher, pattr, buf, count); + + return ret; +} + +static void dispatcher_sysfs_release(struct kobject *kobj) +{ +} + +static const struct sysfs_ops dispatcher_sysfs_ops = { + .show = dispatcher_sysfs_show, + .store = dispatcher_sysfs_store +}; + +static struct kobj_type ktype_dispatcher = { + .sysfs_ops = &dispatcher_sysfs_ops, + .default_attrs = dispatcher_attrs, + .release = dispatcher_sysfs_release +}; + +/** + * adreno_dispatcher_init() - Initialize the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Initialize the dispatcher + */ +int adreno_dispatcher_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + memset(dispatcher, 0, sizeof(*dispatcher)); + + mutex_init(&dispatcher->mutex); + + setup_timer(&dispatcher->timer, adreno_dispatcher_timer, + (unsigned long) adreno_dev); + + setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer, + (unsigned long) adreno_dev); + + INIT_WORK(&dispatcher->work, adreno_dispatcher_work); + + plist_head_init(&dispatcher->pending); + spin_lock_init(&dispatcher->plist_lock); + + dispatcher->state = ADRENO_DISPATCHER_ACTIVE; + + ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher, + &device->dev->kobj, "dispatch"); + + fault_detect_regs = kzalloc(FT_DETECT_REGS_COUNT * sizeof(unsigned int), + GFP_KERNEL); + + if (fault_detect_regs == NULL) + ret = -ENOMEM; + + return ret; +} diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c index 8f0bca27b0f2d60276ff59a120b6c95a80eb795f..55802c62fe07a5e8f09565d51ac3c2ac344bb1aa 100644 --- a/drivers/gpu/msm/adreno_drawctxt.c +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -13,10 +13,12 @@ #include <linux/slab.h> #include <linux/msm_kgsl.h> +#include <linux/sched.h> #include "kgsl.h" #include "kgsl_sharedmem.h" #include "adreno.h" +#include "adreno_trace.h" #define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF @@ -132,6 +134,245 @@ void build_quad_vtxbuff(struct adreno_context *drawctxt, *incmd = cmd; } 
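[Editorial aside, not part of the patch: the dispatcher cmdqueue above and the per-context cmdqueue added below are fixed-size arrays indexed by head/tail counters that wrap modulo the queue size, with head == tail meaning "empty". A queue using that convention can hold at most SIZE - 1 entries, which is consistent with capping the context_cmdqueue_size attribute at ADRENO_CONTEXT_CMDQUEUE_SIZE - 1. A minimal standalone sketch of the convention follows; all names are purely illustrative and do not appear in the driver.]

    #include <stddef.h>

    #define DEMO_CMDQUEUE_SIZE 128	/* stands in for ADRENO_CONTEXT_CMDQUEUE_SIZE */

    struct demo_cmdqueue {
    	void *slots[DEMO_CMDQUEUE_SIZE];
    	unsigned int head;		/* oldest queued entry */
    	unsigned int tail;		/* next free slot */
    };

    /* Returns 0 on success, -1 if the queue already holds SIZE - 1 entries */
    static int demo_cmdqueue_put(struct demo_cmdqueue *q, void *cmd)
    {
    	unsigned int next = (q->tail + 1) % DEMO_CMDQUEUE_SIZE;

    	if (next == q->head)
    		return -1;

    	q->slots[q->tail] = cmd;
    	q->tail = next;
    	return 0;
    }

    /* Returns NULL when head == tail, i.e. the queue is empty */
    static void *demo_cmdqueue_get(struct demo_cmdqueue *q)
    {
    	void *cmd;

    	if (q->head == q->tail)
    		return NULL;

    	cmd = q->slots[q->head];
    	q->slots[q->head] = NULL;
    	q->head = (q->head + 1) % DEMO_CMDQUEUE_SIZE;
    	return cmd;
    }

[Keeping one slot unused lets "empty" and "full" be distinguished from the indices alone, without a separate element count.]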
+static void wait_callback(struct kgsl_device *device, void *priv, u32 id, + u32 timestamp, u32 type) +{ + struct adreno_context *drawctxt = priv; + wake_up_interruptible_all(&drawctxt->waiting); +} + +#define adreno_wait_event_interruptible_timeout(wq, condition, timeout, io) \ +({ \ + long __ret = timeout; \ + if (io) \ + __wait_io_event_interruptible_timeout(wq, condition, __ret); \ + else \ + __wait_event_interruptible_timeout(wq, condition, __ret); \ + __ret; \ +}) + +#define adreno_wait_event_interruptible(wq, condition, io) \ +({ \ + long __ret; \ + if (io) \ + __wait_io_event_interruptible(wq, condition, __ret); \ + else \ + __wait_event_interruptible(wq, condition, __ret); \ + __ret; \ +}) + +static int _check_context_timestamp(struct kgsl_device *device, + struct adreno_context *drawctxt, unsigned int timestamp) +{ + int ret = 0; + + /* Bail if the drawctxt has been invalidated or destroyed */ + if (kgsl_context_detached(&drawctxt->base) || + drawctxt->state != ADRENO_CONTEXT_STATE_ACTIVE) + return 1; + + mutex_lock(&device->mutex); + ret = kgsl_check_timestamp(device, &drawctxt->base, timestamp); + mutex_unlock(&device->mutex); + + return ret; +} + +/** + * adreno_drawctxt_wait() - sleep until a timestamp expires + * @adreno_dev: pointer to the adreno_device struct + * @drawctxt: Pointer to the draw context to sleep for + * @timetamp: Timestamp to wait on + * @timeout: Number of jiffies to wait (0 for infinite) + * + * Register an event to wait for a timestamp on a context and sleep until it + * has past. Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0 + * on success + */ +int adreno_drawctxt_wait(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout) +{ + static unsigned int io_cnt; + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret, io; + + if (kgsl_context_detached(context)) + return -EINVAL; + + if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) + return -EDEADLK; + + /* Needs to hold the device mutex */ + BUG_ON(!mutex_is_locked(&device->mutex)); + + trace_adreno_drawctxt_wait_start(context->id, timestamp); + + ret = kgsl_add_event(device, context->id, timestamp, + wait_callback, drawctxt, NULL); + if (ret) + goto done; + + /* + * For proper power accounting sometimes we need to call + * io_wait_interruptible_timeout and sometimes we need to call + * plain old wait_interruptible_timeout. We call the regular + * timeout N times out of 100, where N is a number specified by + * the current power level + */ + + io_cnt = (io_cnt + 1) % 100; + io = (io_cnt < pwr->pwrlevels[pwr->active_pwrlevel].io_fraction) + ? 
0 : 1; + + mutex_unlock(&device->mutex); + + if (timeout) { + ret = (int) adreno_wait_event_interruptible_timeout( + drawctxt->waiting, + _check_context_timestamp(device, drawctxt, timestamp), + msecs_to_jiffies(timeout), io); + + if (ret == 0) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = 0; + } else { + ret = (int) adreno_wait_event_interruptible(drawctxt->waiting, + _check_context_timestamp(device, drawctxt, timestamp), + io); + } + + mutex_lock(&device->mutex); + + /* -EDEADLK if the context was invalidated while we were waiting */ + if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) + ret = -EDEADLK; + + + /* Return -EINVAL if the context was detached while we were waiting */ + if (kgsl_context_detached(context)) + ret = -EINVAL; + +done: + trace_adreno_drawctxt_wait_done(context->id, timestamp, ret); + return ret; +} + +static void global_wait_callback(struct kgsl_device *device, void *priv, u32 id, + u32 timestamp, u32 type) +{ + struct adreno_context *drawctxt = priv; + + wake_up_interruptible_all(&drawctxt->waiting); + kgsl_context_put(&drawctxt->base); +} + +static int _check_global_timestamp(struct kgsl_device *device, + unsigned int timestamp) +{ + int ret; + + mutex_lock(&device->mutex); + ret = kgsl_check_timestamp(device, NULL, timestamp); + mutex_unlock(&device->mutex); + + return ret; +} + +int adreno_drawctxt_wait_global(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret; + + /* Needs to hold the device mutex */ + BUG_ON(!mutex_is_locked(&device->mutex)); + + _kgsl_context_get(context); + + trace_adreno_drawctxt_wait_start(KGSL_MEMSTORE_GLOBAL, timestamp); + + ret = kgsl_add_event(device, KGSL_MEMSTORE_GLOBAL, timestamp, + global_wait_callback, drawctxt, NULL); + if (ret) { + kgsl_context_put(context); + goto done; + } + + mutex_unlock(&device->mutex); + + if (timeout) { + ret = (int) wait_event_interruptible_timeout(drawctxt->waiting, + _check_global_timestamp(device, timestamp), + msecs_to_jiffies(timeout)); + + if (ret == 0) + ret = -ETIMEDOUT; + else if (ret > 0) + ret = 0; + } else { + ret = (int) wait_event_interruptible(drawctxt->waiting, + _check_global_timestamp(device, timestamp)); + } + + mutex_lock(&device->mutex); + + if (ret) + kgsl_cancel_events_timestamp(device, NULL, timestamp); + +done: + trace_adreno_drawctxt_wait_done(KGSL_MEMSTORE_GLOBAL, timestamp, ret); + return ret; +} + +/** + * adreno_drawctxt_invalidate() - Invalidate an adreno draw context + * @device: Pointer to the KGSL device structure for the GPU + * @context: Pointer to the KGSL context structure + * + * Invalidate the context and remove all queued commands and cancel any pending + * waiters + */ +void adreno_drawctxt_invalidate(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + + drawctxt->state = ADRENO_CONTEXT_STATE_INVALID; + + /* Clear the pending queue */ + mutex_lock(&drawctxt->mutex); + + while (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + struct kgsl_cmdbatch *cmdbatch = + drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + drawctxt->cmdqueue_head = (drawctxt->cmdqueue_head + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + mutex_unlock(&drawctxt->mutex); + + mutex_lock(&device->mutex); + kgsl_cancel_events_timestamp(device, context, + cmdbatch->timestamp); + mutex_unlock(&device->mutex); + + kgsl_cmdbatch_destroy(cmdbatch); 
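+
+		/*
+		 * As the comment in adreno_drawctxt_detach() explains, the
+		 * cmdbatch destroy path takes the device mutex, so the
+		 * drawctxt mutex is dropped above before cancelling events
+		 * and destroying the cmdbatch. The queue head has already
+		 * been advanced, so it is safe to re-take the mutex below
+		 * and continue draining the queue.
+		 */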
+ mutex_lock(&drawctxt->mutex); + } + + mutex_unlock(&drawctxt->mutex); + + /* Give the bad news to everybody waiting around */ + wake_up_interruptible_all(&drawctxt->waiting); + wake_up_interruptible_all(&drawctxt->wq); +} + /** * adreno_drawctxt_create - create a new adreno draw context * @device - KGSL device to create the context on @@ -142,48 +383,60 @@ void build_quad_vtxbuff(struct adreno_context *drawctxt, * Create a new draw context for the 3D core. Return 0 on success, * or error code on failure. */ -int adreno_drawctxt_create(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_context *context, uint32_t *flags) + struct kgsl_context * +adreno_drawctxt_create(struct kgsl_device_private *dev_priv, + uint32_t *flags) { struct adreno_context *drawctxt; + struct kgsl_device *device = dev_priv->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); int ret; drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL); if (drawctxt == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + ret = kgsl_context_init(dev_priv, &drawctxt->base); + if (ret != 0) { + kfree(drawctxt); + return ERR_PTR(ret); + } - drawctxt->pid = task_pid_nr(current); - strlcpy(drawctxt->pid_name, current->comm, TASK_COMM_LEN); - drawctxt->pagetable = pagetable; drawctxt->bin_base_offset = 0; - drawctxt->id = context->id; drawctxt->timestamp = 0; *flags &= (KGSL_CONTEXT_PREAMBLE | KGSL_CONTEXT_NO_GMEM_ALLOC | KGSL_CONTEXT_PER_CONTEXT_TS | KGSL_CONTEXT_USER_GENERATED_TS | + KGSL_CONTEXT_NO_FAULT_TOLERANCE | KGSL_CONTEXT_TYPE_MASK); + /* Always enable per-context timestamps */ + *flags |= KGSL_CONTEXT_PER_CONTEXT_TS; + drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS; + if (*flags & KGSL_CONTEXT_PREAMBLE) drawctxt->flags |= CTXT_FLAGS_PREAMBLE; if (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC) drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC; - if (*flags & KGSL_CONTEXT_PER_CONTEXT_TS) - drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS; - - if (*flags & KGSL_CONTEXT_USER_GENERATED_TS) { - if (!(*flags & KGSL_CONTEXT_PER_CONTEXT_TS)) { - ret = -EINVAL; - goto err; - } + if (*flags & KGSL_CONTEXT_USER_GENERATED_TS) drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS; - } + + mutex_init(&drawctxt->mutex); + init_waitqueue_head(&drawctxt->wq); + init_waitqueue_head(&drawctxt->waiting); + + /* + * Set up the plist node for the dispatcher. 
For now all contexts have + * the same priority, but later the priority will be set at create time + * by the user + */ + + plist_node_init(&drawctxt->pending, ADRENO_CONTEXT_DEFAULT_PRIORITY); if (*flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) drawctxt->flags |= CTXT_FLAGS_NO_FAULT_TOLERANCE; @@ -196,43 +449,52 @@ int adreno_drawctxt_create(struct kgsl_device *device, goto err; kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(drawctxt->id, ref_wait_ts), - KGSL_INIT_REFTIMESTAMP); - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(drawctxt->id, ts_cmp_enable), 0); + KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp), + 0); kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(drawctxt->id, soptimestamp), 0); - kgsl_sharedmem_writel(&device->memstore, - KGSL_MEMSTORE_OFFSET(drawctxt->id, eoptimestamp), 0); + KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp), + 0); - context->devctxt = drawctxt; - return 0; + return &drawctxt->base; err: - kfree(drawctxt); - return ret; + kgsl_context_put(&drawctxt->base); + return ERR_PTR(ret); } /** - * adreno_drawctxt_destroy - destroy a draw context - * @device - KGSL device that owns the context - * @context- Generic KGSL context container for the context + * adreno_drawctxt_sched() - Schedule a previously blocked context + * @device: pointer to a KGSL device + * @drawctxt: drawctxt to rechedule * - * Destroy an existing context. Return 0 on success or error - * code on failure. + * This function is called by the core when it knows that a previously blocked + * context has been unblocked. The default adreno response is to reschedule the + * context on the dispatcher */ +void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context) +{ + adreno_dispatcher_queue_context(device, ADRENO_CONTEXT(context)); +} -/* destroy a drawing context */ - -void adreno_drawctxt_destroy(struct kgsl_device *device, - struct kgsl_context *context) +/** + * adreno_drawctxt_detach(): detach a context from the GPU + * @context: Generic KGSL context container for the context + * + */ +int adreno_drawctxt_detach(struct kgsl_context *context) { - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_device *device; + struct adreno_device *adreno_dev; struct adreno_context *drawctxt; + int ret; - if (context == NULL || context->devctxt == NULL) - return; + if (context == NULL) + return 0; + + device = context->device; + adreno_dev = ADRENO_DEVICE(device); + drawctxt = ADRENO_CONTEXT(context); - drawctxt = context->devctxt; /* deactivate context */ if (adreno_dev->drawctxt_active == drawctxt) { /* no need to save GMEM or shader, the context is @@ -248,18 +510,48 @@ void adreno_drawctxt_destroy(struct kgsl_device *device, adreno_drawctxt_switch(adreno_dev, NULL, 0); } - if (device->state != KGSL_STATE_HUNG) - adreno_idle(device); + mutex_lock(&drawctxt->mutex); + + while (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + struct kgsl_cmdbatch *cmdbatch = + drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + drawctxt->cmdqueue_head = (drawctxt->cmdqueue_head + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + mutex_unlock(&drawctxt->mutex); + + /* + * Don't hold the drawctxt mutex while the cmdbatch is being + * destroyed because the cmdbatch destroy takes the device + * mutex and the world falls in on itself + */ + + kgsl_cmdbatch_destroy(cmdbatch); + mutex_lock(&drawctxt->mutex); + } + + mutex_unlock(&drawctxt->mutex); - if (adreno_is_a20x(adreno_dev) && adreno_dev->drawctxt_active) - 
kgsl_setstate(&device->mmu, adreno_dev->drawctxt_active->id, - KGSL_MMUFLAGS_PTUPDATE); + /* Wait for the last global timestamp to pass before continuing */ + ret = adreno_drawctxt_wait_global(adreno_dev, context, + drawctxt->internal_timestamp, 10 * 1000); kgsl_sharedmem_free(&drawctxt->gpustate); kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow); + return ret; +} + + +void adreno_drawctxt_destroy(struct kgsl_context *context) +{ + struct adreno_context *drawctxt; + if (context == NULL) + return; + + drawctxt = ADRENO_CONTEXT(context); kfree(drawctxt); - context->devctxt = NULL; } /** @@ -275,10 +567,12 @@ void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, struct kgsl_context *context, unsigned int offset) { - struct adreno_context *drawctxt = context->devctxt; + struct adreno_context *drawctxt; - if (drawctxt) - drawctxt->bin_base_offset = offset; + if (context == NULL) + return; + drawctxt = ADRENO_CONTEXT(context); + drawctxt->bin_base_offset = offset; } /** @@ -290,11 +584,12 @@ void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, * Switch the current draw context */ -void adreno_drawctxt_switch(struct adreno_device *adreno_dev, +int adreno_drawctxt_switch(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, unsigned int flags) { struct kgsl_device *device = &adreno_dev->dev; + int ret = 0; if (drawctxt) { if (flags & KGSL_CONTEXT_SAVE_GMEM) @@ -310,18 +605,44 @@ void adreno_drawctxt_switch(struct adreno_device *adreno_dev, if (adreno_dev->drawctxt_active == drawctxt) { if (adreno_dev->gpudev->ctxt_draw_workaround && adreno_is_a225(adreno_dev)) - adreno_dev->gpudev->ctxt_draw_workaround( + ret = adreno_dev->gpudev->ctxt_draw_workaround( adreno_dev, drawctxt); - return; + return ret; } - KGSL_CTXT_INFO(device, "from %p to %p flags %d\n", - adreno_dev->drawctxt_active, drawctxt, flags); + KGSL_CTXT_INFO(device, "from %d to %d flags %d\n", + adreno_dev->drawctxt_active ? + adreno_dev->drawctxt_active->base.id : 0, + drawctxt ? 
drawctxt->base.id : 0, flags); /* Save the old context */ - adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active); + ret = adreno_dev->gpudev->ctxt_save(adreno_dev, + adreno_dev->drawctxt_active); + + if (ret) { + KGSL_DRV_ERR(device, + "Error in GPU context %d save: %d\n", + adreno_dev->drawctxt_active->base.id, ret); + return ret; + } + + /* Put the old instance of the active drawctxt */ + if (adreno_dev->drawctxt_active) + kgsl_context_put(&adreno_dev->drawctxt_active->base); + + /* Get a refcount to the new instance */ + if (drawctxt) + _kgsl_context_get(&drawctxt->base); /* Set the new context */ - adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); + ret = adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt); + if (ret) { + KGSL_DRV_ERR(device, + "Error in GPU context %d restore: %d\n", + drawctxt->base.id, ret); + return ret; + } + adreno_dev->drawctxt_active = drawctxt; + return 0; } diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h index 8bbeaa9b431fc82da0ad20b9d34ad0725bb62b38..dddc20629b753a621f08c2f20a536f17efa9afc4 100644 --- a/drivers/gpu/msm/adreno_drawctxt.h +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -13,8 +13,6 @@ #ifndef __ADRENO_DRAWCTXT_H #define __ADRENO_DRAWCTXT_H -#include <linux/sched.h> - #include "adreno_pm4types.h" #include "a2xx_reg.h" @@ -56,6 +54,8 @@ #define CTXT_FLAGS_SKIP_EOF BIT(15) /* Context no fault tolerance */ #define CTXT_FLAGS_NO_FAULT_TOLERANCE BIT(16) +/* Force the preamble for the next submission */ +#define CTXT_FLAGS_FORCE_PREAMBLE BIT(17) /* Symbolic table for the adreno draw context type */ #define ADRENO_DRAWCTXT_TYPES \ @@ -65,6 +65,13 @@ { KGSL_CONTEXT_TYPE_C2D, "C2D" }, \ { KGSL_CONTEXT_TYPE_RS, "RS" } +#define ADRENO_CONTEXT_CMDQUEUE_SIZE 128 + +#define ADRENO_CONTEXT_DEFAULT_PRIORITY 1 + +#define ADRENO_CONTEXT_STATE_ACTIVE 0 +#define ADRENO_CONTEXT_STATE_INVALID 1 + struct kgsl_device; struct adreno_device; struct kgsl_device_private; @@ -95,21 +102,58 @@ struct gmem_shadow_t { struct kgsl_memdesc quad_vertices_restore; }; +/** + * struct adreno_context - Adreno GPU draw context + * @id: Unique integer ID of the context + * @timestamp: Last issued context-specific timestamp + * @internal_timestamp: Global timestamp of the last issued command + * @state: Current state of the context + * @flags: Bitfield controlling behavior of the context + * @type: Context type (GL, CL, RS) + * @mutex: Mutex to protect the cmdqueue + * @pagetable: Pointer to the GPU pagetable for the context + * @gpustate: Pointer to the GPU scratch memory for context save/restore + * @reg_restore: Command buffer for restoring context registers + * @shader_save: Command buffer for saving shaders + * @shader_restore: Command buffer to restore shaders + * @context_gmem_shadow: GMEM shadow structure for save/restore + * @reg_save: A2XX command buffer to save context registers + * @shader_fixup: A2XX command buffer to "fix" shaders on restore + * @chicken_restore: A2XX command buffer to "fix" register restore + * @bin_base_offset: Saved value of the A2XX BIN_BASE_OFFSET register + * @regconstant_save: A3XX command buffer to save some registers + * @constant_retore: A3XX command buffer to restore some registers + * @hslqcontrol_restore: A3XX command buffer to restore HSLSQ registers + * @save_fixup: A3XX command buffer to "fix" register save + * @restore_fixup: A3XX cmmand buffer to restore register save fixes + * @shader_load_commands: A3XX GPU memory descriptor for shader load IB + * @shader_save_commands: A3XX GPU 
memory descriptor for shader save IB + * @constantr_save_commands: A3XX GPU memory descriptor for constant save IB + * @constant_load_commands: A3XX GPU memory descriptor for constant load IB + * @cond_execs: A3XX GPU memory descriptor for conditional exec IB + * @hlsq_restore_commands: A3XX GPU memory descriptor for HLSQ restore IB + * @cmdqueue: Queue of command batches waiting to be dispatched for this context + * @cmdqueue_head: Head of the cmdqueue queue + * @cmdqueue_tail: Tail of the cmdqueue queue + * @pending: Priority list node for the dispatcher list of pending contexts + * @wq: Workqueue structure for contexts to sleep pending room in the queue + * @waiting: Workqueue structure for contexts waiting for a timestamp or event + * @queued: Number of commands queued in the cmdqueue + */ struct adreno_context { - pid_t pid; - char pid_name[TASK_COMM_LEN]; - unsigned int id; + struct kgsl_context base; unsigned int ib_gpu_time_used; unsigned int timestamp; + unsigned int internal_timestamp; + int state; uint32_t flags; unsigned int type; - struct kgsl_pagetable *pagetable; + struct mutex mutex; struct kgsl_memdesc gpustate; unsigned int reg_restore[3]; unsigned int shader_save[3]; unsigned int shader_restore[3]; - /* Information of the GMEM shadow that is created in context create */ struct gmem_shadow_t context_gmem_shadow; /* A2XX specific items */ @@ -130,23 +174,44 @@ struct adreno_context { struct kgsl_memdesc constant_load_commands[3]; struct kgsl_memdesc cond_execs[4]; struct kgsl_memdesc hlsqcontrol_restore_commands[1]; + + /* Dispatcher */ + struct kgsl_cmdbatch *cmdqueue[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + int cmdqueue_head; + int cmdqueue_tail; + + struct plist_node pending; + wait_queue_head_t wq; + wait_queue_head_t waiting; + + int queued; }; -int adreno_drawctxt_create(struct kgsl_device *device, - struct kgsl_pagetable *pagetable, - struct kgsl_context *context, + +struct kgsl_context *adreno_drawctxt_create(struct kgsl_device_private *, uint32_t *flags); -void adreno_drawctxt_destroy(struct kgsl_device *device, - struct kgsl_context *context); +int adreno_drawctxt_detach(struct kgsl_context *context); + +void adreno_drawctxt_destroy(struct kgsl_context *context); -void adreno_drawctxt_switch(struct adreno_device *adreno_dev, +void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context); + +int adreno_drawctxt_switch(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, unsigned int flags); void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device, struct kgsl_context *context, unsigned int offset); +int adreno_drawctxt_wait(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout); + +void adreno_drawctxt_invalidate(struct kgsl_device *device, + struct kgsl_context *context); + /* GPU context switch helper functions */ void build_quad_vtxbuff(struct adreno_context *drawctxt, diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c index a6e96e19ced353e9e4901af90d195c719e5148cc..8a166fe92f70ec7e277a9e9072745285bd049494 100644 --- a/drivers/gpu/msm/adreno_postmortem.c +++ b/drivers/gpu/msm/adreno_postmortem.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -21,6 +21,7 @@ #include "adreno_ringbuffer.h" #include "kgsl_cffdump.h" #include "kgsl_pwrctrl.h" +#include "adreno_trace.h" #include "a2xx_reg.h" #include "a3xx_reg.h" @@ -724,6 +725,9 @@ int adreno_dump(struct kgsl_device *device, int manual) kgsl_regread(device, REG_CP_IB2_BASE, &cp_ib2_base); kgsl_regread(device, REG_CP_IB2_BUFSZ, &cp_ib2_bufsz); + trace_adreno_gpu_fault(rbbm_status, cp_rb_rptr, cp_rb_wptr, + cp_ib1_base, cp_ib1_bufsz, cp_ib2_base, cp_ib2_bufsz); + /* If postmortem dump is not enabled, dump minimal set and return */ if (!device->pm_dump_enable) { @@ -903,5 +907,9 @@ int adreno_dump(struct kgsl_device *device, int manual) error_vfree: vfree(rb_copy); end: + /* Restart the dispatcher after a manually triggered dump */ + if (manual) + adreno_dispatcher_start(adreno_dev); + return result; } diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c index 25878a1cbabf7cfc3013b79b19d0ad41403ead16..ef696088eac8ef6216855edfc43f5a453f5d0db1 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.c +++ b/drivers/gpu/msm/adreno_ringbuffer.c @@ -18,7 +18,6 @@ #include "kgsl.h" #include "kgsl_sharedmem.h" #include "kgsl_cffdump.h" -#include "kgsl_trace.h" #include "adreno.h" #include "adreno_pm4types.h" @@ -65,9 +64,6 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned long wait_time; unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); unsigned long wait_time_part; - unsigned int prev_reg_val[ft_detect_regs_count]; - - memset(prev_reg_val, 0, sizeof(prev_reg_val)); /* if wptr ahead, fill the remaining with NOPs */ if (wptr_ahead) { @@ -105,43 +101,13 @@ adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, if (freecmds == 0 || freecmds > numcmds) break; - /* Dont wait for timeout, detect hang faster. - */ - if (time_after(jiffies, wait_time_part)) { - wait_time_part = jiffies + - msecs_to_jiffies(KGSL_TIMEOUT_PART); - if ((adreno_ft_detect(rb->device, - prev_reg_val))){ - KGSL_DRV_ERR(rb->device, - "Hang detected while waiting for freespace in" - "ringbuffer rptr: 0x%x, wptr: 0x%x\n", - rb->rptr, rb->wptr); - goto err; - } - } - if (time_after(jiffies, wait_time)) { KGSL_DRV_ERR(rb->device, "Timed out while waiting for freespace in ringbuffer " "rptr: 0x%x, wptr: 0x%x\n", rb->rptr, rb->wptr); - goto err; + return -ETIMEDOUT; } - continue; - -err: - if (!adreno_dump_and_exec_ft(rb->device)) { - if (context && context->flags & CTXT_FLAGS_GPU_HANG) { - KGSL_CTXT_WARN(rb->device, - "Context %p caused a gpu hang. 
Will not accept commands for context %d\n", - context, context->id); - return -EDEADLK; - } - wait_time = jiffies + wait_timeout; - } else { - /* GPU is hung and fault tolerance failed */ - BUG(); - } } return 0; } @@ -179,7 +145,8 @@ unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, if (!ret) { ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; rb->wptr += numcmds; - } + } else + ptr = ERR_PTR(ret); return ptr; } @@ -320,10 +287,9 @@ int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device) return 0; } -int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) +int adreno_ringbuffer_start(struct adreno_ringbuffer *rb) { int status; - /*cp_rb_cntl_u cp_rb_cntl; */ union reg_cp_rb_cntl cp_rb_cntl; unsigned int rb_cntl; struct kgsl_device *device = rb->device; @@ -332,9 +298,6 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) if (rb->flags & KGSL_FLAGS_STARTED) return 0; - if (init_ram) - rb->global_ts = 0; - kgsl_sharedmem_set(&rb->memptrs_desc, 0, 0, sizeof(struct kgsl_rbmemptrs)); @@ -444,7 +407,9 @@ int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram) adreno_regwrite(device, REG_CP_ME_CNTL, 0); /* ME init is GPU specific, so jump into the sub-function */ - adreno_dev->gpudev->rb_init(adreno_dev, rb); + status = adreno_dev->gpudev->rb_init(adreno_dev, rb); + if (status) + return status; /* idle device to validate ME INIT */ status = adreno_idle(device); @@ -482,6 +447,7 @@ int adreno_ringbuffer_init(struct kgsl_device *device) */ rb->sizedwords = KGSL_RB_SIZE >> 2; + rb->buffer_desc.flags = KGSL_MEMFLAGS_GPUREADONLY; /* allocate memory for ringbuffer */ status = kgsl_allocate_contiguous(&rb->buffer_desc, (rb->sizedwords << 2)); @@ -505,6 +471,8 @@ int adreno_ringbuffer_init(struct kgsl_device *device) /* overlay structure on memptrs memory */ rb->memptrs = (struct kgsl_rbmemptrs *) rb->memptrs_desc.hostptr; + rb->global_ts = 0; + return 0; } @@ -526,9 +494,9 @@ void adreno_ringbuffer_close(struct adreno_ringbuffer *rb) static int adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, - struct adreno_context *context, + struct adreno_context *drawctxt, unsigned int flags, unsigned int *cmds, - int sizedwords) + int sizedwords, uint32_t timestamp) { struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); unsigned int *ringcmds; @@ -537,19 +505,20 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, unsigned int rcmd_gpu; unsigned int context_id; unsigned int gpuaddr = rb->device->memstore.gpuaddr; - unsigned int timestamp; - /* - * if the context was not created with per context timestamp - * support, we must use the global timestamp since issueibcmds - * will be returning that one, or if an internal issue then - * use global timestamp. 
- */ - if ((context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) && - !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) - context_id = context->id; - else + /* The global timestamp always needs to be incremented */ + rb->global_ts++; + + /* If this is a internal IB, use the global timestamp for it */ + if (!drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) { + timestamp = rb->global_ts; context_id = KGSL_MEMSTORE_GLOBAL; + } else { + context_id = drawctxt->base.id; + } + + if (drawctxt) + drawctxt->internal_timestamp = rb->global_ts; /* reserve space to temporarily turn off protected mode * error checking if needed @@ -560,13 +529,8 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, /* internal ib command identifier for the ringbuffer */ total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0; - /* Add CP_COND_EXEC commands to generate CP_INTERRUPT */ - total_sizedwords += context ? 13 : 0; - - if ((context) && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) && - (flags & (KGSL_CMD_FLAGS_INTERNAL_ISSUE | - KGSL_CMD_FLAGS_GET_INT))) - total_sizedwords += 2; + /* Add two dwords for the CP_INTERRUPT */ + total_sizedwords += drawctxt ? 2 : 0; if (adreno_is_a3xx(adreno_dev)) total_sizedwords += 7; @@ -574,16 +538,25 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, if (adreno_is_a2xx(adreno_dev)) total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */ - total_sizedwords += 2; /* scratchpad ts for recovery */ total_sizedwords += 3; /* sop timestamp */ total_sizedwords += 4; /* eop timestamp */ - if (KGSL_MEMSTORE_GLOBAL != context_id) + if (drawctxt) { total_sizedwords += 3; /* global timestamp without cache * flush for non-zero context */ + } + + if (adreno_is_a20x(adreno_dev)) + total_sizedwords += 2; /* CACHE_FLUSH */ - ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords); - if (!ringcmds) + if (flags & KGSL_CMD_FLAGS_WFI) + total_sizedwords += 2; /* WFI */ + + ringcmds = adreno_ringbuffer_allocspace(rb, drawctxt, total_sizedwords); + + if (IS_ERR(ringcmds)) + return PTR_ERR(ringcmds); + if (ringcmds == NULL) return -ENOSPC; rcmd_gpu = rb->buffer_desc.gpuaddr @@ -597,18 +570,6 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_INTERNAL_IDENTIFIER); } - /* always increment the global timestamp. once. 
*/ - rb->global_ts++; - - if (KGSL_MEMSTORE_GLOBAL != context_id) - timestamp = context->timestamp; - else - timestamp = rb->global_ts; - - /* scratchpad ts for recovery */ - GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->global_ts); - /* start-of-pipeline timestamp */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_MEM_WRITE, 2)); GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + @@ -669,63 +630,21 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp))); GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); - if (KGSL_MEMSTORE_GLOBAL != context_id) { + if (drawctxt) { GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_MEM_WRITE, 2)); GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + - KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, - eoptimestamp))); + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + eoptimestamp))); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->global_ts); } - if (context) { - /* Conditional execution based on memory values */ - GSL_RB_WRITE(ringcmds, rcmd_gpu, - cp_type3_packet(CP_COND_EXEC, 4)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + - KGSL_MEMSTORE_OFFSET( - context_id, ts_cmp_enable)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr + - KGSL_MEMSTORE_OFFSET( - context_id, ref_wait_ts)) >> 2); - GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp); - /* # of conditional command DWORDs */ - GSL_RB_WRITE(ringcmds, rcmd_gpu, 8); - - /* Clear the ts_cmp_enable for the context */ - GSL_RB_WRITE(ringcmds, rcmd_gpu, - cp_type3_packet(CP_MEM_WRITE, 2)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, gpuaddr + - KGSL_MEMSTORE_OFFSET( - context_id, ts_cmp_enable)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x0); - - /* Clear the ts_cmp_enable for the global timestamp */ - GSL_RB_WRITE(ringcmds, rcmd_gpu, - cp_type3_packet(CP_MEM_WRITE, 2)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, gpuaddr + - KGSL_MEMSTORE_OFFSET( - KGSL_MEMSTORE_GLOBAL, ts_cmp_enable)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x0); - /* Trigger the interrupt */ + if (drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) { GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_INTERRUPT, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); } - /* - * If per context timestamps are enabled and any of the kgsl - * internal commands want INT to be generated trigger the INT - */ - if ((context) && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) && - (flags & (KGSL_CMD_FLAGS_INTERNAL_ISSUE | - KGSL_CMD_FLAGS_GET_INT))) { - GSL_RB_WRITE(ringcmds, rcmd_gpu, - cp_type3_packet(CP_INTERRUPT, 1)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, - CP_INT_CNTL__RB_INT_MASK); - } - if (adreno_is_a3xx(adreno_dev)) { /* Dummy set-constant to trigger context rollover */ GSL_RB_WRITE(ringcmds, rcmd_gpu, @@ -735,9 +654,10 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); } - if (flags & KGSL_CMD_FLAGS_EOF) { - GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1)); - GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_END_OF_FRAME_IDENTIFIER); + if (flags & KGSL_CMD_FLAGS_WFI) { + GSL_RB_WRITE(ringcmds, rcmd_gpu, + cp_type3_packet(CP_WAIT_FOR_IDLE, 1)); + GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00000000); } adreno_ringbuffer_submit(rb); @@ -755,14 +675,10 @@ adreno_ringbuffer_issuecmds(struct kgsl_device *device, struct adreno_device *adreno_dev = ADRENO_DEVICE(device); struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer; - if (device->state & KGSL_STATE_HUNG) - return kgsl_readtimestamp(device, KGSL_MEMSTORE_GLOBAL, - KGSL_TIMESTAMP_RETIRED); - flags |= 
KGSL_CMD_FLAGS_INTERNAL_ISSUE; return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds, - sizedwords); + sizedwords, 0); } static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr, @@ -957,50 +873,108 @@ done: return ret; } +/** + * _ringbuffer_verify_ib() - parse an IB and verify that it is correct + * @dev_priv: Pointer to the process struct + * @ibdesc: Pointer to the IB descriptor + * + * This function only gets called if debugging is enabled - it walks the IB and + * does additional level parsing and verification above and beyond what KGSL + * core does + */ +static inline bool _ringbuffer_verify_ib(struct kgsl_device_private *dev_priv, + struct kgsl_ibdesc *ibdesc) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Check that the size of the IBs is under the allowable limit */ + if (ibdesc->sizedwords == 0 || ibdesc->sizedwords > 0xFFFFF) { + KGSL_DRV_ERR(device, "Invalid IB size 0x%X\n", + ibdesc->sizedwords); + return false; + } + + if (unlikely(adreno_dev->ib_check_level >= 1) && + !_parse_ibs(dev_priv, ibdesc->gpuaddr, ibdesc->sizedwords)) { + KGSL_DRV_ERR(device, "Could not verify the IBs\n"); + return false; + } + + return true; +} + int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, - struct kgsl_ibdesc *ibdesc, - unsigned int numibs, - uint32_t *timestamp, - unsigned int flags) + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) { struct kgsl_device *device = dev_priv->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - unsigned int *link = 0; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int i, ret; + + if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) + return -EDEADLK; + + /* Verify the IBs before they get queued */ + + for (i = 0; i < cmdbatch->ibcount; i++) { + if (!_ringbuffer_verify_ib(dev_priv, &cmdbatch->ibdesc[i])) + return -EINVAL; + } + + /* Queue the command in the ringbuffer */ + ret = adreno_context_queue_cmd(adreno_dev, drawctxt, cmdbatch, + timestamp); + + if (ret) + KGSL_DRV_ERR(device, "adreno_context_queue_cmd returned %d\n", + ret); + + return ret; +} + +/* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */ +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_ibdesc *ibdesc; + unsigned int numibs; + unsigned int *link; unsigned int *cmds; unsigned int i; - struct adreno_context *drawctxt = NULL; + struct kgsl_context *context; + struct adreno_context *drawctxt; unsigned int start_index = 0; - int ret = 0; + int ret; - if (device->state & KGSL_STATE_HUNG) { - ret = -EBUSY; - goto done; - } + context = cmdbatch->context; + drawctxt = ADRENO_CONTEXT(context); - if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED) || - context == NULL || ibdesc == 0 || numibs == 0) { - ret = -EINVAL; - goto done; - } - drawctxt = context->devctxt; + ibdesc = cmdbatch->ibdesc; + numibs = cmdbatch->ibcount; - if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) { - KGSL_CTXT_ERR(device, "proc %s failed fault tolerance" - " will not accept commands for context %d\n", - drawctxt->pid_name, drawctxt->id); - ret = -EDEADLK; - goto done; - } + /*When preamble is enabled, the preamble buffer with state restoration + commands are stored in the first node of the IB chain. 
We can skip that + if a context switch hasn't occured */ + + if ((drawctxt->flags & CTXT_FLAGS_PREAMBLE) && + !(cmdbatch->priv & CMDBATCH_FLAG_FORCE_PREAMBLE) && + (adreno_dev->drawctxt_active == drawctxt)) + start_index = 1; + + /* + * In skip mode don't issue the draw IBs but keep all the other + * accoutrements of a submision (including the interrupt) to keep + * the accounting sane. Set start_index and numibs to 0 to just + * generate the start and end markers and skip everything else + */ - if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) { - KGSL_CTXT_ERR(device, - "proc %s triggered fault tolerance" - " skipping commands for context till EOF %d\n", - drawctxt->pid_name, drawctxt->id); - if (flags & KGSL_CMD_FLAGS_EOF) - drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF; + if (cmdbatch->priv & CMDBATCH_FLAG_SKIP) { + start_index = 0; numibs = 0; } @@ -1011,14 +985,6 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, goto done; } - /*When preamble is enabled, the preamble buffer with state restoration - commands are stored in the first node of the IB chain. We can skip that - if a context switch hasn't occured */ - - if (drawctxt->flags & CTXT_FLAGS_PREAMBLE && - adreno_dev->drawctxt_active == drawctxt) - start_index = 1; - if (!start_index) { *cmds++ = cp_nop_packet(1); *cmds++ = KGSL_START_OF_IB_IDENTIFIER; @@ -1030,19 +996,17 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, *cmds++ = ibdesc[0].sizedwords; } for (i = start_index; i < numibs; i++) { - if (unlikely(adreno_dev->ib_check_level >= 1 && - !_parse_ibs(dev_priv, ibdesc[i].gpuaddr, - ibdesc[i].sizedwords))) { - ret = -EINVAL; - goto done; - } - if (ibdesc[i].sizedwords == 0) { - ret = -EINVAL; - goto done; - } + /* + * Skip 0 sized IBs - these are presumed to have been removed + * from consideration by the FT policy + */ + + if (ibdesc[i].sizedwords == 0) + *cmds++ = cp_nop_packet(2); + else + *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; - *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD; *cmds++ = ibdesc[i].gpuaddr; *cmds++ = ibdesc[i].sizedwords; } @@ -1050,36 +1014,27 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, *cmds++ = cp_nop_packet(1); *cmds++ = KGSL_END_OF_IB_IDENTIFIER; - kgsl_setstate(&device->mmu, context->id, + ret = kgsl_setstate(&device->mmu, context->id, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); - adreno_drawctxt_switch(adreno_dev, drawctxt, flags); + if (ret) + goto done; - if (drawctxt->flags & CTXT_FLAGS_USER_GENERATED_TS) { - if (timestamp_cmp(drawctxt->timestamp, *timestamp) >= 0) { - KGSL_DRV_ERR(device, - "Invalid user generated ts <%d:0x%x>, " - "less than last issued ts <%d:0x%x>\n", - drawctxt->id, *timestamp, drawctxt->id, - drawctxt->timestamp); - return -ERANGE; - } - drawctxt->timestamp = *timestamp; - } else - drawctxt->timestamp++; + ret = adreno_drawctxt_switch(adreno_dev, drawctxt, cmdbatch->flags); - ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer, - drawctxt, - (flags & KGSL_CMD_FLAGS_EOF), - &link[0], (cmds - link)); + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ if (ret) goto done; - if (drawctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS) - *timestamp = drawctxt->timestamp; - else - *timestamp = adreno_dev->ringbuffer.global_ts; + ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer, + drawctxt, + cmdbatch->flags, + &link[0], (cmds - link), + cmdbatch->timestamp); #ifdef CONFIG_MSM_KGSL_CFF_DUMP /* @@ -1090,209 +1045,11 @@ adreno_ringbuffer_issueibcmds(struct kgsl_device_private 
*dev_priv, adreno_idle(device); #endif - /* - * If context hung and recovered then return error so that the - * application may handle it - */ - if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_FT) { - drawctxt->flags &= ~CTXT_FLAGS_GPU_HANG_FT; - ret = -EPROTO; - } - done: - trace_kgsl_issueibcmds(device, context->id, ibdesc, numibs, - *timestamp, flags, ret, drawctxt->type); + kgsl_trace_issueibcmds(device, context->id, cmdbatch, + cmdbatch->timestamp, cmdbatch->flags, ret, + drawctxt->type); kfree(link); return ret; } - -static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb, - unsigned int rb_rptr) -{ - unsigned int temp_rb_rptr = rb_rptr; - unsigned int size = rb->buffer_desc.size; - unsigned int val[2]; - int i = 0; - bool check = false; - bool cmd_start = false; - - /* Go till the start of the ib sequence and turn on preamble */ - while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) { - kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr); - if (check && KGSL_START_OF_IB_IDENTIFIER == val[i]) { - /* decrement i */ - i = (i + 1) % 2; - if (val[i] == cp_nop_packet(4)) { - temp_rb_rptr = adreno_ringbuffer_dec_wrapped( - temp_rb_rptr, size); - kgsl_sharedmem_writel(&rb->buffer_desc, - temp_rb_rptr, cp_nop_packet(1)); - } - KGSL_FT_INFO(rb->device, - "Turned preamble on at offset 0x%x\n", - temp_rb_rptr / 4); - break; - } - /* If you reach beginning of next command sequence then exit - * First command encountered is the current one so don't break - * on that. */ - if (KGSL_CMD_IDENTIFIER == val[i]) { - if (cmd_start) - break; - cmd_start = true; - } - - i = (i + 1) % 2; - if (1 == i) - check = true; - temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr, - size); - } -} - -void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, - struct adreno_ft_data *ft_data) -{ - struct kgsl_device *device = rb->device; - unsigned int rb_rptr = ft_data->start_of_replay_cmds; - unsigned int good_rb_idx = 0, bad_rb_idx = 0, temp_rb_idx = 0; - unsigned int last_good_cmd_end_idx = 0, last_bad_cmd_end_idx = 0; - unsigned int cmd_start_idx = 0; - unsigned int val1 = 0; - int copy_rb_contents = 0; - unsigned int temp_rb_rptr; - struct kgsl_context *k_ctxt; - struct adreno_context *a_ctxt; - unsigned int size = rb->buffer_desc.size; - unsigned int *temp_rb_buffer = ft_data->rb_buffer; - int *rb_size = &ft_data->rb_size; - unsigned int *bad_rb_buffer = ft_data->bad_rb_buffer; - int *bad_rb_size = &ft_data->bad_rb_size; - unsigned int *good_rb_buffer = ft_data->good_rb_buffer; - int *good_rb_size = &ft_data->good_rb_size; - - /* - * If the start index from where commands need to be copied is invalid - * then no need to save off any commands - */ - if (0xFFFFFFFF == ft_data->start_of_replay_cmds) - return; - - k_ctxt = kgsl_context_get(device, ft_data->context_id); - - if (k_ctxt) { - a_ctxt = k_ctxt->devctxt; - if (a_ctxt->flags & CTXT_FLAGS_PREAMBLE) - _turn_preamble_on_for_ib_seq(rb, rb_rptr); - kgsl_context_put(k_ctxt); - } - k_ctxt = NULL; - - /* Walk the rb from the context switch. Omit any commands - * for an invalid context. 
*/ - while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) { - kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr); - - if (KGSL_CMD_IDENTIFIER == val1) { - /* Start is the NOP dword that comes before - * KGSL_CMD_IDENTIFIER */ - cmd_start_idx = temp_rb_idx - 1; - if ((copy_rb_contents) && (good_rb_idx)) - last_good_cmd_end_idx = good_rb_idx - 1; - if ((!copy_rb_contents) && (bad_rb_idx)) - last_bad_cmd_end_idx = bad_rb_idx - 1; - } - - /* check for context switch indicator */ - if (val1 == KGSL_CONTEXT_TO_MEM_IDENTIFIER) { - unsigned int temp_idx, val2; - /* increment by 3 to get to the context_id */ - temp_rb_rptr = rb_rptr + (3 * sizeof(unsigned int)) % - size; - kgsl_sharedmem_readl(&rb->buffer_desc, &val2, - temp_rb_rptr); - - /* if context switches to a context that did not cause - * hang then start saving the rb contents as those - * commands can be executed */ - k_ctxt = kgsl_context_get(rb->device, val2); - - if (k_ctxt) { - a_ctxt = k_ctxt->devctxt; - - /* If we are changing to a good context and were not - * copying commands then copy over commands to the good - * context */ - if (!copy_rb_contents && ((k_ctxt && - !(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) || - !k_ctxt)) { - for (temp_idx = cmd_start_idx; - temp_idx < temp_rb_idx; - temp_idx++) - good_rb_buffer[good_rb_idx++] = - temp_rb_buffer[temp_idx]; - ft_data->last_valid_ctx_id = val2; - copy_rb_contents = 1; - /* remove the good commands from bad buffer */ - bad_rb_idx = last_bad_cmd_end_idx; - } else if (copy_rb_contents && k_ctxt && - (a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) { - - /* If we are changing back to a bad context - * from good ctxt and were not copying commands - * to bad ctxt then copy over commands to - * the bad context */ - for (temp_idx = cmd_start_idx; - temp_idx < temp_rb_idx; - temp_idx++) - bad_rb_buffer[bad_rb_idx++] = - temp_rb_buffer[temp_idx]; - /* If we are changing to bad context then - * remove the dwords we copied for this - * sequence from the good buffer */ - good_rb_idx = last_good_cmd_end_idx; - copy_rb_contents = 0; - } - } - kgsl_context_put(k_ctxt); - } - - if (copy_rb_contents) - good_rb_buffer[good_rb_idx++] = val1; - else - bad_rb_buffer[bad_rb_idx++] = val1; - - /* Copy both good and bad commands to temp buffer */ - temp_rb_buffer[temp_rb_idx++] = val1; - - rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, size); - } - *good_rb_size = good_rb_idx; - *bad_rb_size = bad_rb_idx; - *rb_size = temp_rb_idx; -} - -void -adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff, - int num_rb_contents) -{ - int i; - unsigned int *ringcmds; - unsigned int rcmd_gpu; - - if (!num_rb_contents) - return; - - if (num_rb_contents > (rb->buffer_desc.size - rb->wptr)) { - adreno_regwrite(rb->device, REG_CP_RB_RPTR, 0); - rb->rptr = 0; - BUG_ON(num_rb_contents > rb->buffer_desc.size); - } - ringcmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; - rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(unsigned int) * rb->wptr; - for (i = 0; i < num_rb_contents; i++) - GSL_RB_WRITE(ringcmds, rcmd_gpu, rb_buff[i]); - rb->wptr += num_rb_contents; - adreno_ringbuffer_submit(rb); -} diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h index 3157f41432551f10af65398f2dcf9f333712877f..d7a774093354b0848dac638102dee0202515545a 100644 --- a/drivers/gpu/msm/adreno_ringbuffer.h +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -27,7 +27,6 @@ struct kgsl_device; struct kgsl_device_private; -struct adreno_ft_data; #define GSL_RB_MEMPTRS_SCRATCH_COUNT 8 struct kgsl_rbmemptrs 
{ @@ -90,15 +89,15 @@ struct adreno_ringbuffer { int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, - struct kgsl_ibdesc *ibdesc, - unsigned int numibs, - uint32_t *timestamp, - unsigned int flags); + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp); + +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch); int adreno_ringbuffer_init(struct kgsl_device *device); -int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, - unsigned int init_ram); +int adreno_ringbuffer_start(struct adreno_ringbuffer *rb); void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb); @@ -114,13 +113,6 @@ void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb); void kgsl_cp_intrcallback(struct kgsl_device *device); -void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb, - struct adreno_ft_data *ft_data); - -void -adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff, - int num_rb_contents); - unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, struct adreno_context *context, unsigned int numcmds); diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c index 893cfa61bdced43795e86363c6f8f4ee4329521b..3bcbd580f36df670c7fab133cd5276165793bbf6 100644 --- a/drivers/gpu/msm/adreno_snapshot.c +++ b/drivers/gpu/msm/adreno_snapshot.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -161,6 +161,12 @@ static unsigned int vfd_control_0; static unsigned int sp_vs_pvt_mem_addr; static unsigned int sp_fs_pvt_mem_addr; +/* + * Cached value of SP_VS_OBJ_START_REG and SP_FS_OBJ_START_REG. + */ +static unsigned int sp_vs_obj_start_reg; +static unsigned int sp_fs_obj_start_reg; + /* * Each load state block has two possible types. Each type has a different * number of dwords per unit. 
Use this handy lookup table to make sure @@ -373,6 +379,26 @@ static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt, sp_fs_pvt_mem_addr = 0; } + if (sp_vs_obj_start_reg) { + ret = kgsl_snapshot_get_object(device, ptbase, + sp_vs_obj_start_reg & 0xFFFFFFE0, 0, + SNAPSHOT_GPU_OBJECT_GENERIC); + if (ret < 0) + return -EINVAL; + snapshot_frozen_objsize += ret; + sp_vs_obj_start_reg = 0; + } + + if (sp_fs_obj_start_reg) { + ret = kgsl_snapshot_get_object(device, ptbase, + sp_fs_obj_start_reg & 0xFFFFFFE0, 0, + SNAPSHOT_GPU_OBJECT_GENERIC); + if (ret < 0) + return -EINVAL; + snapshot_frozen_objsize += ret; + sp_fs_obj_start_reg = 0; + } + /* Finally: VBOs */ /* The number of active VBOs is stored in VFD_CONTROL_O[31:27] */ @@ -444,7 +470,7 @@ static void ib_parse_type0(struct kgsl_device *device, unsigned int *ptr, int offset = type0_pkt_offset(*ptr); int i; - for (i = 0; i < size; i++, offset++) { + for (i = 0; i < size - 1; i++, offset++) { /* Visiblity stream buffer */ @@ -505,11 +531,20 @@ static void ib_parse_type0(struct kgsl_device *device, unsigned int *ptr, case A3XX_SP_FS_PVT_MEM_ADDR_REG: sp_fs_pvt_mem_addr = ptr[i + 1]; break; + case A3XX_SP_VS_OBJ_START_REG: + sp_vs_obj_start_reg = ptr[i + 1]; + break; + case A3XX_SP_FS_OBJ_START_REG: + sp_fs_obj_start_reg = ptr[i + 1]; + break; } } } } +static inline int parse_ib(struct kgsl_device *device, unsigned int ptbase, + unsigned int gpuaddr, unsigned int dwords); + /* Add an IB as a GPU object, but first, parse it to find more goodies within */ static int ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase, @@ -549,32 +584,12 @@ static int ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase, if (adreno_cmd_is_ib(src[i])) { unsigned int gpuaddr = src[i + 1]; unsigned int size = src[i + 2]; - unsigned int ibbase; - - /* Address of the last processed IB2 */ - kgsl_regread(device, REG_CP_IB2_BASE, &ibbase); - /* - * If this is the last IB2 that was executed, - * then push it to make sure it goes into the - * static space - */ + ret = parse_ib(device, ptbase, gpuaddr, size); - if (ibbase == gpuaddr) - push_object(device, - SNAPSHOT_OBJ_TYPE_IB, ptbase, - gpuaddr, size); - else { - ret = ib_add_gpu_object(device, - ptbase, gpuaddr, size); - - /* - * If adding the IB failed then stop - * parsing - */ - if (ret < 0) - goto done; - } + /* If adding the IB failed then stop parsing */ + if (ret < 0) + goto done; } else { ret = ib_parse_type3(device, &src[i], ptbase); /* @@ -604,6 +619,36 @@ done: return ret; } +/* + * We want to store the last executed IB1 and IB2 in the static region to ensure + * that we get at least some information out of the snapshot even if we can't + * access the dynamic data from the sysfs file. 
Push all other IBs on the + * dynamic list + */ +static inline int parse_ib(struct kgsl_device *device, unsigned int ptbase, + unsigned int gpuaddr, unsigned int dwords) +{ + unsigned int ib1base, ib2base; + int ret = 0; + + /* + * Check the IB address - if it is either the last executed IB1 or the + * last executed IB2 then push it into the static blob otherwise put + * it in the dynamic list + */ + + kgsl_regread(device, REG_CP_IB1_BASE, &ib1base); + kgsl_regread(device, REG_CP_IB2_BASE, &ib2base); + + if (gpuaddr == ib1base || gpuaddr == ib2base) + push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase, + gpuaddr, dwords); + else + ret = ib_add_gpu_object(device, ptbase, gpuaddr, dwords); + + return ret; +} + /* Snapshot the ringbuffer memory */ static int snapshot_rb(struct kgsl_device *device, void *snapshot, int remain, void *priv) @@ -740,13 +785,13 @@ static int snapshot_rb(struct kgsl_device *device, void *snapshot, struct kgsl_memdesc *memdesc = adreno_find_ctxtmem(device, ptbase, ibaddr, - ibsize); + ibsize << 2); /* IOMMU uses a NOP IB placed in setsate memory */ if (NULL == memdesc) if (kgsl_gpuaddr_in_memdesc( &device->mmu.setstate_memory, - ibaddr, ibsize)) + ibaddr, ibsize << 2)) memdesc = &device->mmu.setstate_memory; /* * The IB from CP_IB1_BASE and the IBs for legacy @@ -754,12 +799,11 @@ static int snapshot_rb(struct kgsl_device *device, void *snapshot, * others get marked at GPU objects */ - if (ibaddr == ibbase || memdesc != NULL) + if (memdesc != NULL) push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase, ibaddr, ibsize); else - ib_add_gpu_object(device, ptbase, ibaddr, - ibsize); + parse_ib(device, ptbase, ibaddr, ibsize); } index = index + 1; @@ -804,15 +848,14 @@ static int snapshot_ib(struct kgsl_device *device, void *snapshot, continue; if (adreno_cmd_is_ib(*src)) - push_object(device, SNAPSHOT_OBJ_TYPE_IB, - obj->ptbase, src[1], src[2]); - else { + ret = parse_ib(device, obj->ptbase, src[1], + src[2]); + else ret = ib_parse_type3(device, src, obj->ptbase); - /* Stop parsing if the type3 decode fails */ - if (ret < 0) - break; - } + /* Stop parsing if the type3 decode fails */ + if (ret < 0) + break; } } diff --git a/drivers/gpu/msm/adreno_trace.c b/drivers/gpu/msm/adreno_trace.c new file mode 100644 index 0000000000000000000000000000000000000000..607ba8c4afa54ef834cf8aaff04f4884269e4d63 --- /dev/null +++ b/drivers/gpu/msm/adreno_trace.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "adreno.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "adreno_trace.h" diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h new file mode 100644 index 0000000000000000000000000000000000000000..8993afb3764e6f46445dcafb88f11dfc9b87eb03 --- /dev/null +++ b/drivers/gpu/msm/adreno_trace.h @@ -0,0 +1,169 @@ +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _ADRENO_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE adreno_trace + +#include <linux/tracepoint.h> + +TRACE_EVENT(adreno_cmdbatch_queued, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int queued), + TP_ARGS(cmdbatch, queued), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, queued) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->queued = queued; + ), + TP_printk( + "ctx=%u ts=%u queued=%u", + __entry->id, __entry->timestamp, __entry->queued + ) +); + +DECLARE_EVENT_CLASS(adreno_cmdbatch_template, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight), + TP_ARGS(cmdbatch, inflight), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, inflight) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->inflight = inflight; + ), + TP_printk( + "ctx=%u ts=%u inflight=%u", + __entry->id, __entry->timestamp, + __entry->inflight + ) +); + +DEFINE_EVENT(adreno_cmdbatch_template, adreno_cmdbatch_retired, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight), + TP_ARGS(cmdbatch, inflight) +); + +DEFINE_EVENT(adreno_cmdbatch_template, adreno_cmdbatch_submitted, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight), + TP_ARGS(cmdbatch, inflight) +); + +DECLARE_EVENT_CLASS(adreno_drawctxt_template, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt), + TP_STRUCT__entry( + __field(unsigned int, id) + ), + TP_fast_assign( + __entry->id = drawctxt->base.id; + ), + TP_printk("ctx=%u", __entry->id) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_context_sleep, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_context_wake, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +TRACE_EVENT(adreno_drawctxt_wait_start, + TP_PROTO(unsigned int id, unsigned int ts), + TP_ARGS(id, ts), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, ts) + ), + TP_fast_assign( + __entry->id = id; + __entry->ts = ts; + ), + TP_printk( + "ctx=%u ts=%u", + __entry->id, __entry->ts + ) +); + +TRACE_EVENT(adreno_drawctxt_wait_done, + TP_PROTO(unsigned int id, unsigned int ts, int status), + TP_ARGS(id, ts, status), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, ts) + __field(int, status) + ), + TP_fast_assign( + __entry->id = id; + __entry->ts = ts; + __entry->status = status; + ), + TP_printk( + "ctx=%u ts=%u status=%d", + __entry->id, __entry->ts, __entry->status + ) +); + +TRACE_EVENT(adreno_gpu_fault, + TP_PROTO(unsigned int status, unsigned int rptr, unsigned 
int wptr, + unsigned int ib1base, unsigned int ib1size, + unsigned int ib2base, unsigned int ib2size), + TP_ARGS(status, rptr, wptr, ib1base, ib1size, ib2base, ib2size), + TP_STRUCT__entry( + __field(unsigned int, status) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(unsigned int, ib1base) + __field(unsigned int, ib1size) + __field(unsigned int, ib2base) + __field(unsigned int, ib2size) + ), + TP_fast_assign( + __entry->status = status; + __entry->rptr = rptr; + __entry->wptr = wptr; + __entry->ib1base = ib1base; + __entry->ib1size = ib1size; + __entry->ib2base = ib2base; + __entry->ib2size = ib2size; + ), + TP_printk("status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X", + __entry->status, __entry->wptr, __entry->rptr, + __entry->ib1base, __entry->ib1size, __entry->ib2base, + __entry->ib2size) +); + +#endif /* _ADRENO_TRACE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index d87e8acf092e814eacfa86640166a657f5d13614..bf7ec97a8b7c7e2965fedc8fb00fb769cc3fbee1 100644 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -14,6 +14,7 @@ #include <linux/fb.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/list.h> #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/interrupt.h> @@ -29,6 +30,8 @@ #include <linux/io.h> #include <mach/socinfo.h> #include <linux/mman.h> +#include <linux/sort.h> +#include <asm/cacheflush.h> #include "kgsl.h" #include "kgsl_debugfs.h" @@ -53,6 +56,45 @@ MODULE_PARM_DESC(ksgl_mmu_type, static struct ion_client *kgsl_ion_client; +/** + * kgsl_trace_issueibcmds() - Call trace_issueibcmds by proxy + * device: KGSL device + * id: ID of the context submitting the command + * cmdbatch: Pointer to kgsl_cmdbatch describing these commands + * timestamp: Timestamp assigned to the command batch + * flags: Flags sent by the user + * result: Result of the submission attempt + * type: Type of context issuing the command + * + * Wrap the issueibcmds ftrace hook into a function that can be called from the + * GPU specific modules. + */ +void kgsl_trace_issueibcmds(struct kgsl_device *device, int id, + struct kgsl_cmdbatch *cmdbatch, + unsigned int timestamp, unsigned int flags, + int result, unsigned int type) +{ + trace_kgsl_issueibcmds(device, id, cmdbatch, + timestamp, flags, result, type); +} +EXPORT_SYMBOL(kgsl_trace_issueibcmds); + +/** + * kgsl_trace_regwrite - call regwrite ftrace function by proxy + * device: KGSL device + * offset: dword offset of the register being written + * value: Value of the register being written + * + * Wrap the regwrite ftrace hook into a function that can be called from the + * GPU specific modules. 
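As a minimal illustration (not part of the patch) of how a GPU-specific module is expected to use this proxy instead of including the core trace header directly; my_regwrite and the elided MMIO details are hypothetical:

static void my_regwrite(struct kgsl_device *device,
		unsigned int offsetwords, unsigned int value)
{
	/* Emit the kgsl_regwrite tracepoint via the core proxy */
	kgsl_trace_regwrite(device, offsetwords, value);

	/* ... perform the actual MMIO write for this register ... */
}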
+ */ +void kgsl_trace_regwrite(struct kgsl_device *device, unsigned int offset, + unsigned int value) +{ + trace_kgsl_regwrite(device, offset, value); +} +EXPORT_SYMBOL(kgsl_trace_regwrite); + int kgsl_memfree_hist_init(void) { void *base; @@ -104,10 +146,13 @@ void kgsl_memfree_hist_set_event(unsigned int pid, unsigned int gpuaddr, * @ptbase - the pagetable base of the object * @gpuaddr - the GPU address of the object * @size - Size of the region to search + * + * Caller must kgsl_mem_entry_put() the returned entry when finished using it. */ -struct kgsl_mem_entry *kgsl_get_mem_entry(struct kgsl_device *device, - unsigned int ptbase, unsigned int gpuaddr, unsigned int size) +struct kgsl_mem_entry * __must_check +kgsl_get_mem_entry(struct kgsl_device *device, unsigned int ptbase, + unsigned int gpuaddr, unsigned int size) { struct kgsl_process_private *priv; struct kgsl_mem_entry *entry; @@ -117,15 +162,12 @@ struct kgsl_mem_entry *kgsl_get_mem_entry(struct kgsl_device *device, list_for_each_entry(priv, &kgsl_driver.process_list, list) { if (!kgsl_mmu_pt_equal(&device->mmu, priv->pagetable, ptbase)) continue; - spin_lock(&priv->mem_lock); entry = kgsl_sharedmem_find_region(priv, gpuaddr, size); if (entry) { - spin_unlock(&priv->mem_lock); mutex_unlock(&kgsl_driver.process_mutex); return entry; } - spin_unlock(&priv->mem_lock); } mutex_unlock(&kgsl_driver.process_mutex); @@ -268,16 +310,28 @@ err: static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) { + bool had_gpuaddr = false; + if (entry == NULL) return; + /* + * Unmap the entry first so that there isn't a period of + * time where kgsl doesn't know about the address range + * but it is still present in the pagetable. Unmapping will + * clear the gpuaddr field, so remember if we had a mapping, + * and an rbtree entry for later. + */ + had_gpuaddr = entry->memdesc.gpuaddr != 0; + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + spin_lock(&entry->priv->mem_lock); if (entry->id != 0) idr_remove(&entry->priv->mem_idr, entry->id); entry->id = 0; - if (entry->memdesc.gpuaddr != 0) + if (had_gpuaddr) rb_erase(&entry->node, &entry->priv->mem_rb); spin_unlock(&entry->priv->mem_lock); @@ -285,64 +339,66 @@ static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) entry->priv->stats[entry->memtype].cur -= entry->memdesc.size; entry->priv = NULL; - kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); kgsl_mem_entry_put(entry); } -/* Allocate a new context id */ - -static struct kgsl_context * -kgsl_create_context(struct kgsl_device_private *dev_priv) +/** + * kgsl_context_init() - helper to initialize kgsl_context members + * @dev_priv: the owner of the context + * @context: the newly created context struct, should be allocated by + * the device specific drawctxt_create function. + * + * This is a helper function for the device specific drawctxt_create + * function to initialize the common members of its context struct. + * If this function succeeds, reference counting is active in the context + * struct and the caller should kgsl_context_put() it on error. + * If it fails, the caller should just free the context structer + * it passed in. 
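To illustrate the contract described above, a minimal sketch of a device-specific drawctxt_create() using this helper (illustrative only; struct my_drawctxt is a hypothetical wrapper around struct kgsl_context, not something defined by this patch):

struct my_drawctxt {
	struct kgsl_context base;
	/* device-specific state ... */
};

struct kgsl_context *my_drawctxt_create(struct kgsl_device_private *dev_priv,
		uint32_t *flags)
{
	struct my_drawctxt *drawctxt;
	int ret;

	drawctxt = kzalloc(sizeof(*drawctxt), GFP_KERNEL);
	if (drawctxt == NULL)
		return ERR_PTR(-ENOMEM);

	ret = kgsl_context_init(dev_priv, &drawctxt->base);
	if (ret) {
		/* Init failed: refcounting never went live, so plain kfree */
		kfree(drawctxt);
		return ERR_PTR(ret);
	}

	/* From here on, errors must drop the reference with
	 * kgsl_context_put() instead of freeing directly */
	return &drawctxt->base;
}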
+ */ +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context) { - struct kgsl_context *context; - int ret, id; - - context = kzalloc(sizeof(*context), GFP_KERNEL); - - if (context == NULL) { - KGSL_DRV_INFO(dev_priv->device, "kzalloc(%d) failed\n", - sizeof(*context)); - return ERR_PTR(-ENOMEM); - } + int ret = 0, id; + struct kgsl_device *device = dev_priv->device; while (1) { - if (idr_pre_get(&dev_priv->device->context_idr, - GFP_KERNEL) == 0) { - KGSL_DRV_INFO(dev_priv->device, - "idr_pre_get: ENOMEM\n"); + if (idr_pre_get(&device->context_idr, GFP_KERNEL) == 0) { + KGSL_DRV_INFO(device, "idr_pre_get: ENOMEM\n"); ret = -ENOMEM; - goto func_end; + break; } - ret = idr_get_new_above(&dev_priv->device->context_idr, - context, 1, &id); + write_lock(&device->context_lock); + ret = idr_get_new_above(&device->context_idr, context, 1, &id); + context->id = id; + write_unlock(&device->context_lock); if (ret != -EAGAIN) break; } if (ret) - goto func_end; + goto fail; /* MAX - 1, there is one memdesc in memstore for device info */ if (id >= KGSL_MEMSTORE_MAX) { - KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d " + KGSL_DRV_INFO(device, "cannot have more than %d " "ctxts due to memstore limitation\n", KGSL_MEMSTORE_MAX); - idr_remove(&dev_priv->device->context_idr, id); ret = -ENOSPC; - goto func_end; + goto fail_free_id; } kref_init(&context->refcount); - context->id = id; - context->dev_priv = dev_priv; + context->device = dev_priv->device; + context->pagetable = dev_priv->process_priv->pagetable; + + context->pid = dev_priv->process_priv->pid; ret = kgsl_sync_timeline_create(context); if (ret) { - idr_remove(&dev_priv->device->context_idr, id); - goto func_end; + goto fail_free_id; } /* Initialize the pending event list */ @@ -358,50 +414,57 @@ kgsl_create_context(struct kgsl_device_private *dev_priv) */ INIT_LIST_HEAD(&context->events_list); - -func_end: - if (ret) { - kfree(context); - return ERR_PTR(ret); - } - - return context; + return 0; +fail_free_id: + write_lock(&device->context_lock); + idr_remove(&dev_priv->device->context_idr, id); + write_unlock(&device->context_lock); +fail: + return ret; } +EXPORT_SYMBOL(kgsl_context_init); /** - * kgsl_context_detach - Release the "master" context reference - * @context - The context that will be detached + * kgsl_context_detach() - Release the "master" context reference + * @context: The context that will be detached * * This is called when a context becomes unusable, because userspace * has requested for it to be destroyed. The context itself may * exist a bit longer until its reference count goes to zero. * Other code referencing the context can detect that it has been - * detached because the context id will be set to KGSL_CONTEXT_INVALID. + * detached by checking the KGSL_CONTEXT_DETACHED bit in + * context->priv. 
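The detached check referenced here, kgsl_context_detached(), is presumably a small test_bit() helper along these lines (a sketch, not part of this hunk):

static inline int kgsl_context_detached(struct kgsl_context *context)
{
	/* KGSL_CONTEXT_DETACHED is set in context->priv by
	 * kgsl_context_detach() below */
	return test_bit(KGSL_CONTEXT_DETACHED, &context->priv);
}

Code that holds a reference can use it to skip contexts that were detached underneath it before touching any per-context state.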
*/ -void -kgsl_context_detach(struct kgsl_context *context) +int kgsl_context_detach(struct kgsl_context *context) { - int id; struct kgsl_device *device; - if (context == NULL) - return; - device = context->dev_priv->device; + int ret; + + if (context == NULL || kgsl_context_detached(context)) + return -EINVAL; + + device = context->device; + trace_kgsl_context_detach(device, context); - id = context->id; - if (device->ftbl->drawctxt_destroy) - device->ftbl->drawctxt_destroy(device, context); - /*device specific drawctxt_destroy MUST clean up devctxt */ - BUG_ON(context->devctxt); + /* + * Mark the context as detached to keep others from using + * the context before it gets fully removed + */ + set_bit(KGSL_CONTEXT_DETACHED, &context->priv); + + ret = device->ftbl->drawctxt_detach(context); + /* * Cancel events after the device-specific context is * destroyed, to avoid possibly freeing memory while * it is still in use by the GPU. */ + kgsl_context_cancel_events(device, context); - idr_remove(&device->context_idr, id); - context->id = KGSL_CONTEXT_INVALID; kgsl_context_put(context); + + return ret; } void @@ -409,29 +472,21 @@ kgsl_context_destroy(struct kref *kref) { struct kgsl_context *context = container_of(kref, struct kgsl_context, refcount); - kgsl_sync_timeline_destroy(context); - kfree(context); -} + struct kgsl_device *device = context->device; -static void kgsl_check_idle_locked(struct kgsl_device *device) -{ - if (device->pwrctrl.nap_allowed == true && - device->state == KGSL_STATE_ACTIVE && - device->requested_state == KGSL_STATE_NONE) { - kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); - kgsl_pwrscale_idle(device); - if (kgsl_pwrctrl_sleep(device) != 0) - mod_timer(&device->idle_timer, - jiffies + - device->pwrctrl.interval_timeout); + trace_kgsl_context_destroy(device, context); + + BUG_ON(!kgsl_context_detached(context)); + + write_lock(&device->context_lock); + if (context->id != KGSL_CONTEXT_INVALID) { + idr_remove(&device->context_idr, context->id); + context->id = KGSL_CONTEXT_INVALID; } -} + write_unlock(&device->context_lock); + kgsl_sync_timeline_destroy(context); -static void kgsl_check_idle(struct kgsl_device *device) -{ - mutex_lock(&device->mutex); - kgsl_check_idle_locked(device); - mutex_unlock(&device->mutex); + device->ftbl->drawctxt_destroy(context); } struct kgsl_device *kgsl_get_device(int dev_idx) @@ -496,23 +551,23 @@ static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) policy_saved = device->pwrscale.policy; device->pwrscale.policy = NULL; kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND); - /* Make sure no user process is waiting for a timestamp * - * before supending */ - if (device->active_cnt != 0) { - mutex_unlock(&device->mutex); - wait_for_completion(&device->suspend_gate); - mutex_lock(&device->mutex); - } + + /* Tell the device to drain the submission queue */ + device->ftbl->drain(device); + + /* Wait for the active count to hit zero */ + kgsl_active_count_wait(device); + /* Don't let the timer wake us during suspended sleep. */ del_timer_sync(&device->idle_timer); switch (device->state) { case KGSL_STATE_INIT: break; case KGSL_STATE_ACTIVE: - /* Wait for the device to become idle */ - device->ftbl->idle(device); case KGSL_STATE_NAP: case KGSL_STATE_SLEEP: + /* make sure power is on to stop the device */ + kgsl_pwrctrl_enable(device); /* Get the completion ready to be waited upon. 
*/ INIT_COMPLETION(device->hwaccess_gate); device->ftbl->suspend_context(device); @@ -600,9 +655,14 @@ void kgsl_early_suspend_driver(struct early_suspend *h) struct kgsl_device, display_off); KGSL_PWR_WARN(device, "early suspend start\n"); mutex_lock(&device->mutex); - device->pwrctrl.restore_slumber = true; - kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); - kgsl_pwrctrl_sleep(device); + + /* Only go to slumber if active_cnt is 0 */ + if (atomic_read(&device->active_cnt) == 0) { + device->pwrctrl.restore_slumber = true; + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + kgsl_pwrctrl_sleep(device); + } + mutex_unlock(&device->mutex); KGSL_PWR_WARN(device, "early suspend end\n"); } @@ -632,24 +692,121 @@ void kgsl_late_resume_driver(struct early_suspend *h) device->pwrctrl.restore_slumber = false; if (device->pwrscale.policy == NULL) kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_TURBO); - kgsl_pwrctrl_wake(device); + if (kgsl_pwrctrl_wake(device) != 0) + return; + /* + * We don't have a way to go directly from + * a deeper sleep state to NAP, which is + * the desired state here. + * + * Except if active_cnt is non zero which means that + * we probably went to early_suspend with it non zero + * and thus the system is still in an active state. + */ + + if (atomic_read(&device->active_cnt) == 0) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); + kgsl_pwrctrl_sleep(device); + } + mutex_unlock(&device->mutex); - kgsl_check_idle(device); KGSL_PWR_WARN(device, "late resume end\n"); } EXPORT_SYMBOL(kgsl_late_resume_driver); -/* file operations */ +/* + * kgsl_destroy_process_private() - Cleanup function to free process private + * @kref: - Pointer to object being destroyed's kref struct + * Free struct object and all other resources attached to it. + * Since the function can be used when not all resources inside process + * private have been allocated, there is a check to (before each resource + * cleanup) see if the struct member being cleaned is in fact allocated or not. + * If the value is not NULL, resource is freed. + */ +static void kgsl_destroy_process_private(struct kref *kref) +{ + + struct kgsl_mem_entry *entry = NULL; + int next = 0; + + + struct kgsl_process_private *private = container_of(kref, + struct kgsl_process_private, refcount); + + /* + * Remove this process from global process list + * We do not acquire a lock first as it is expected that + * kgsl_destroy_process_private() is only going to be called + * through kref_put() which is only called after acquiring + * the lock. + */ + if (!private) { + KGSL_CORE_ERR("Cannot destroy null process private\n"); + mutex_unlock(&kgsl_driver.process_mutex); + return; + } + list_del(&private->list); + mutex_unlock(&kgsl_driver.process_mutex); + + if (private->kobj.parent) + kgsl_process_uninit_sysfs(private); + if (private->debug_root) + debugfs_remove_recursive(private->debug_root); + + while (1) { + rcu_read_lock(); + entry = idr_get_next(&private->mem_idr, &next); + rcu_read_unlock(); + if (entry == NULL) + break; + kgsl_mem_entry_detach_process(entry); + /* + * Always start back at the beginning, to + * ensure all entries are removed, + * like list_for_each_entry_safe. 
+ */ + next = 0; + } + kgsl_mmu_putpagetable(private->pagetable); + idr_destroy(&private->mem_idr); + + kfree(private); + return; +} + +static void +kgsl_put_process_private(struct kgsl_device *device, + struct kgsl_process_private *private) +{ + mutex_lock(&kgsl_driver.process_mutex); + + /* + * kref_put() returns 1 when the refcnt has reached 0 and the destroy + * function is called. Mutex is released in the destroy function if + * its called, so only release mutex if kref_put() return 0 + */ + if (!kref_put(&private->refcount, kgsl_destroy_process_private)) + mutex_unlock(&kgsl_driver.process_mutex); + return; +} + +/* + * find_process_private() - Helper function to search for process private + * @cur_dev_priv: Pointer to device private structure which contains pointers + * to device and process_private structs. + * Returns: Pointer to the found/newly created private struct + */ static struct kgsl_process_private * -kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv) +kgsl_find_process_private(struct kgsl_device_private *cur_dev_priv) { struct kgsl_process_private *private; + /* Search in the process list */ mutex_lock(&kgsl_driver.process_mutex); list_for_each_entry(private, &kgsl_driver.process_list, list) { if (private->pid == task_tgid_nr(current)) { - private->refcnt++; - goto out; + kref_get(&private->refcount); + goto done; } } @@ -658,80 +815,65 @@ kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv) if (private == NULL) { KGSL_DRV_ERR(cur_dev_priv->device, "kzalloc(%d) failed\n", sizeof(struct kgsl_process_private)); - goto out; + goto done; } - spin_lock_init(&private->mem_lock); - private->refcnt = 1; + kref_init(&private->refcount); + private->pid = task_tgid_nr(current); - private->mem_rb = RB_ROOT; + spin_lock_init(&private->mem_lock); + mutex_init(&private->process_private_mutex); + /* Add the newly created process struct obj to the process list */ + list_add(&private->list, &kgsl_driver.process_list); +done: + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} - idr_init(&private->mem_idr); +/* + * kgsl_get_process_private() - Used to find the process private structure + * @cur_dev_priv: Current device pointer + * Finds or creates a new porcess private structire and initializes its members + * Returns: Pointer to the private process struct obj found/created or + * NULL if pagetable creation for this process private obj failed. 
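Since the lookup is now reference counted, each successful kgsl_get_process_private() is expected to be balanced by kgsl_put_process_private(). A sketch of the pairing (illustrative, not part of the patch; the error code is an assumption):

	struct kgsl_process_private *private;

	private = kgsl_get_process_private(dev_priv);
	if (private == NULL)
		return -ENOMEM;

	/* ... use private->pagetable, private->mem_idr, etc. while the
	 * reference is held ... */

	kgsl_put_process_private(dev_priv->device, private);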
+ */ +static struct kgsl_process_private * +kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv) +{ + struct kgsl_process_private *private; - if (kgsl_mmu_enabled()) - { + private = kgsl_find_process_private(cur_dev_priv); + + mutex_lock(&private->process_private_mutex); + + if (!private->mem_rb.rb_node) { + private->mem_rb = RB_ROOT; + idr_init(&private->mem_idr); + } + + if ((!private->pagetable) && kgsl_mmu_enabled()) { unsigned long pt_name; pt_name = task_tgid_nr(current); private->pagetable = kgsl_mmu_getpagetable(pt_name); if (private->pagetable == NULL) { - kfree(private); - private = NULL; - goto out; + mutex_unlock(&private->process_private_mutex); + kgsl_put_process_private(cur_dev_priv->device, + private); + return NULL; } } - list_add(&private->list, &kgsl_driver.process_list); + if (!private->kobj.parent) + kgsl_process_init_sysfs(private); + if (!private->debug_root) + kgsl_process_init_debugfs(private); - kgsl_process_init_sysfs(private); - kgsl_process_init_debugfs(private); + mutex_unlock(&private->process_private_mutex); -out: - mutex_unlock(&kgsl_driver.process_mutex); return private; } -static void -kgsl_put_process_private(struct kgsl_device *device, - struct kgsl_process_private *private) -{ - struct kgsl_mem_entry *entry = NULL; - int next = 0; - - if (!private) - return; - - mutex_lock(&kgsl_driver.process_mutex); - - if (--private->refcnt) - goto unlock; - - kgsl_process_uninit_sysfs(private); - debugfs_remove_recursive(private->debug_root); - - list_del(&private->list); - - while (1) { - rcu_read_lock(); - entry = idr_get_next(&private->mem_idr, &next); - rcu_read_unlock(); - if (entry == NULL) - break; - kgsl_mem_entry_detach_process(entry); - /* - * Always start back at the beginning, to - * ensure all entries are removed, - * like list_for_each_entry_safe. 
- */ - next = 0; - } - kgsl_mmu_putpagetable(private->pagetable); - idr_destroy(&private->mem_idr); - kfree(private); -unlock: - mutex_unlock(&kgsl_driver.process_mutex); -} - static int kgsl_release(struct inode *inodep, struct file *filep) { int result = 0; @@ -744,15 +886,27 @@ static int kgsl_release(struct inode *inodep, struct file *filep) filep->private_data = NULL; mutex_lock(&device->mutex); - kgsl_check_suspended(device); + kgsl_active_count_get(device); while (1) { + read_lock(&device->context_lock); context = idr_get_next(&device->context_idr, &next); + read_unlock(&device->context_lock); + if (context == NULL) break; - if (context->dev_priv == dev_priv) + + if (context->pid == private->pid) { + /* + * Hold a reference to the context in case somebody + * tries to put it while we are detaching + */ + + _kgsl_context_get(context); kgsl_context_detach(context); + kgsl_context_put(context); + } next = next + 1; } @@ -766,10 +920,17 @@ static int kgsl_release(struct inode *inodep, struct file *filep) device->open_count--; if (device->open_count == 0) { + BUG_ON(atomic_read(&device->active_cnt) > 1); result = device->ftbl->stop(device); kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + /* + * active_cnt special case: we just stopped the device, + * so no need to use kgsl_active_count_put() + */ + atomic_dec(&device->active_cnt); + } else { + kgsl_active_count_put(device); } - mutex_unlock(&device->mutex); kfree(dev_priv); @@ -815,19 +976,27 @@ static int kgsl_open(struct inode *inodep, struct file *filep) filep->private_data = dev_priv; mutex_lock(&device->mutex); - kgsl_check_suspended(device); if (device->open_count == 0) { + /* + * active_cnt special case: we are starting up for the first + * time, so use this sequence instead of the kgsl_pwrctrl_wake() + * which will be called by kgsl_active_count_get(). + */ + atomic_inc(&device->active_cnt); kgsl_sharedmem_set(&device->memstore, 0, 0, device->memstore.size); - result = device->ftbl->start(device, true); + result = device->ftbl->init(device); + if (result) + goto err_freedevpriv; - if (result) { - mutex_unlock(&device->mutex); + result = device->ftbl->start(device); + if (result) goto err_freedevpriv; - } + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + kgsl_active_count_put(device); } device->open_count++; mutex_unlock(&device->mutex); @@ -853,11 +1022,17 @@ err_stop: mutex_lock(&device->mutex); device->open_count--; if (device->open_count == 0) { + /* make sure power is on to stop the device */ + kgsl_pwrctrl_enable(device); result = device->ftbl->stop(device); kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); } - mutex_unlock(&device->mutex); err_freedevpriv: + /* only the first open takes an active count */ + if (device->open_count == 0) + atomic_dec(&device->active_cnt); + + mutex_unlock(&device->mutex); filep->private_data = NULL; kfree(dev_priv); err_pmruntime: @@ -865,8 +1040,17 @@ err_pmruntime: return result; } -/*call with private->mem_lock locked */ -struct kgsl_mem_entry * +/** + * kgsl_sharedmem_find_region() - Find a gpu memory allocation + * + * @private: private data for the process to check. + * @gpuaddr: start address of the region + * @size: size of the region + * + * Find a gpu allocation. Caller must kgsl_mem_entry_put() + * the returned entry when finished using it. 
+ */ +struct kgsl_mem_entry * __must_check kgsl_sharedmem_find_region(struct kgsl_process_private *private, unsigned int gpuaddr, size_t size) { @@ -875,46 +1059,57 @@ kgsl_sharedmem_find_region(struct kgsl_process_private *private, if (!kgsl_mmu_gpuaddr_in_range(gpuaddr)) return NULL; + spin_lock(&private->mem_lock); while (node != NULL) { struct kgsl_mem_entry *entry; entry = rb_entry(node, struct kgsl_mem_entry, node); - - if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) + if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + kgsl_mem_entry_get(entry); + spin_unlock(&private->mem_lock); return entry; - + } if (gpuaddr < entry->memdesc.gpuaddr) node = node->rb_left; else if (gpuaddr >= (entry->memdesc.gpuaddr + entry->memdesc.size)) node = node->rb_right; else { + spin_unlock(&private->mem_lock); return NULL; } } + spin_unlock(&private->mem_lock); return NULL; } EXPORT_SYMBOL(kgsl_sharedmem_find_region); -/*call with private->mem_lock locked */ -static inline struct kgsl_mem_entry * +/** + * kgsl_sharedmem_find() - Find a gpu memory allocation + * + * @private: private data for the process to check. + * @gpuaddr: start address of the region + * + * Find a gpu allocation. Caller must kgsl_mem_entry_put() + * the returned entry when finished using it. + */ +static inline struct kgsl_mem_entry * __must_check kgsl_sharedmem_find(struct kgsl_process_private *private, unsigned int gpuaddr) { return kgsl_sharedmem_find_region(private, gpuaddr, 1); } /** - * kgsl_sharedmem_region_empty - Check if an addression region is empty + * kgsl_sharedmem_region_empty() - Check if an addression region is empty * * @private: private data for the process to check. * @gpuaddr: start address of the region * @size: length of the region. * * Checks that there are no existing allocations within an address - * region. Note that unlike other kgsl_sharedmem* search functions, - * this one manages locking on its own. + * region. */ int kgsl_sharedmem_region_empty(struct kgsl_process_private *private, @@ -958,19 +1153,24 @@ kgsl_sharedmem_region_empty(struct kgsl_process_private *private, } /** - * kgsl_sharedmem_find_id - find a memory entry by id + * kgsl_sharedmem_find_id() - find a memory entry by id * @process: the owning process * @id: id to find * * @returns - the mem_entry or NULL + * + * Caller must kgsl_mem_entry_put() the returned entry, when finished using + * it. 
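Callers of these lookups now follow a consistent get/put discipline; roughly (illustrative sketch of the pattern used throughout this patch):

	struct kgsl_mem_entry *entry;

	entry = kgsl_sharedmem_find_id(process, id);
	if (entry == NULL)
		return -EINVAL;

	/* ... operate on entry->memdesc while the reference is held ... */

	kgsl_mem_entry_put(entry);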
*/ -static inline struct kgsl_mem_entry * +static inline struct kgsl_mem_entry * __must_check kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id) { struct kgsl_mem_entry *entry; rcu_read_lock(); entry = idr_find(&process->mem_idr, id); + if (entry) + kgsl_mem_entry_get(entry); rcu_read_unlock(); return entry; @@ -1035,177 +1235,696 @@ static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, context->reset_status = KGSL_CTX_STAT_NO_ERROR; } - kgsl_context_put(context); - break; - } - default: - result = dev_priv->device->ftbl->getproperty( - dev_priv->device, param->type, - param->value, param->sizebytes); + kgsl_context_put(context); + break; + } + default: + result = dev_priv->device->ftbl->getproperty( + dev_priv->device, param->type, + param->value, param->sizebytes); + } + + + return result; +} + +static long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = data; + + if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv->device, param->type, + param->value, param->sizebytes); + + return result; +} + +static long _device_waittimestamp(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int timeout) +{ + int result = 0; + struct kgsl_device *device = dev_priv->device; + unsigned int context_id = context ? context->id : KGSL_MEMSTORE_GLOBAL; + + trace_kgsl_waittimestamp_entry(device, context_id, + kgsl_readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED), + timestamp, timeout); + + result = device->ftbl->waittimestamp(dev_priv->device, + context, timestamp, timeout); + + trace_kgsl_waittimestamp_exit(device, + kgsl_readtimestamp(device, context, + KGSL_TIMESTAMP_RETIRED), + result); + + return result; +} + +static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp *param = data; + + return _device_waittimestamp(dev_priv, NULL, + param->timestamp, param->timeout); +} + +static long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_context *context; + long result = -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + + if (context) + result = _device_waittimestamp(dev_priv, context, + param->timestamp, param->timeout); + + kgsl_context_put(context); + return result; +} + +/* + * KGSL command batch management + * A command batch is a single submission from userland. The cmdbatch + * encapsulates everything about the submission : command buffers, flags and + * sync points. + * + * Sync points are events that need to expire before the + * cmdbatch can be queued to the hardware. For each sync point a + * kgsl_cmdbatch_sync_event struct is created and added to a list in the + * cmdbatch. There can be multiple types of events both internal ones (GPU + * events) and external triggers. As the events expire the struct is deleted + * from the list. The GPU will submit the command batch as soon as the list + * goes empty indicating that all the sync points have been met. 
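For illustration, a userspace submission that gates one IB on another context's timestamp might look roughly like the sketch below. This is an assumption-laden example only: the uapi struct layouts and the IOCTL_KGSL_SUBMIT_COMMANDS name are assumed to mirror the fields the handler in this patch dereferences (cmdlist/numcmds, synclist/numsyncs, context_id, timestamp); producer_ctx, producer_ts, ib_gpuaddr and ib_dwords are placeholders.

	struct kgsl_cmd_syncpoint_timestamp ts = {
		.context_id = producer_ctx,	/* wait for this context ... */
		.timestamp = producer_ts,	/* ... to retire this timestamp */
	};

	struct kgsl_cmd_syncpoint sync = {
		.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP,
		.priv = &ts,
		.size = sizeof(ts),
	};

	struct kgsl_ibdesc ib = {
		.gpuaddr = ib_gpuaddr,
		.sizedwords = ib_dwords,
	};

	struct kgsl_submit_commands cmds = {
		.context_id = my_ctx,
		.cmdlist = &ib,
		.numcmds = 1,
		.synclist = &sync,
		.numsyncs = 1,
	};

	/* The cmdbatch is queued immediately but is not submitted to the
	 * hardware until the sync point above has expired */
	ret = ioctl(fd, IOCTL_KGSL_SUBMIT_COMMANDS, &cmds);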
+ */ + +/** + * struct kgsl_cmdbatch_sync_event + * @type: Syncpoint type + * @node: Local list node for the cmdbatch sync point list + * @cmdbatch: Pointer to the cmdbatch that owns the sync event + * @context: Pointer to the KGSL context that owns the cmdbatch + * @timestamp: Pending timestamp for the event + * @handle: Pointer to a sync fence handle + * @device: Pointer to the KGSL device + */ +struct kgsl_cmdbatch_sync_event { + int type; + struct list_head node; + struct kgsl_cmdbatch *cmdbatch; + struct kgsl_context *context; + unsigned int timestamp; + struct kgsl_sync_fence_waiter *handle; + struct kgsl_device *device; + spinlock_t lock; +}; + +/** + * kgsl_cmdbatch_destroy_object() - Destroy a cmdbatch object + * @kref: Pointer to the kref structure for this object + * + * Actually destroy a command batch object. Called from kgsl_cmdbatch_put + */ +void kgsl_cmdbatch_destroy_object(struct kref *kref) +{ + struct kgsl_cmdbatch *cmdbatch = container_of(kref, + struct kgsl_cmdbatch, refcount); + + kgsl_context_put(cmdbatch->context); + kfree(cmdbatch->ibdesc); + + kfree(cmdbatch); +} +EXPORT_SYMBOL(kgsl_cmdbatch_destroy_object); + +static void kgsl_cmdbatch_sync_expire(struct kgsl_device *device, + struct kgsl_cmdbatch_sync_event *event) +{ + int sched = 0; + + spin_lock(&event->cmdbatch->lock); + list_del(&event->node); + sched = list_empty(&event->cmdbatch->synclist) ? 1 : 0; + spin_unlock(&event->cmdbatch->lock); + + /* + * if this is the last event in the list then tell + * the GPU device that the cmdbatch can be submitted + */ + + if (sched && device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, event->cmdbatch->context); +} + + +/* + * This function is called by the GPU event when the sync event timestamp + * expires + */ +static void kgsl_cmdbatch_sync_func(struct kgsl_device *device, void *priv, + u32 id, u32 timestamp, u32 type) +{ + struct kgsl_cmdbatch_sync_event *event = priv; + + kgsl_cmdbatch_sync_expire(device, event); + + kgsl_context_put(event->context); + kgsl_cmdbatch_put(event->cmdbatch); + + kfree(event); +} + +/** + * kgsl_cmdbatch_destroy() - Destroy a cmdbatch structure + * @cmdbatch: Pointer to the command batch object to destroy + * + * Start the process of destroying a command batch. Cancel any pending events + * and decrement the refcount. 
+ */ +void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_cmdbatch_sync_event *event, *tmp; + int canceled = 0; + + spin_lock(&cmdbatch->lock); + + /* Delete any pending sync points for this command batch */ + list_for_each_entry_safe(event, tmp, &cmdbatch->synclist, node) { + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + /* Cancel the event if it still exists */ + mutex_lock(&cmdbatch->device->mutex); + kgsl_cancel_event(cmdbatch->device, event->context, + event->timestamp, kgsl_cmdbatch_sync_func, + event); + canceled = 1; + mutex_unlock(&cmdbatch->device->mutex); + kgsl_context_put(event->context); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + canceled = kgsl_sync_fence_async_cancel(event->handle); + break; + default: + break; + } + + if(canceled) { + list_del(&event->node); + kfree(event); + + /* + * Put back a instance of the cmdbatch for each pending event + * that we canceled + */ + + kgsl_cmdbatch_put(cmdbatch); + } + } + spin_unlock(&cmdbatch->lock); + + kgsl_cmdbatch_put(cmdbatch); +} +EXPORT_SYMBOL(kgsl_cmdbatch_destroy); + +static void kgsl_cmdbatch_sync_fence_func(void *priv) +{ + struct kgsl_cmdbatch_sync_event *event = priv; + + spin_lock(&event->lock); + kgsl_cmdbatch_sync_expire(event->device, event); + kgsl_cmdbatch_put(event->cmdbatch); + spin_unlock(&event->lock); + kfree(event); +} + +/* kgsl_cmdbatch_add_sync_fence() - Add a new sync fence syncpoint + * @device: KGSL device + * @cmdbatch: KGSL cmdbatch to add the sync point to + * @priv: Private sructure passed by the user + * + * Add a new fence sync syncpoint to the cmdbatch. + */ +static int kgsl_cmdbatch_add_sync_fence(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void *priv) +{ + struct kgsl_cmd_syncpoint_fence *sync = priv; + struct kgsl_cmdbatch_sync_event *event; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + + if (event == NULL) + return -ENOMEM; + + kref_get(&cmdbatch->refcount); + + event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE; + event->cmdbatch = cmdbatch; + event->device = device; + spin_lock_init(&event->lock); + + /* + * Add it to the list first to account for the possiblity that the + * callback will happen immediately after the call to + * kgsl_sync_fence_async_wait + */ + + spin_lock(&cmdbatch->lock); + list_add(&event->node, &cmdbatch->synclist); + spin_unlock(&cmdbatch->lock); + + /* + * There is a distinct race condition that can occur if the fence + * callback is fired before the function has a chance to return. The + * event struct would be freed before we could write event->handle and + * hilarity ensued. Protect against this by protecting the call to + * kgsl_sync_fence_async_wait and the kfree in the callback with a lock. + */ + + spin_lock(&event->lock); + + event->handle = kgsl_sync_fence_async_wait(sync->fd, + kgsl_cmdbatch_sync_fence_func, event); + + + if (IS_ERR_OR_NULL(event->handle)) { + int ret = PTR_ERR(event->handle); + + spin_lock(&cmdbatch->lock); + list_del(&event->node); + spin_unlock(&cmdbatch->lock); + + kgsl_cmdbatch_put(cmdbatch); + spin_unlock(&event->lock); + kfree(event); + + return ret; + } + + spin_unlock(&event->lock); + return 0; +} + +/* kgsl_cmdbatch_add_sync_timestamp() - Add a new sync point for a cmdbatch + * @device: KGSL device + * @cmdbatch: KGSL cmdbatch to add the sync point to + * @priv: Private sructure passed by the user + * + * Add a new sync point timestamp event to the cmdbatch. 
+ */ +static int kgsl_cmdbatch_add_sync_timestamp(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void *priv) +{ + struct kgsl_cmd_syncpoint_timestamp *sync = priv; + struct kgsl_context *context = kgsl_context_get(cmdbatch->device, + sync->context_id); + struct kgsl_cmdbatch_sync_event *event; + int ret = -EINVAL; + + if (context == NULL) + return -EINVAL; + + /* Sanity check - you can't create a sync point on your own context */ + if (context == cmdbatch->context) { + KGSL_DRV_ERR(device, + "Cannot create a sync point on your own context\n"); + goto done; + } + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) { + ret = -ENOMEM; + goto done; + } + + kref_get(&cmdbatch->refcount); + + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP; + event->cmdbatch = cmdbatch; + event->context = context; + event->timestamp = sync->timestamp; + + spin_lock(&cmdbatch->lock); + list_add(&event->node, &cmdbatch->synclist); + spin_unlock(&cmdbatch->lock); + + mutex_lock(&device->mutex); + kgsl_active_count_get(device); + ret = kgsl_add_event(device, context->id, sync->timestamp, + kgsl_cmdbatch_sync_func, event, NULL); + kgsl_active_count_put(device); + mutex_unlock(&device->mutex); + + if (ret) { + spin_lock(&cmdbatch->lock); + list_del(&event->node); + spin_unlock(&cmdbatch->lock); + + kgsl_cmdbatch_put(cmdbatch); + kfree(event); + } + +done: + if (ret) + kgsl_context_put(context); + + return ret; +} + +/** + * kgsl_cmdbatch_add_sync() - Add a sync point to a command batch + * @device: Pointer to the KGSL device struct for the GPU + * @cmdbatch: Pointer to the cmdbatch + * @sync: Pointer to the user-specified struct defining the syncpoint + * + * Create a new sync point in the cmdbatch based on the user specified + * parameters + */ +static int kgsl_cmdbatch_add_sync(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmd_syncpoint *sync) +{ + void *priv; + int ret, psize; + int (*func)(struct kgsl_device *device, struct kgsl_cmdbatch *cmdbatch, + void *priv); + + switch (sync->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + psize = sizeof(struct kgsl_cmd_syncpoint_timestamp); + func = kgsl_cmdbatch_add_sync_timestamp; + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + psize = sizeof(struct kgsl_cmd_syncpoint_fence); + func = kgsl_cmdbatch_add_sync_fence; + break; + default: + KGSL_DRV_ERR(device, "Invalid sync type 0x%x\n", sync->type); + return -EINVAL; + } + + if (sync->size != psize) { + KGSL_DRV_ERR(device, "Invalid sync size %d\n", sync->size); + return -EINVAL; + } + + priv = kzalloc(sync->size, GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + if (copy_from_user(priv, sync->priv, sync->size)) { + kfree(priv); + return -EFAULT; + } + + ret = func(device, cmdbatch, priv); + kfree(priv); + + return ret; +} + +/** + * kgsl_cmdbatch_create() - Create a new cmdbatch structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * @numibs: Number of indirect buffers to make room for in the cmdbatch + * + * Allocate an new cmdbatch structure and add enough room to store the list of + * indirect buffers + */ +static struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags, + unsigned int numibs) +{ + struct kgsl_cmdbatch *cmdbatch = kzalloc(sizeof(*cmdbatch), GFP_KERNEL); + if (cmdbatch == NULL) + return ERR_PTR(-ENOMEM); + + if (!(flags & KGSL_CONTEXT_SYNC)) { + cmdbatch->ibdesc = kzalloc(sizeof(*cmdbatch->ibdesc) * numibs, + GFP_KERNEL); + 
if (cmdbatch->ibdesc == NULL) { + kfree(cmdbatch); + return ERR_PTR(-ENOMEM); + } + } + + kref_init(&cmdbatch->refcount); + INIT_LIST_HEAD(&cmdbatch->synclist); + spin_lock_init(&cmdbatch->lock); + + cmdbatch->device = device; + cmdbatch->ibcount = (flags & KGSL_CONTEXT_SYNC) ? 0 : numibs; + cmdbatch->context = context; + cmdbatch->flags = flags; + + /* + * Increase the reference count on the context so it doesn't disappear + * during the lifetime of this command batch + */ + _kgsl_context_get(context); + + return cmdbatch; +} + +/** + * _kgsl_cmdbatch_verify() - Perform a quick sanity check on a command batch + * @device: Pointer to a KGSL device that owns the command batch + * @cmdbatch: Number of indirect buffers to make room for in the cmdbatch + * + * Do a quick sanity test on the list of indirect buffers in a command batch + * verifying that the size and GPU address + */ +static bool _kgsl_cmdbatch_verify(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + int i; + + for (i = 0; i < cmdbatch->ibcount; i++) { + if (cmdbatch->ibdesc[i].sizedwords == 0) { + KGSL_DRV_ERR(device, + "Invalid IB: size is 0\n"); + return false; + } + + if (!kgsl_mmu_gpuaddr_in_range(cmdbatch->ibdesc[i].gpuaddr)) { + KGSL_DRV_ERR(device, + "Invalid IB: address 0x%X is out of range\n", + cmdbatch->ibdesc[i].gpuaddr); + return false; + } } - - return result; + return true; } -static long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, - unsigned int cmd, void *data) +/** + * _kgsl_cmdbatch_create_legacy() - Create a cmdbatch from a legacy ioctl struct + * @device: Pointer to the KGSL device struct for the GPU + * @context: Pointer to the KGSL context that issued the command batch + * @param: Pointer to the kgsl_ringbuffer_issueibcmds struct that the user sent + * + * Create a command batch from the legacy issueibcmds format. + */ +static struct kgsl_cmdbatch *_kgsl_cmdbatch_create_legacy( + struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_ringbuffer_issueibcmds *param) { - int result = 0; - /* The getproperty struct is reused for setproperty too */ - struct kgsl_device_getproperty *param = data; + struct kgsl_cmdbatch *cmdbatch = + kgsl_cmdbatch_create(device, context, param->flags, 1); - if (dev_priv->device->ftbl->setproperty) - result = dev_priv->device->ftbl->setproperty( - dev_priv->device, param->type, - param->value, param->sizebytes); + if (IS_ERR(cmdbatch)) + return cmdbatch; - return result; + cmdbatch->ibdesc[0].gpuaddr = param->ibdesc_addr; + cmdbatch->ibdesc[0].sizedwords = param->numibs; + cmdbatch->ibcount = 1; + cmdbatch->flags = param->flags; + + return cmdbatch; } -static long _device_waittimestamp(struct kgsl_device_private *dev_priv, +/** + * _kgsl_cmdbatch_create() - Create a cmdbatch from a ioctl struct + * @device: Pointer to the KGSL device struct for the GPU + * @context: Pointer to the KGSL context that issued the command batch + * @flags: Flags passed in from the user command + * @cmdlist: Pointer to the list of commands from the user + * @numcmds: Number of commands in the list + * @synclist: Pointer to the list of syncpoints from the user + * @numsyncs: Number of syncpoints in the list + * + * Create a command batch from the standard issueibcmds format sent by the user. 
+ */ +static struct kgsl_cmdbatch *_kgsl_cmdbatch_create(struct kgsl_device *device, struct kgsl_context *context, - unsigned int timestamp, - unsigned int timeout) + unsigned int flags, + unsigned int cmdlist, unsigned int numcmds, + unsigned int synclist, unsigned int numsyncs) { - int result = 0; - struct kgsl_device *device = dev_priv->device; - unsigned int context_id = context ? context->id : KGSL_MEMSTORE_GLOBAL; + struct kgsl_cmdbatch *cmdbatch = + kgsl_cmdbatch_create(device, context, flags, numcmds); + int ret = 0; - /* Set the active count so that suspend doesn't do the wrong thing */ + if (IS_ERR(cmdbatch)) + return cmdbatch; - device->active_cnt++; + if (!(flags & KGSL_CONTEXT_SYNC)) { + if (copy_from_user(cmdbatch->ibdesc, (void __user *) cmdlist, + sizeof(struct kgsl_ibdesc) * numcmds)) { + ret = -EFAULT; + goto done; + } + } - trace_kgsl_waittimestamp_entry(device, context_id, - kgsl_readtimestamp(device, context, - KGSL_TIMESTAMP_RETIRED), - timestamp, timeout); + if (synclist && numsyncs) { + struct kgsl_cmd_syncpoint sync; + void __user *uptr = (void __user *) synclist; + int i; - result = device->ftbl->waittimestamp(dev_priv->device, - context, timestamp, timeout); + for (i = 0; i < numsyncs; i++) { + memset(&sync, 0, sizeof(sync)); - trace_kgsl_waittimestamp_exit(device, - kgsl_readtimestamp(device, context, - KGSL_TIMESTAMP_RETIRED), - result); + if (copy_from_user(&sync, uptr, sizeof(sync))) { + ret = -EFAULT; + break; + } - /* Fire off any pending suspend operations that are in flight */ - kgsl_active_count_put(dev_priv->device); + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); - return result; -} + if (ret) + break; -static long kgsl_ioctl_device_waittimestamp(struct kgsl_device_private - *dev_priv, unsigned int cmd, - void *data) -{ - struct kgsl_device_waittimestamp *param = data; + uptr += sizeof(sync); + } + } - return _device_waittimestamp(dev_priv, NULL, - param->timestamp, param->timeout); +done: + if (ret) { + kgsl_cmdbatch_destroy(cmdbatch); + return ERR_PTR(ret); + } + + cmdbatch->flags = flags; + + return cmdbatch; } -static long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private - *dev_priv, unsigned int cmd, - void *data) +static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) { - struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_ringbuffer_issueibcmds *param = data; + struct kgsl_device *device = dev_priv->device; struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch; long result = -EINVAL; - context = kgsl_context_get_owner(dev_priv, param->context_id); + /* The legacy functions don't support synchronization commands */ + if (param->flags & KGSL_CONTEXT_SYNC) + return -EINVAL; - if (context) - result = _device_waittimestamp(dev_priv, context, - param->timestamp, param->timeout); + /* Get the context */ + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + goto done; + + if (param->flags & KGSL_CONTEXT_SUBMIT_IB_LIST) { + /* + * Do a quick sanity check on the number of IBs in the + * submission + */ + + if (param->numibs == 0 || param->numibs > 100000) + goto done; + + cmdbatch = _kgsl_cmdbatch_create(device, context, param->flags, + param->ibdesc_addr, param->numibs, 0, 0); + } else + cmdbatch = _kgsl_cmdbatch_create_legacy(device, context, param); + + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + /* Run basic sanity checking on the command */ + if (!_kgsl_cmdbatch_verify(device, 
cmdbatch)) { + KGSL_DRV_ERR(device, "Unable to verify the IBs\n"); + goto free_cmdbatch; + } + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +free_cmdbatch: + if (result) + kgsl_cmdbatch_destroy(cmdbatch); + +done: kgsl_context_put(context); return result; } -static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, +static long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - int result = 0; - struct kgsl_ringbuffer_issueibcmds *param = data; - struct kgsl_ibdesc *ibdesc; + struct kgsl_submit_commands *param = data; + struct kgsl_device *device = dev_priv->device; struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch; - context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); - if (context == NULL) { - result = -EINVAL; - goto done; - } + long result = -EINVAL; - if (param->flags & KGSL_CONTEXT_SUBMIT_IB_LIST) { - KGSL_DRV_INFO(dev_priv->device, - "Using IB list mode for ib submission, numibs: %d\n", - param->numibs); - if (!param->numibs) { - KGSL_DRV_ERR(dev_priv->device, - "Invalid numibs as parameter: %d\n", - param->numibs); - result = -EINVAL; - goto done; - } + /* The number of IBs are completely ignored for sync commands */ + if (!(param->flags & KGSL_CONTEXT_SYNC)) { + if (param->numcmds == 0 || param->numcmds > 100000) + return -EINVAL; + } else if (param->numcmds != 0) { + KGSL_DRV_ERR(device, + "Commands specified with the SYNC flag. They will be ignored\n"); + } - /* - * Put a reasonable upper limit on the number of IBs that can be - * submitted - */ + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; - if (param->numibs > 10000) { - KGSL_DRV_ERR(dev_priv->device, - "Too many IBs submitted. 
count: %d max 10000\n", - param->numibs); - result = -EINVAL; - goto done; - } + cmdbatch = _kgsl_cmdbatch_create(device, context, param->flags, + (unsigned int) param->cmdlist, param->numcmds, + (unsigned int) param->synclist, param->numsyncs); - ibdesc = kzalloc(sizeof(struct kgsl_ibdesc) * param->numibs, - GFP_KERNEL); - if (!ibdesc) { - KGSL_MEM_ERR(dev_priv->device, - "kzalloc(%d) failed\n", - sizeof(struct kgsl_ibdesc) * param->numibs); - result = -ENOMEM; - goto done; - } + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } - if (copy_from_user(ibdesc, (void *)param->ibdesc_addr, - sizeof(struct kgsl_ibdesc) * param->numibs)) { - result = -EFAULT; - KGSL_DRV_ERR(dev_priv->device, - "copy_from_user failed\n"); - goto free_ibdesc; - } - } else { - KGSL_DRV_INFO(dev_priv->device, - "Using single IB submission mode for ib submission\n"); - /* If user space driver is still using the old mode of - * submitting single ib then we need to support that as well */ - ibdesc = kzalloc(sizeof(struct kgsl_ibdesc), GFP_KERNEL); - if (!ibdesc) { - KGSL_MEM_ERR(dev_priv->device, - "kzalloc(%d) failed\n", - sizeof(struct kgsl_ibdesc)); - result = -ENOMEM; - goto done; - } - ibdesc[0].gpuaddr = param->ibdesc_addr; - ibdesc[0].sizedwords = param->numibs; - param->numibs = 1; + /* Run basic sanity checking on the command */ + if (!_kgsl_cmdbatch_verify(device, cmdbatch)) { + KGSL_DRV_ERR(device, "Unable to verify the IBs\n"); + goto free_cmdbatch; } - result = dev_priv->device->ftbl->issueibcmds(dev_priv, - context, - ibdesc, - param->numibs, - ¶m->timestamp, - param->flags); + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +free_cmdbatch: + if (result) + kgsl_cmdbatch_destroy(cmdbatch); -free_ibdesc: - kfree(ibdesc); done: kgsl_context_put(context); return result; @@ -1271,15 +1990,12 @@ static long _cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, struct kgsl_device *device = dev_priv->device; unsigned int context_id = context ? 
context->id : KGSL_MEMSTORE_GLOBAL; - spin_lock(&dev_priv->process_priv->mem_lock); entry = kgsl_sharedmem_find(dev_priv->process_priv, gpuaddr); - spin_unlock(&dev_priv->process_priv->mem_lock); if (!entry) { KGSL_DRV_ERR(dev_priv->device, "invalid gpuaddr %08x\n", gpuaddr); - result = -EINVAL; - goto done; + return -EINVAL; } trace_kgsl_mem_timestamp_queue(device, entry, context_id, kgsl_readtimestamp(device, context, @@ -1287,7 +2003,7 @@ static long _cmdstream_freememontimestamp(struct kgsl_device_private *dev_priv, timestamp); result = kgsl_add_event(dev_priv->device, context_id, timestamp, kgsl_freemem_event_cb, entry, dev_priv); -done: + kgsl_mem_entry_put(entry); return result; } @@ -1324,27 +2040,16 @@ static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, int result = 0; struct kgsl_drawctxt_create *param = data; struct kgsl_context *context = NULL; + struct kgsl_device *device = dev_priv->device; - context = kgsl_create_context(dev_priv); - + context = device->ftbl->drawctxt_create(dev_priv, &param->flags); if (IS_ERR(context)) { result = PTR_ERR(context); goto done; } - - if (dev_priv->device->ftbl->drawctxt_create) { - result = dev_priv->device->ftbl->drawctxt_create( - dev_priv->device, dev_priv->process_priv->pagetable, - context, &param->flags); - if (result) - goto done; - } trace_kgsl_context_create(dev_priv->device, context, param->flags); param->drawctxt_id = context->id; done: - if (result && !IS_ERR(context)) - kgsl_context_detach(context); - return result; } @@ -1353,14 +2058,11 @@ static long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, { struct kgsl_drawctxt_destroy *param = data; struct kgsl_context *context; - long result = -EINVAL; + long result; context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); - if (context) { - kgsl_context_detach(context); - result = 0; - } + result = kgsl_context_detach(context); kgsl_context_put(context); return result; @@ -1369,31 +2071,27 @@ static long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, unsigned int cmd, void *data) { - int result = 0; struct kgsl_sharedmem_free *param = data; struct kgsl_process_private *private = dev_priv->process_priv; struct kgsl_mem_entry *entry = NULL; - spin_lock(&private->mem_lock); entry = kgsl_sharedmem_find(private, param->gpuaddr); - spin_unlock(&private->mem_lock); - - if (entry) { - trace_kgsl_mem_free(entry); + if (!entry) { + KGSL_MEM_INFO(dev_priv->device, "invalid gpuaddr %08x\n", + param->gpuaddr); + return -EINVAL; + } - kgsl_memfree_hist_set_event( - entry->priv->pid, - entry->memdesc.gpuaddr, - entry->memdesc.size, - entry->memdesc.flags); + trace_kgsl_mem_free(entry); - kgsl_mem_entry_detach_process(entry); - } else { - KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr); - result = -EINVAL; - } + kgsl_memfree_hist_set_event(entry->priv->pid, + entry->memdesc.gpuaddr, + entry->memdesc.size, + entry->memdesc.flags); - return result; + kgsl_mem_entry_detach_process(entry); + kgsl_mem_entry_put(entry); + return 0; } static long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, @@ -1412,6 +2110,7 @@ static long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, trace_kgsl_mem_free(entry); kgsl_mem_entry_detach_process(entry); + kgsl_mem_entry_put(entry); return 0; } @@ -1809,6 +2508,9 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, if (!can_use_cpu_map()) entry->memdesc.flags &= 
~KGSL_MEMFLAGS_USE_CPU_MAP; + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE; + switch (memtype) { case KGSL_USER_MEM_TYPE_PMEM: if (param->fd == 0 || param->len == 0) @@ -1873,10 +2575,7 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, else if (entry->memdesc.size >= SZ_64K) kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64)); - result = kgsl_mmu_map(private->pagetable, - &entry->memdesc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); - + result = kgsl_mmu_map(private->pagetable, &entry->memdesc); if (result) goto error_put_file_ptr; @@ -1896,7 +2595,6 @@ static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, trace_kgsl_mem_map(entry, param->fd); - kgsl_check_idle(dev_priv->device); return result; error_unmap: @@ -1916,7 +2614,6 @@ error_put_file_ptr: } error: kfree(entry); - kgsl_check_idle(dev_priv->device); return result; } @@ -1945,8 +2642,10 @@ static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, int op) mode = kgsl_memdesc_get_cachemode(&entry->memdesc); if (mode != KGSL_CACHEMODE_UNCACHED - && mode != KGSL_CACHEMODE_WRITECOMBINE) + && mode != KGSL_CACHEMODE_WRITECOMBINE) { + trace_kgsl_mem_sync_cache(entry, op); kgsl_cache_range_op(&entry->memdesc, cacheop); + } done: return ret; @@ -1961,6 +2660,7 @@ kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, struct kgsl_gpumem_sync_cache *param = data; struct kgsl_process_private *private = dev_priv->process_priv; struct kgsl_mem_entry *entry = NULL; + long ret; if (param->id != 0) { entry = kgsl_sharedmem_find_id(private, param->id); @@ -1970,9 +2670,7 @@ kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, return -EINVAL; } } else if (param->gpuaddr != 0) { - spin_lock(&private->mem_lock); entry = kgsl_sharedmem_find(private, param->gpuaddr); - spin_unlock(&private->mem_lock); if (entry == NULL) { KGSL_MEM_INFO(dev_priv->device, "can't find gpuaddr %x\n", @@ -1983,7 +2681,100 @@ kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, return -EINVAL; } - return _kgsl_gpumem_sync_cache(entry, param->op); + ret = _kgsl_gpumem_sync_cache(entry, param->op); + kgsl_mem_entry_put(entry); + return ret; +} + +static int mem_id_cmp(const void *_a, const void *_b) +{ + const unsigned int *a = _a, *b = _b; + int cmp = *a - *b; + return (cmp < 0) ? 
-1 : (cmp > 0); +} + +static long +kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int i; + struct kgsl_gpumem_sync_cache_bulk *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned int id, last_id = 0, *id_list = NULL, actual_count = 0; + struct kgsl_mem_entry **entries = NULL; + long ret = 0; + size_t op_size = 0; + bool full_flush = false; + + if (param->id_list == NULL || param->count == 0 + || param->count > (UINT_MAX/sizeof(unsigned int))) + return -EINVAL; + + id_list = kzalloc(param->count * sizeof(unsigned int), GFP_KERNEL); + if (id_list == NULL) + return -ENOMEM; + + entries = kzalloc(param->count * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto end; + } + + if (copy_from_user(id_list, param->id_list, + param->count * sizeof(unsigned int))) { + ret = -EFAULT; + goto end; + } + /* sort the ids so we can weed out duplicates */ + sort(id_list, param->count, sizeof(int), mem_id_cmp, NULL); + + for (i = 0; i < param->count; i++) { + unsigned int cachemode; + struct kgsl_mem_entry *entry = NULL; + + id = id_list[i]; + /* skip 0 ids or duplicates */ + if (id == last_id) + continue; + + entry = kgsl_sharedmem_find_id(private, id); + if (entry == NULL) + continue; + + /* skip uncached memory */ + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if (cachemode != KGSL_CACHEMODE_WRITETHROUGH && + cachemode != KGSL_CACHEMODE_WRITEBACK) { + kgsl_mem_entry_put(entry); + continue; + } + + op_size += entry->memdesc.size; + entries[actual_count++] = entry; + + /* If we exceed the breakeven point, flush the entire cache */ + if (op_size >= kgsl_driver.full_cache_threshold && + param->op == KGSL_GPUMEM_CACHE_FLUSH) { + full_flush = true; + break; + } + last_id = id; + } + if (full_flush) { + trace_kgsl_mem_sync_full_cache(actual_count, op_size, + param->op); + __cpuc_flush_kern_all(); + } + + for (i = 0; i < actual_count; i++) { + if (!full_flush) + _kgsl_gpumem_sync_cache(entries[i], param->op); + kgsl_mem_entry_put(entries[i]); + } +end: + kfree(entries); + kfree(id_list); + return ret; } /* Legacy cache function, does a flush (clean + invalidate) */ @@ -1995,10 +2786,9 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, struct kgsl_sharedmem_free *param = data; struct kgsl_process_private *private = dev_priv->process_priv; struct kgsl_mem_entry *entry = NULL; + long ret; - spin_lock(&private->mem_lock); entry = kgsl_sharedmem_find(private, param->gpuaddr); - spin_unlock(&private->mem_lock); if (entry == NULL) { KGSL_MEM_INFO(dev_priv->device, "can't find gpuaddr %x\n", @@ -2006,7 +2796,9 @@ kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, return -EINVAL; } - return _kgsl_gpumem_sync_cache(entry, KGSL_GPUMEM_CACHE_FLUSH); + ret = _kgsl_gpumem_sync_cache(entry, KGSL_GPUMEM_CACHE_FLUSH); + kgsl_mem_entry_put(entry); + return ret; } /* @@ -2035,6 +2827,9 @@ _gpumem_alloc(struct kgsl_device_private *dev_priv, if (entry == NULL) return -ENOMEM; + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE; + result = kgsl_allocate_user(&entry->memdesc, private->pagetable, size, flags); if (result != 0) @@ -2042,7 +2837,6 @@ _gpumem_alloc(struct kgsl_device_private *dev_priv, entry->memtype = KGSL_MEM_ENTRY_KERNEL; - kgsl_check_idle(dev_priv->device); *ret_entry = entry; return result; err: @@ -2065,8 +2859,7 @@ kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, if 
(result) return result; - result = kgsl_mmu_map(private->pagetable, &entry->memdesc, - kgsl_memdesc_protflags(&entry->memdesc)); + result = kgsl_mmu_map(private->pagetable, &entry->memdesc); if (result) goto err; @@ -2104,8 +2897,7 @@ kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, goto err; if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) { - result = kgsl_mmu_map(private->pagetable, &entry->memdesc, - kgsl_memdesc_protflags(&entry->memdesc)); + result = kgsl_mmu_map(private->pagetable, &entry->memdesc); if (result) goto err; } @@ -2147,9 +2939,7 @@ kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, return -EINVAL; } } else if (param->gpuaddr != 0) { - spin_lock(&private->mem_lock); entry = kgsl_sharedmem_find(private, param->gpuaddr); - spin_unlock(&private->mem_lock); if (entry == NULL) { KGSL_MEM_INFO(dev_priv->device, "can't find gpuaddr %lx\n", @@ -2165,6 +2955,8 @@ kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, param->size = entry->memdesc.size; param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc); param->useraddr = entry->memdesc.useraddr; + + kgsl_mem_entry_put(entry); return result; } @@ -2176,14 +2968,14 @@ static long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv, struct kgsl_process_private *private = dev_priv->process_priv; struct kgsl_mem_entry *entry = NULL; - spin_lock(&private->mem_lock); entry = kgsl_sharedmem_find_region(private, param->gpuaddr, param->len); - if (entry) - kgsl_cffdump_syncmem(dev_priv, &entry->memdesc, param->gpuaddr, - param->len, true); - else - result = -EINVAL; - spin_unlock(&private->mem_lock); + if (!entry) + return -EINVAL; + + kgsl_cffdump_syncmem(dev_priv->device, &entry->memdesc, param->gpuaddr, + param->len, true); + + kgsl_mem_entry_put(entry); return result; } @@ -2344,8 +3136,9 @@ static const struct { kgsl_ioctl_device_waittimestamp_ctxtid, KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE), KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, - kgsl_ioctl_rb_issueibcmds, - KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE), + kgsl_ioctl_rb_issueibcmds, 0), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS, + kgsl_ioctl_submit_commands, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP, kgsl_ioctl_cmdstream_readtimestamp, KGSL_IOCTL_LOCK), @@ -2380,7 +3173,7 @@ static const struct { kgsl_ioctl_cff_user_event, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, kgsl_ioctl_timestamp_event, - KGSL_IOCTL_LOCK), + KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE), KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, kgsl_ioctl_device_setproperty, KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE), @@ -2392,6 +3185,8 @@ static const struct { kgsl_ioctl_gpumem_get_info, 0), KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE, kgsl_ioctl_gpumem_sync_cache, 0), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK, + kgsl_ioctl_gpumem_sync_cache_bulk, 0), }; static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) @@ -2472,14 +3267,19 @@ static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (lock) { mutex_lock(&dev_priv->device->mutex); - if (use_hw) - kgsl_check_suspended(dev_priv->device); + if (use_hw) { + ret = kgsl_active_count_get(dev_priv->device); + if (ret < 0) + goto unlock; + } } ret = func(dev_priv, cmd, uptr); +unlock: if (lock) { - kgsl_check_idle_locked(dev_priv->device); + if (use_hw) + kgsl_active_count_put(dev_priv->device); mutex_unlock(&dev_priv->device->mutex); } @@ -2567,21 +3367,17 @@ get_mmap_entry(struct kgsl_process_private *private, struct kgsl_mem_entry **out_entry, unsigned long pgoff, 
unsigned long len) { - int ret = -EINVAL; + int ret = 0; struct kgsl_mem_entry *entry; entry = kgsl_sharedmem_find_id(private, pgoff); if (entry == NULL) { - spin_lock(&private->mem_lock); entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT); - spin_unlock(&private->mem_lock); } if (!entry) return -EINVAL; - kgsl_mem_entry_get(entry); - if (!entry->memdesc.ops || !entry->memdesc.ops->vmflags || !entry->memdesc.ops->vmfault) { @@ -2606,12 +3402,18 @@ err_put: return ret; } +static inline bool +mmap_range_valid(unsigned long addr, unsigned long len) +{ + return (addr + len) > addr && (addr + len) < TASK_SIZE; +} + static unsigned long kgsl_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - unsigned long ret = 0; + unsigned long ret = 0, orig_len = len; unsigned long vma_offset = pgoff << PAGE_SHIFT; struct kgsl_device_private *dev_priv = file->private_data; struct kgsl_process_private *private = dev_priv->process_priv; @@ -2656,10 +3458,26 @@ kgsl_get_unmapped_area(struct file *file, unsigned long addr, if (align) len += 1 << align; + + if (!mmap_range_valid(addr, len)) + addr = 0; do { ret = get_unmapped_area(NULL, addr, len, pgoff, flags); - if (IS_ERR_VALUE(ret)) + if (IS_ERR_VALUE(ret)) { + /* + * If we are really fragmented, there may not be room + * for the alignment padding, so try again without it. + */ + if (!retry && (ret == (unsigned long)-ENOMEM) + && (align > PAGE_SHIFT)) { + align = PAGE_SHIFT; + addr = 0; + len = orig_len; + retry = 1; + continue; + } break; + } if (align) ret = ALIGN(ret, (1 << align)); @@ -2681,13 +3499,13 @@ kgsl_get_unmapped_area(struct file *file, unsigned long addr, * the whole address space at least once by wrapping * back around once. */ - if (!retry && (addr + len >= TASK_SIZE)) { + if (!retry && !mmap_range_valid(addr, len)) { addr = 0; retry = 1; } else { ret = -EBUSY; } - } while (addr + len < TASK_SIZE); + } while (mmap_range_valid(addr, len)); if (IS_ERR_VALUE(ret)) KGSL_MEM_INFO(device, @@ -2712,6 +3530,10 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) if (vma_offset == device->memstore.gpuaddr) return kgsl_mmap_memstore(device, vma); + /* + * The reference count on the entry that we get from + * get_mmap_entry() will be held until kgsl_gpumem_vm_close(). 
+ */ ret = get_mmap_entry(private, &entry, vma->vm_pgoff, vma->vm_end - vma->vm_start); if (ret) @@ -2720,8 +3542,7 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { entry->memdesc.gpuaddr = vma->vm_start; - ret = kgsl_mmu_map(private->pagetable, &entry->memdesc, - kgsl_memdesc_protflags(&entry->memdesc)); + ret = kgsl_mmu_map(private->pagetable, &entry->memdesc); if (ret) { kgsl_mem_entry_put(entry); return ret; @@ -2762,10 +3583,6 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) int sglen = entry->memdesc.sglen; unsigned long addr = vma->vm_start; - /* don't map in the guard page, it should always fault */ - if (kgsl_memdesc_has_guard_page(&entry->memdesc)) - sglen--; - for_each_sg(entry->memdesc.sg, s, sglen, i) { int j; for (j = 0; j < (sg_dma_len(s) >> PAGE_SHIFT); j++) { @@ -2782,7 +3599,6 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) entry->memdesc.useraddr = vma->vm_start; trace_kgsl_mem_mmap(entry); - return 0; } @@ -2809,6 +3625,11 @@ struct kgsl_driver kgsl_driver = { .devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock), .memfree_hist_mutex = __MUTEX_INITIALIZER(kgsl_driver.memfree_hist_mutex), + /* + * Full cache flushes are faster than line by line on at least + * 8064 and 8974 once the region to be flushed is > 16mb. + */ + .full_cache_threshold = SZ_16M, }; EXPORT_SYMBOL(kgsl_driver); @@ -2949,11 +3770,12 @@ int kgsl_device_platform_probe(struct kgsl_device *device) device->id, device->reg_phys, device->reg_len, device->reg_virt); + rwlock_init(&device->context_lock); + result = kgsl_drm_init(pdev); if (result) goto error_pwrctrl_close; - kgsl_cffdump_open(device->id); setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device); status = kgsl_create_device_workqueue(device); @@ -3011,11 +3833,7 @@ int kgsl_postmortem_dump(struct kgsl_device *device, int manual) /* For a manual dump, make sure that the system is idle */ if (manual) { - if (device->active_cnt != 0) { - mutex_unlock(&device->mutex); - wait_for_completion(&device->suspend_gate); - mutex_lock(&device->mutex); - } + kgsl_active_count_wait(device); if (device->state == KGSL_STATE_ACTIVE) kgsl_idle(device); @@ -3035,9 +3853,6 @@ int kgsl_postmortem_dump(struct kgsl_device *device, int manual) /* Disable the idle timer so we don't get interrupted */ del_timer_sync(&device->idle_timer); - mutex_unlock(&device->mutex); - flush_workqueue(device->work_queue); - mutex_lock(&device->mutex); /* Turn off napping to make sure we have the clocks full attention through the following process */ @@ -3077,7 +3892,6 @@ void kgsl_device_platform_remove(struct kgsl_device *device) { kgsl_device_snapshot_close(device); - kgsl_cffdump_close(device->id); kgsl_pwrctrl_uninit_sysfs(device); pm_qos_remove_request(&device->pm_qos_req_dma); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 30ac1a9602f6ae331f3265e59a5c978a4c67a17d..458400d412fdcdb036e1680dc10bea883b9ed47e 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -130,12 +130,14 @@ struct kgsl_driver { unsigned int mapped_max; unsigned int histogram[16]; } stats; + unsigned int full_cache_threshold; }; extern struct kgsl_driver kgsl_driver; struct kgsl_pagetable; struct kgsl_memdesc; +struct kgsl_cmdbatch; struct kgsl_memdesc_ops { int (*vmflags)(struct kgsl_memdesc *); @@ -149,6 +151,8 @@ struct kgsl_memdesc_ops { #define KGSL_MEMDESC_GUARD_PAGE BIT(0) /* Set if the memdesc is mapped into all pagetables */ #define 
KGSL_MEMDESC_GLOBAL BIT(1) +/* The memdesc is frozen during a snapshot */ +#define KGSL_MEMDESC_FROZEN BIT(2) /* shared memory allocation */ struct kgsl_memdesc { @@ -175,15 +179,10 @@ struct kgsl_memdesc { #define KGSL_MEM_ENTRY_ION 4 #define KGSL_MEM_ENTRY_MAX 5 -/* List of flags */ - -#define KGSL_MEM_ENTRY_FROZEN (1 << 0) - struct kgsl_mem_entry { struct kref refcount; struct kgsl_memdesc memdesc; int memtype; - int flags; void *priv_data; struct rb_node node; unsigned int id; @@ -229,6 +228,14 @@ int kgsl_resume_driver(struct platform_device *pdev); void kgsl_early_suspend_driver(struct early_suspend *h); void kgsl_late_resume_driver(struct early_suspend *h); +void kgsl_trace_regwrite(struct kgsl_device *device, unsigned int offset, + unsigned int value); + +void kgsl_trace_issueibcmds(struct kgsl_device *device, int id, + struct kgsl_cmdbatch *cmdbatch, + unsigned int timestamp, unsigned int flags, + int result, unsigned int type); + #ifdef CONFIG_MSM_KGSL_DRM extern int kgsl_drm_init(struct platform_device *dev); extern void kgsl_drm_exit(void); @@ -246,6 +253,10 @@ static inline void kgsl_drm_exit(void) static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, unsigned int gpuaddr, unsigned int size) { + /* set a minimum size to search for */ + if (!size) + size = 1; + /* don't overflow */ if ((gpuaddr + size) < gpuaddr) return 0; diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c index e06c94d6b9fbf083e9084bccdb5f851e70812313..44f6e52fa2361e96c553e16c138883dbca231e4e 100644 --- a/drivers/gpu/msm/kgsl_cffdump.c +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -28,6 +28,7 @@ #include "kgsl_log.h" #include "kgsl_sharedmem.h" #include "adreno_pm4types.h" +#include "adreno.h" static struct rchan *chan; static struct dentry *dir; @@ -334,7 +335,7 @@ void kgsl_cffdump_init() return; } - kgsl_cff_dump_enable = 1; + kgsl_cff_dump_enable = 0; spin_lock_init(&cffdump_lock); @@ -356,60 +357,71 @@ void kgsl_cffdump_destroy() debugfs_remove(dir); } -void kgsl_cffdump_open(enum kgsl_deviceid device_id) +void kgsl_cffdump_open(struct kgsl_device *device) { - kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE, - kgsl_mmu_get_ptsize(), SZ_256K); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + if (!kgsl_cff_dump_enable) + return; + + if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) { + kgsl_cffdump_memory_base(device->id, + kgsl_mmu_get_base_addr(&device->mmu), + kgsl_mmu_get_ptsize(&device->mmu) + + KGSL_IOMMU_GLOBAL_MEM_SIZE, adreno_dev->gmem_size); + } else { + kgsl_cffdump_memory_base(device->id, + kgsl_mmu_get_base_addr(&device->mmu), + kgsl_mmu_get_ptsize(&device->mmu), + adreno_dev->gmem_size); + } } void kgsl_cffdump_memory_base(enum kgsl_deviceid device_id, unsigned int base, unsigned int range, unsigned gmemsize) { + if (!kgsl_cff_dump_enable) + return; cffdump_printline(device_id, CFF_OP_MEMORY_BASE, base, range, gmemsize, 0, 0); } void kgsl_cffdump_hang(enum kgsl_deviceid device_id) { + if (!kgsl_cff_dump_enable) + return; cffdump_printline(device_id, CFF_OP_HANG, 0, 0, 0, 0, 0); } void kgsl_cffdump_close(enum kgsl_deviceid device_id) { + if (!kgsl_cff_dump_enable) + return; cffdump_printline(device_id, CFF_OP_EOF, 0, 0, 0, 0, 0); } + void kgsl_cffdump_user_event(unsigned int cff_opcode, unsigned int op1, unsigned int op2, unsigned int op3, unsigned int op4, unsigned int op5) { + if (!kgsl_cff_dump_enable) + return; cffdump_printline(-1, cff_opcode, op1, op2, op3, op4, op5); } -void kgsl_cffdump_syncmem(struct kgsl_device_private 
*dev_priv, - const struct kgsl_memdesc *memdesc, uint gpuaddr, uint sizebytes, - bool clean_cache) +void kgsl_cffdump_syncmem(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint gpuaddr, + uint sizebytes, bool clean_cache) { const void *src; if (!kgsl_cff_dump_enable) return; + BUG_ON(memdesc == NULL); + total_syncmem += sizebytes; - if (memdesc == NULL) { - struct kgsl_mem_entry *entry; - spin_lock(&dev_priv->process_priv->mem_lock); - entry = kgsl_sharedmem_find_region(dev_priv->process_priv, - gpuaddr, sizebytes); - spin_unlock(&dev_priv->process_priv->mem_lock); - if (entry == NULL) { - KGSL_CORE_ERR("did not find mapping " - "for gpuaddr: 0x%08x\n", gpuaddr); - return; - } - memdesc = &entry->memdesc; - } src = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr); if (memdesc->hostptr == NULL) { KGSL_CORE_ERR("no kernel mapping for " @@ -522,7 +534,7 @@ static int subbuf_start_handler(struct rchan_buf *buf, } static struct dentry *create_buf_file_handler(const char *filename, - struct dentry *parent, int mode, struct rchan_buf *buf, + struct dentry *parent, unsigned short mode, struct rchan_buf *buf, int *is_global) { return debugfs_create_file(filename, mode, parent, buf, diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h index 2733cc3fab8056bf9643d33625ef4364cd552e62..83695f81c66f34e73cdc0e8da387382e623611fd 100644 --- a/drivers/gpu/msm/kgsl_cffdump.h +++ b/drivers/gpu/msm/kgsl_cffdump.h @@ -22,10 +22,10 @@ void kgsl_cffdump_init(void); void kgsl_cffdump_destroy(void); -void kgsl_cffdump_open(enum kgsl_deviceid device_id); -void kgsl_cffdump_close(enum kgsl_deviceid device_id); -void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv, - const struct kgsl_memdesc *memdesc, uint physaddr, uint sizebytes, +void kgsl_cffdump_open(struct kgsl_device *device); +void kgsl_cffdump_close(struct kgsl_device *device); +void kgsl_cffdump_syncmem(struct kgsl_device *, + struct kgsl_memdesc *memdesc, uint physaddr, uint sizebytes, bool clean_cache); void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes); void kgsl_cffdump_regwrite(enum kgsl_deviceid device_id, uint addr, @@ -49,7 +49,7 @@ void kgsl_cffdump_hang(enum kgsl_deviceid device_id); #define kgsl_cffdump_init() (void)0 #define kgsl_cffdump_destroy() (void)0 -#define kgsl_cffdump_open(device_id) (void)0 +#define kgsl_cffdump_open(device) (void)0 #define kgsl_cffdump_close(device_id) (void)0 #define kgsl_cffdump_syncmem(dev_priv, memdesc, addr, sizebytes, clean_cache) \ (void) 0 diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c index a2490ec80fdbf575c3dbd897f586fc6ec2b5b9c0..09c9dfe6256b8f0670980424ed03ef8eedccf51f 100644 --- a/drivers/gpu/msm/kgsl_debugfs.c +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -123,7 +123,6 @@ KGSL_DEBUGFS_LOG(cmd_log); KGSL_DEBUGFS_LOG(ctxt_log); KGSL_DEBUGFS_LOG(mem_log); KGSL_DEBUGFS_LOG(pwr_log); -KGSL_DEBUGFS_LOG(ft_log); static int memfree_hist_print(struct seq_file *s, void *unused) { @@ -185,7 +184,6 @@ void kgsl_device_debugfs_init(struct kgsl_device *device) device->drv_log = KGSL_LOG_LEVEL_DEFAULT; device->mem_log = KGSL_LOG_LEVEL_DEFAULT; device->pwr_log = KGSL_LOG_LEVEL_DEFAULT; - device->ft_log = KGSL_LOG_LEVEL_DEFAULT; debugfs_create_file("log_level_cmd", 0644, device->d_debugfs, device, &cmd_log_fops); diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index c48644b7d9e8c5d2f008a23f7801b0d801bc2732..fb3fade51e4292ebd8134c07812727a5351ebe96 100644 --- a/drivers/gpu/msm/kgsl_device.h +++ 
b/drivers/gpu/msm/kgsl_device.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -13,9 +13,11 @@ #ifndef __KGSL_DEVICE_H #define __KGSL_DEVICE_H +#include <linux/slab.h> #include <linux/idr.h> #include <linux/pm_qos.h> #include <linux/earlysuspend.h> +#include <linux/sched.h> #include "kgsl.h" #include "kgsl_mmu.h" @@ -62,12 +64,21 @@ #define KGSL_EVENT_TIMESTAMP_RETIRED 0 #define KGSL_EVENT_CANCELLED 1 +/* + * "list" of event types for ftrace symbolic magic + */ + +#define KGSL_EVENT_TYPES \ + { KGSL_EVENT_TIMESTAMP_RETIRED, "retired" }, \ + { KGSL_EVENT_CANCELLED, "cancelled" } + struct kgsl_device; struct platform_device; struct kgsl_device_private; struct kgsl_context; struct kgsl_power_stats; struct kgsl_event; +struct kgsl_cmdbatch; struct kgsl_functable { /* Mandatory functions - these functions must be implemented @@ -79,9 +90,10 @@ struct kgsl_functable { void (*regwrite) (struct kgsl_device *device, unsigned int offsetwords, unsigned int value); int (*idle) (struct kgsl_device *device); - unsigned int (*isidle) (struct kgsl_device *device); + bool (*isidle) (struct kgsl_device *device); int (*suspend_context) (struct kgsl_device *device); - int (*start) (struct kgsl_device *device, unsigned int init_ram); + int (*init) (struct kgsl_device *device); + int (*start) (struct kgsl_device *device); int (*stop) (struct kgsl_device *device); int (*getproperty) (struct kgsl_device *device, enum kgsl_property_type type, void *value, @@ -92,9 +104,8 @@ struct kgsl_functable { unsigned int (*readtimestamp) (struct kgsl_device *device, struct kgsl_context *context, enum kgsl_timestamp_type type); int (*issueibcmds) (struct kgsl_device_private *dev_priv, - struct kgsl_context *context, struct kgsl_ibdesc *ibdesc, - unsigned int sizedwords, uint32_t *timestamp, - unsigned int flags); + struct kgsl_context *context, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamps); int (*setup_pt)(struct kgsl_device *device, struct kgsl_pagetable *pagetable); void (*cleanup_pt)(struct kgsl_device *device, @@ -106,16 +117,16 @@ struct kgsl_functable { void * (*snapshot)(struct kgsl_device *device, void *snapshot, int *remain, int hang); irqreturn_t (*irq_handler)(struct kgsl_device *device); + int (*drain)(struct kgsl_device *device); /* Optional functions - these functions are not mandatory. 
The driver will check that the function pointer is not NULL before calling the hook */ - void (*setstate) (struct kgsl_device *device, unsigned int context_id, + int (*setstate) (struct kgsl_device *device, unsigned int context_id, uint32_t flags); - int (*drawctxt_create) (struct kgsl_device *device, - struct kgsl_pagetable *pagetable, struct kgsl_context *context, - uint32_t *flags); - void (*drawctxt_destroy) (struct kgsl_device *device, - struct kgsl_context *context); + struct kgsl_context *(*drawctxt_create) (struct kgsl_device_private *, + uint32_t *flags); + int (*drawctxt_detach) (struct kgsl_context *context); + void (*drawctxt_destroy) (struct kgsl_context *context); long (*ioctl) (struct kgsl_device_private *dev_priv, unsigned int cmd, void *data); int (*setproperty) (struct kgsl_device *device, @@ -124,6 +135,8 @@ struct kgsl_functable { int (*postmortem_dump) (struct kgsl_device *device, int manual); int (*next_event)(struct kgsl_device *device, struct kgsl_event *event); + void (*drawctxt_sched)(struct kgsl_device *device, + struct kgsl_context *context); }; /* MH register values */ @@ -147,6 +160,46 @@ struct kgsl_event { unsigned int created; }; +/** + * struct kgsl_cmdbatch - KGSL command descriptor + * @device: KGSL GPU device that the command was created for + * @context: KGSL context that created the command + * @timestamp: Timestamp assigned to the command + * @flags: flags + * @priv: Internal flags + * @fault_policy: Internal policy describing how to handle this command in case + * of a fault + * @ibcount: Number of IBs in the command list + * @ibdesc: Pointer to the list of IBs + * @expires: Point in time when the cmdbatch is considered to be hung + * @invalid: non-zero if the dispatcher determines the command and the owning + * context should be invalidated + * @refcount: kref structure to maintain the reference count + * @synclist: List of context/timestamp tuples to wait for before issuing + * + * This structure defines an atomic batch of command buffers issued from + * userspace. 
+ */ +struct kgsl_cmdbatch { + struct kgsl_device *device; + struct kgsl_context *context; + spinlock_t lock; + uint32_t timestamp; + uint32_t flags; + uint32_t priv; + uint32_t fault_policy; + uint32_t ibcount; + struct kgsl_ibdesc *ibdesc; + unsigned long expires; + int invalid; + struct kref refcount; + struct list_head synclist; +}; + +/* Internal cmdbatch flags */ + +#define CMDBATCH_FLAG_SKIP BIT(0) +#define CMDBATCH_FLAG_FORCE_PREAMBLE BIT(1) struct kgsl_device { struct device *dev; @@ -174,16 +227,16 @@ struct kgsl_device { uint32_t state; uint32_t requested_state; - unsigned int active_cnt; + atomic_t active_cnt; struct completion suspend_gate; wait_queue_head_t wait_queue; struct workqueue_struct *work_queue; struct device *parentdev; - struct completion ft_gate; struct dentry *d_debugfs; struct idr context_idr; struct early_suspend display_off; + rwlock_t context_lock; void *snapshot; /* Pointer to the snapshot memory region */ int snapshot_maxsize; /* Max size of the snapshot region */ @@ -206,7 +259,6 @@ struct kgsl_device { int drv_log; int mem_log; int pwr_log; - int ft_log; int pm_dump_enable; struct kgsl_pwrscale pwrscale; struct kobject pwrscale_kobj; @@ -214,6 +266,7 @@ struct kgsl_device { struct work_struct ts_expired_ws; struct list_head events; struct list_head events_pending_list; + unsigned int events_last_timestamp; s64 on_time; /* Postmortem Control switches */ @@ -229,7 +282,6 @@ void kgsl_check_fences(struct work_struct *work); #define KGSL_DEVICE_COMMON_INIT(_dev) \ .hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\ .suspend_gate = COMPLETION_INITIALIZER((_dev).suspend_gate),\ - .ft_gate = COMPLETION_INITIALIZER((_dev).ft_gate),\ .idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\ kgsl_idle_check),\ .ts_expired_ws = __WORK_INITIALIZER((_dev).ts_expired_ws,\ @@ -244,37 +296,56 @@ void kgsl_check_fences(struct work_struct *work); .ver_minor = DRIVER_VERSION_MINOR +/* bits for struct kgsl_context.priv */ +/* the context has been destroyed by userspace and is no longer using the gpu */ +#define KGSL_CONTEXT_DETACHED 0 +/* the context has caused a pagefault */ +#define KGSL_CONTEXT_PAGEFAULT 1 + /** * struct kgsl_context - Master structure for a KGSL context object - * @refcount - kref object for reference counting the context - * @id - integer identifier for the context - * @dev_priv - pointer to the owning device instance - * @devctxt - pointer to the device specific context information - * @reset_status - status indication whether a gpu reset occured and whether + * @refcount: kref object for reference counting the context + * @id: integer identifier for the context + * @priv: in-kernel context flags, use KGSL_CONTEXT_* values + * @dev_priv: pointer to the owning device instance + * @reset_status: status indication whether a gpu reset occurred and whether * this context was responsible for causing it - * @wait_on_invalid_ts - flag indicating if this context has tried to wait on a + * @wait_on_invalid_ts: flag indicating if this context has tried to wait on a * bad timestamp - * @timeline - sync timeline used to create fences that can be signaled when a + * @timeline: sync timeline used to create fences that can be signaled when a * sync_pt timestamp expires - * @events - list head of pending events for this context - * @events_list - list node for the list of all contexts that have pending events + * @events: list head of pending events for this context + * @events_list: list node for the list of all contexts that have pending events + * @pid: 
process that owns this context. + * @pagefault: flag set if this context caused a pagefault. + * @pagefault_ts: global timestamp of the pagefault, if KGSL_CONTEXT_PAGEFAULT + * is set. */ struct kgsl_context { struct kref refcount; uint32_t id; - struct kgsl_device_private *dev_priv; - void *devctxt; + pid_t pid; + unsigned long priv; + struct kgsl_device *device; + struct kgsl_pagetable *pagetable; unsigned int reset_status; bool wait_on_invalid_ts; struct sync_timeline *timeline; struct list_head events; struct list_head events_list; + unsigned int pagefault_ts; }; struct kgsl_process_private { unsigned int refcnt; pid_t pid; spinlock_t mem_lock; + + /* General refcount for process private struct obj */ + struct kref refcount; + /* Mutex to synchronize access to each process_private struct obj */ + struct mutex process_private_mutex; + struct rb_root mem_rb; struct idr mem_idr; struct kgsl_pagetable *pagetable; @@ -303,6 +374,9 @@ struct kgsl_device *kgsl_get_device(int dev_idx); int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, kgsl_event_func func, void *priv, void *owner); +void kgsl_cancel_event(struct kgsl_device *device, struct kgsl_context *context, + unsigned int timestamp, kgsl_event_func func, void *priv); + static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, unsigned int type, size_t size) { @@ -390,8 +464,6 @@ static inline int kgsl_create_device_workqueue(struct kgsl_device *device) return 0; } - - int kgsl_check_timestamp(struct kgsl_device *device, struct kgsl_context *context, unsigned int timestamp); @@ -416,10 +488,15 @@ kgsl_device_get_drvdata(struct kgsl_device *dev) void kgsl_context_destroy(struct kref *kref); +int kgsl_context_init(struct kgsl_device_private *, struct kgsl_context + *context); + /** - * kgsl_context_put - Release context reference count - * @context + * kgsl_context_put() - Release context reference count + * @context: Pointer to the KGSL context to be released * + * Reduce the reference count on a KGSL context and destroy it if it is no + * longer needed */ static inline void kgsl_context_put(struct kgsl_context *context) @@ -427,10 +504,26 @@ kgsl_context_put(struct kgsl_context *context) if (context) kref_put(&context->refcount, kgsl_context_destroy); } + /** - * kgsl_context_get - get a pointer to a KGSL context - * @devicex - Pointer to the KGSL device that owns the context - * @id - Context ID to return + * kgsl_context_detached() - check if a context is detached + * @context: the context + * + * Check if a context has been destroyed by userspace and is only waiting + * for reference counts to go away. This check is used to weed out + * contexts that shouldn't use the gpu, so NULL is considered detached. + */ +static inline bool kgsl_context_detached(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_DETACHED, + &context->priv)); +} + + +/** + * kgsl_context_get() - get a pointer to a KGSL context + * @device: Pointer to the KGSL device that owns the context + * @id: Context ID * * Find the context associated with the given ID number, increase the reference * count on it and return it. 
The caller must make sure that this call is @@ -438,26 +531,45 @@ kgsl_context_put(struct kgsl_context *context) * doesn't validate the ownership of the context with the calling process - use * kgsl_context_get_owner for that */ - static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device, uint32_t id) { struct kgsl_context *context = NULL; - rcu_read_lock(); + read_lock(&device->context_lock); + context = idr_find(&device->context_idr, id); - if (context) + /* Don't return a context that has been detached */ + if (kgsl_context_detached(context)) + context = NULL; + else kref_get(&context->refcount); - rcu_read_unlock(); + read_unlock(&device->context_lock); + return context; } /** - * kgsl_context_get_owner - get a pointer to a KGSL context - * @dev_priv - Pointer to the owner of the requesting process - * @id - Context ID to return +* _kgsl_context_get() - lightweight function to just increment the ref count +* @context: Pointer to the KGSL context +* +* Get a reference to the specified KGSL context structure. This is a +* lightweight way to just increase the refcount on a known context rather than +* walking through kgsl_context_get and searching the iterator +*/ +static inline void _kgsl_context_get(struct kgsl_context *context) +{ + if (context) + kref_get(&context->refcount); +} + +/** + * kgsl_context_get_owner() - get a pointer to a KGSL context in a specific + * process + * @dev_priv: Pointer to the process struct + * @id: Context ID to return * * Find the context associated with the given ID number, increase the reference * count on it and return it. The caller must make sure that this call is @@ -472,8 +584,8 @@ static inline struct kgsl_context *kgsl_context_get_owner( context = kgsl_context_get(dev_priv->device, id); - /* Verify that the context belongs to the dev_priv instance */ - if (context && context->dev_priv != dev_priv) { + /* Verify that the context belongs to current calling process. 
*/ + if (context != NULL && context->pid != dev_priv->process_priv->pid) { kgsl_context_put(context); return NULL; } @@ -482,24 +594,12 @@ static inline struct kgsl_context *kgsl_context_get_owner( } /** - * kgsl_active_count_put - Decrease the device active count - * @device: Pointer to a KGSL device + * kgsl_context_cancel_events() - Cancel all events for a context + * @device: Pointer to the KGSL device structure for the GPU + * @context: Pointer to the KGSL context * - * Decrease the active count for the KGSL device and trigger the suspend_gate - * completion if it hits zero + * Signal all pending events on the context with KGSL_EVENT_CANCELLED */ -static inline void -kgsl_active_count_put(struct kgsl_device *device) -{ - if (device->active_cnt == 1) - INIT_COMPLETION(device->suspend_gate); - - device->active_cnt--; - - if (device->active_cnt == 0) - complete(&device->suspend_gate); -} - static inline void kgsl_context_cancel_events(struct kgsl_device *device, struct kgsl_context *context) { @@ -507,9 +607,9 @@ static inline void kgsl_context_cancel_events(struct kgsl_device *device, } /** - * kgsl_context_cancel_events_timestamp - cancel events for a given timestamp + * kgsl_context_cancel_events_timestamp() - cancel events for a given timestamp * @device: Pointer to the KGSL device that owns the context - * @cotnext: Pointer to the context that owns the event or NULL for global + * @context: Pointer to the context that owns the event or NULL for global * @timestamp: Timestamp to cancel events for * * Cancel events pending for a specific timestamp @@ -519,4 +619,30 @@ static inline void kgsl_cancel_events_timestamp(struct kgsl_device *device, { kgsl_signal_event(device, context, timestamp, KGSL_EVENT_CANCELLED); } + +void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cmdbatch_destroy_object(struct kref *kref); + +/** + * kgsl_cmdbatch_put() - Decrement the refcount for a command batch object + * @cmdbatch: Pointer to the command batch object + */ +static inline void kgsl_cmdbatch_put(struct kgsl_cmdbatch *cmdbatch) +{ + kref_put(&cmdbatch->refcount, kgsl_cmdbatch_destroy_object); +} + +/** + * kgsl_cmdbatch_sync_pending() - return true if the cmdbatch is waiting + * @cmdbatch: Pointer to the command batch object to check + * + * Return non-zero if the specified command batch is still waiting for sync + * point dependencies to be satisfied + */ +static inline int kgsl_cmdbatch_sync_pending(struct kgsl_cmdbatch *cmdbatch) +{ + return list_empty(&cmdbatch->synclist) ? 
0 : 1; +} + #endif /* __KGSL_DEVICE_H */ diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c index dc49faa07e5e089b4388cc6d1597e91b871300a2..e8c6c5d8a8e64305bbb459bc50e1b46d1aa8c05a 100644 --- a/drivers/gpu/msm/kgsl_events.c +++ b/drivers/gpu/msm/kgsl_events.c @@ -90,12 +90,12 @@ static struct kgsl_event *_find_event(struct kgsl_device *device, } /** - * _signal_event - send a signal to a specific event in the list - * @device - KGSL device - * @head - Pointer to the event list to process - * @timestamp - timestamp of the event to signal - * @cur - timestamp value to send to the callback - * @type - Signal ID to send to the callback + * _signal_event() - send a signal to a specific event in the list + * @device: Pointer to the KGSL device struct + * @head: Pointer to the event list to process + * @timestamp: timestamp of the event to signal + * @cur: timestamp value to send to the callback + * @type: Signal ID to send to the callback * * Send the specified signal to the events in the list with the specified * timestamp. The timestamp 'cur' is sent to the callback so it knows @@ -114,12 +114,12 @@ static void _signal_event(struct kgsl_device *device, } /** - * _signal_events - send a signal to all the events in a list - * @device - KGSL device - * @head - Pointer to the event list to process - * @timestamp - Timestamp to pass to the events (this should be the current + * _signal_events() - send a signal to all the events in a list + * @device: Pointer to the KGSL device struct + * @head: Pointer to the event list to process + * @timestamp: Timestamp to pass to the events (this should be the current * timestamp when the signal is sent) - * @type - Signal ID to send to the callback + * @type: Signal ID to send to the callback * * Send the specified signal to all the events in the list and destroy them */ @@ -134,6 +134,16 @@ static void _signal_events(struct kgsl_device *device, } +/** + * kgsl_signal_event() - send a signal to a specific event in the context + * @device: Pointer to the KGSL device struct + * @context: Pointer to the KGSL context + * @timestamp: Timestamp of the event to signal + * @type: Signal ID to send to the callback + * + * Send the specified signal to all the events in the context with the given + * timestamp + */ void kgsl_signal_event(struct kgsl_device *device, struct kgsl_context *context, unsigned int timestamp, unsigned int type) @@ -151,6 +161,14 @@ void kgsl_signal_event(struct kgsl_device *device, } EXPORT_SYMBOL(kgsl_signal_event); +/** + * kgsl_signal_events() - send a signal to all events in the context + * @device: Pointer to the KGSL device struct + * @context: Pointer to the KGSL context + * @type: Signal ID to send to the callback function + * + * Send the specified signal to all the events in the context + */ void kgsl_signal_events(struct kgsl_device *device, struct kgsl_context *context, unsigned int type) { @@ -192,6 +210,7 @@ EXPORT_SYMBOL(kgsl_signal_events); int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, kgsl_event_func func, void *priv, void *owner) { + int ret; struct kgsl_event *event; unsigned int cur_ts; struct kgsl_context *context = NULL; @@ -229,6 +248,17 @@ int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, return -ENOMEM; } + /* + * Increase the active count on the device to avoid going into power + * saving modes while events are pending + */ + ret = kgsl_active_count_get_light(device); + if (ret < 0) { + kgsl_context_put(context); + kfree(event); + return ret; + } + event->context = 
context; event->timestamp = ts; event->priv = priv; @@ -255,23 +285,17 @@ int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts, } else _add_event_to_list(&device->events, event); - /* - * Increase the active count on the device to avoid going into power - * saving modes while events are pending - */ - - device->active_cnt++; - queue_work(device->work_queue, &device->ts_expired_ws); return 0; } EXPORT_SYMBOL(kgsl_add_event); /** - * kgsl_cancel_events - Cancel all generic events for a process - * @device - KGSL device for the events to cancel - * @owner - driver instance that owns the events to cancel + * kgsl_cancel_events() - Cancel all global events owned by a process + * @device: Pointer to the KGSL device struct + * @owner: driver instance that owns the events to cancel + * + * Cancel all global events that match the owner pointer */ void kgsl_cancel_events(struct kgsl_device *device, void *owner) { @@ -291,6 +315,19 @@ void kgsl_cancel_events(struct kgsl_device *device, void *owner) } EXPORT_SYMBOL(kgsl_cancel_events); +/** + * kgsl_cancel_event() - send a cancel signal to a specific event + * @device: Pointer to the KGSL device struct + * @context: Pointer to the KGSL context + * @timestamp: Timestamp of the event to cancel + * @func: Callback function of the event - this is used to match the actual + * event + * @priv: Private data for the callback function - this is used to match to the + * actual event + * + * Send a cancel signal to a specific event that matches all the parameters + */ + void kgsl_cancel_event(struct kgsl_device *device, struct kgsl_context *context, unsigned int timestamp, kgsl_event_func func, void *priv) @@ -363,10 +400,19 @@ void kgsl_process_events(struct work_struct *work) struct kgsl_context *context, *tmp; uint32_t timestamp; - mutex_lock(&device->mutex); + /* + * Bail unless the global timestamp has advanced. 
We can safely do this + * outside of the mutex for speed + */ - /* Process expired global events */ timestamp = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED); + if (timestamp == device->events_last_timestamp) + return; + + mutex_lock(&device->mutex); + + device->events_last_timestamp = timestamp; + _retire_events(device, &device->events, timestamp); _mark_next_event(device, &device->events); @@ -374,6 +420,11 @@ void kgsl_process_events(struct work_struct *work) list_for_each_entry_safe(context, tmp, &device->events_pending_list, events_list) { + /* + * Increment the refcount to make sure that the list_del_init + * is called with a valid context's list + */ + _kgsl_context_get(context); /* * If kgsl_timestamp_expired_context returns 0 then it no longer * has any pending events and can be removed from the list @@ -381,6 +432,7 @@ void kgsl_process_events(struct work_struct *work) if (kgsl_process_context_events(device, context) == 0) list_del_init(&context->events_list); + kgsl_context_put(context); } mutex_unlock(&device->mutex); diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c index 8f285053fb8943811e905a14bcfe48425ae9b33c..e52bb5dfd374795e09274c46e7f8ee6ae7e43e04 100644 --- a/drivers/gpu/msm/kgsl_gpummu.c +++ b/drivers/gpu/msm/kgsl_gpummu.c @@ -465,12 +465,12 @@ err_free_gpummu: return NULL; } -static void kgsl_gpummu_default_setstate(struct kgsl_mmu *mmu, +static int kgsl_gpummu_default_setstate(struct kgsl_mmu *mmu, uint32_t flags) { struct kgsl_gpummu_pt *gpummu_pt; if (!kgsl_mmu_enabled()) - return; + return 0; if (flags & KGSL_MMUFLAGS_PTUPDATE) { kgsl_idle(mmu->device); @@ -483,12 +483,16 @@ static void kgsl_gpummu_default_setstate(struct kgsl_mmu *mmu, /* Invalidate all and tc */ kgsl_regwrite(mmu->device, MH_MMU_INVALIDATE, 0x00000003); } + + return 0; } -static void kgsl_gpummu_setstate(struct kgsl_mmu *mmu, +static int kgsl_gpummu_setstate(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable, unsigned int context_id) { + int ret = 0; + if (mmu->flags & KGSL_FLAGS_STARTED) { /* page table not current, then setup mmu to use new * specified page table @@ -501,10 +505,13 @@ static void kgsl_gpummu_setstate(struct kgsl_mmu *mmu, kgsl_mmu_pt_get_flags(pagetable, mmu->device->id); /* call device specific set page table */ - kgsl_setstate(mmu, context_id, KGSL_MMUFLAGS_TLBFLUSH | + ret = kgsl_setstate(mmu, context_id, + KGSL_MMUFLAGS_TLBFLUSH | KGSL_MMUFLAGS_PTUPDATE); } } + + return ret; } static int kgsl_gpummu_init(struct kgsl_mmu *mmu) @@ -541,6 +548,7 @@ static int kgsl_gpummu_start(struct kgsl_mmu *mmu) struct kgsl_device *device = mmu->device; struct kgsl_gpummu_pt *gpummu_pt; + int ret; if (mmu->flags & KGSL_FLAGS_STARTED) return 0; @@ -552,9 +560,6 @@ static int kgsl_gpummu_start(struct kgsl_mmu *mmu) /* setup MMU and sub-client behavior */ kgsl_regwrite(device, MH_MMU_CONFIG, mmu->config); - /* idle device */ - kgsl_idle(device); - /* enable axi interrupts */ kgsl_regwrite(device, MH_INTERRUPT_MASK, GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT); @@ -585,10 +590,12 @@ static int kgsl_gpummu_start(struct kgsl_mmu *mmu) kgsl_regwrite(mmu->device, MH_MMU_VA_RANGE, (KGSL_PAGETABLE_BASE | (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE >> 16))); - kgsl_setstate(mmu, KGSL_MEMSTORE_GLOBAL, KGSL_MMUFLAGS_TLBFLUSH); - mmu->flags |= KGSL_FLAGS_STARTED; - return 0; + ret = kgsl_setstate(mmu, KGSL_MEMSTORE_GLOBAL, KGSL_MMUFLAGS_TLBFLUSH); + if (!ret) + mmu->flags |= KGSL_FLAGS_STARTED; + + return ret; } static int @@ -598,7 +605,7 @@ kgsl_gpummu_unmap(void 
*mmu_specific_pt, { unsigned int numpages; unsigned int pte, ptefirst, ptelast, superpte; - unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen); + unsigned int range = memdesc->size; struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt; /* All GPU addresses as assigned are page aligned, but some diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c index a12003ac126d0793e4c014ac9e3fd101d2e337d5..2c8abb1f6b3640d63a508728198f1e4c8ae91f8e 100644 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -32,6 +32,7 @@ #include "adreno.h" #include "kgsl_trace.h" #include "z180.h" +#include "kgsl_cffdump.h" static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = { @@ -62,6 +63,13 @@ static struct kgsl_iommu_register_list kgsl_iommuv2_reg[KGSL_IOMMU_REG_MAX] = { struct remote_iommu_petersons_spinlock kgsl_iommu_sync_lock_vars; +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; + static int get_iommu_unit(struct device *dev, struct kgsl_mmu **mmu_out, struct kgsl_iommu_unit **iommu_unit_out) { @@ -109,6 +117,170 @@ static struct kgsl_iommu_device *get_iommu_device(struct kgsl_iommu_unit *unit, return NULL; } +/* These functions help find the nearest allocated memory entries on either side + * of a faulting address. If we know the nearby allocations we can + * get a better determination of what we think should have been located in the + * faulting region + */ + +/* + * A local structure to make it easy to store the interesting bits for the + * memory entries on either side of the faulting address + */ + +struct _mem_entry { + unsigned int gpuaddr; + unsigned int size; + unsigned int flags; + unsigned int priv; + pid_t pid; +}; + +/* + * Find the closest allocated memory block with a smaller GPU address than the + * given address + */ + +static void _prev_entry(struct kgsl_process_private *priv, + unsigned int faultaddr, struct _mem_entry *ret) +{ + struct rb_node *node; + struct kgsl_mem_entry *entry; + + for (node = rb_first(&priv->mem_rb); node; ) { + entry = rb_entry(node, struct kgsl_mem_entry, node); + + if (entry->memdesc.gpuaddr > faultaddr) + break; + + /* + * If this is closer to the faulting address, then copy + * the entry + */ + + if (entry->memdesc.gpuaddr > ret->gpuaddr) { + ret->gpuaddr = entry->memdesc.gpuaddr; + ret->size = entry->memdesc.size; + ret->flags = entry->memdesc.flags; + ret->priv = entry->memdesc.priv; + ret->pid = priv->pid; + } + + node = rb_next(&entry->node); + } +} + +/* + * Find the closest allocated memory block with a greater starting GPU address + * than the given address + */ + +static void _next_entry(struct kgsl_process_private *priv, + unsigned int faultaddr, struct _mem_entry *ret) +{ + struct rb_node *node; + struct kgsl_mem_entry *entry; + + for (node = rb_last(&priv->mem_rb); node; ) { + entry = rb_entry(node, struct kgsl_mem_entry, node); + + if (entry->memdesc.gpuaddr < faultaddr) + break; + + /* + * If this is closer to the faulting address, then copy + * the entry + */ + + if (entry->memdesc.gpuaddr < ret->gpuaddr) { + ret->gpuaddr = entry->memdesc.gpuaddr; + ret->size = entry->memdesc.size; + ret->flags = entry->memdesc.flags; + ret->priv = entry->memdesc.priv; + ret->pid = priv->pid; + } + + node = rb_prev(&entry->node); + } +} + +static void _find_mem_entries(struct kgsl_mmu *mmu, unsigned int faultaddr, + unsigned int ptbase, struct _mem_entry *preventry, + struct _mem_entry 
*nextentry) +{ + struct kgsl_process_private *private; + int id = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase); + + memset(preventry, 0, sizeof(*preventry)); + memset(nextentry, 0, sizeof(*nextentry)); + + /* Set the maximum possible size as an initial value */ + nextentry->gpuaddr = 0xFFFFFFFF; + + mutex_lock(&kgsl_driver.process_mutex); + + list_for_each_entry(private, &kgsl_driver.process_list, list) { + + if (private->pagetable->name != id) + continue; + + spin_lock(&private->mem_lock); + _prev_entry(private, faultaddr, preventry); + _next_entry(private, faultaddr, nextentry); + spin_unlock(&private->mem_lock); + } + + mutex_unlock(&kgsl_driver.process_mutex); +} + +static void _print_entry(struct kgsl_device *device, struct _mem_entry *entry) +{ + char name[32]; + memset(name, 0, sizeof(name)); + + kgsl_get_memory_usage(name, sizeof(name) - 1, entry->flags); + + KGSL_LOG_DUMP(device, + "[%8.8X - %8.8X] %s (pid = %d) (%s)\n", + entry->gpuaddr, + entry->gpuaddr + entry->size, + entry->priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "", + entry->pid, name); +} + +static void _check_if_freed(struct kgsl_iommu_device *iommu_dev, + unsigned long addr, unsigned int pid) +{ + void *base = kgsl_driver.memfree_hist.base_hist_rb; + struct kgsl_memfree_hist_elem *wptr; + struct kgsl_memfree_hist_elem *p; + + mutex_lock(&kgsl_driver.memfree_hist_mutex); + wptr = kgsl_driver.memfree_hist.wptr; + p = wptr; + for (;;) { + if (p->size && p->pid == pid) + if (addr >= p->gpuaddr && + addr < (p->gpuaddr + p->size)) { + + KGSL_LOG_DUMP(iommu_dev->kgsldev, + "---- premature free ----\n"); + KGSL_LOG_DUMP(iommu_dev->kgsldev, + "[%8.8X-%8.8X] was already freed by pid %d\n", + p->gpuaddr, + p->gpuaddr + p->size, + p->pid); + } + p++; + if ((void *)p >= base + kgsl_driver.memfree_hist.size) + p = (struct kgsl_memfree_hist_elem *) base; + + if (p == kgsl_driver.memfree_hist.wptr) + break; + } + mutex_unlock(&kgsl_driver.memfree_hist_mutex); +} + static int kgsl_iommu_fault_handler(struct iommu_domain *domain, struct device *dev, unsigned long addr, int flags) { @@ -124,6 +296,7 @@ static int kgsl_iommu_fault_handler(struct iommu_domain *domain, unsigned int pid; unsigned int fsynr0, fsynr1; int write; + struct _mem_entry prev, next; ret = get_iommu_unit(dev, &mmu, &iommu_unit); if (ret) @@ -168,6 +341,24 @@ static int kgsl_iommu_fault_handler(struct iommu_domain *domain, write ? "write" : "read"); } + _check_if_freed(iommu_dev, addr, pid); + + KGSL_LOG_DUMP(iommu_dev->kgsldev, "---- nearby memory ----\n"); + + _find_mem_entries(mmu, addr, ptbase, &prev, &next); + + if (prev.gpuaddr) + _print_entry(iommu_dev->kgsldev, &prev); + else + KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n"); + + KGSL_LOG_DUMP(iommu_dev->kgsldev, " <- fault @ %8.8lX\n", addr); + + if (next.gpuaddr != 0xFFFFFFFF) + _print_entry(iommu_dev->kgsldev, &next); + else + KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n"); + mmu->fault = 1; iommu_dev->fault = 1; @@ -648,13 +839,10 @@ static int kgsl_iommu_init_sync_lock(struct kgsl_mmu *mmu) return status; /* Map Lock variables to GPU pagetable */ - iommu->sync_lock_desc.priv |= KGSL_MEMDESC_GLOBAL; - pagetable = mmu->priv_bank_table ? 
mmu->priv_bank_table : mmu->defaultpagetable; - status = kgsl_mmu_map(pagetable, &iommu->sync_lock_desc, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + status = kgsl_mmu_map_global(pagetable, &iommu->sync_lock_desc); if (status) { kgsl_mmu_unmap(pagetable, &iommu->sync_lock_desc); @@ -914,10 +1102,12 @@ static int kgsl_iommu_get_pt_lsb(struct kgsl_mmu *mmu, return 0; } -static void kgsl_iommu_setstate(struct kgsl_mmu *mmu, +static int kgsl_iommu_setstate(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable, unsigned int context_id) { + int ret = 0; + if (mmu->flags & KGSL_FLAGS_STARTED) { /* page table not current, then setup mmu to use new * specified page table @@ -928,10 +1118,12 @@ static void kgsl_iommu_setstate(struct kgsl_mmu *mmu, flags |= kgsl_mmu_pt_get_flags(mmu->hwpagetable, mmu->device->id) | KGSL_MMUFLAGS_TLBFLUSH; - kgsl_setstate(mmu, context_id, + ret = kgsl_setstate(mmu, context_id, KGSL_MMUFLAGS_PTUPDATE | flags); } } + + return ret; } /* @@ -959,23 +1151,18 @@ static int kgsl_iommu_setup_regs(struct kgsl_mmu *mmu, return 0; for (i = 0; i < iommu->unit_count; i++) { - iommu->iommu_units[i].reg_map.priv |= KGSL_MEMDESC_GLOBAL; - status = kgsl_mmu_map(pt, - &(iommu->iommu_units[i].reg_map), - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); - if (status) { - iommu->iommu_units[i].reg_map.priv &= - ~KGSL_MEMDESC_GLOBAL; + status = kgsl_mmu_map_global(pt, + &(iommu->iommu_units[i].reg_map)); + if (status) goto err; - } } + return 0; err: - for (i--; i >= 0; i--) { + for (i--; i >= 0; i--) kgsl_mmu_unmap(pt, &(iommu->iommu_units[i].reg_map)); - iommu->iommu_units[i].reg_map.priv &= ~KGSL_MEMDESC_GLOBAL; - } + return status; } @@ -1049,6 +1236,15 @@ static int kgsl_iommu_init(struct kgsl_mmu *mmu) iommu_ops.mmu_cleanup_pt = kgsl_iommu_cleanup_regs; } + if (kgsl_guard_page == NULL) { + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); + if (kgsl_guard_page == NULL) { + status = -ENOMEM; + goto done; + } + } + dev_info(mmu->device->dev, "|%s| MMU type set for device is IOMMU\n", __func__); done: @@ -1241,8 +1437,6 @@ static int kgsl_iommu_start(struct kgsl_mmu *mmu) kgsl_regwrite(mmu->device, MH_MMU_MPU_END, mh->mpu_base + mh->mpu_range); - } else { - kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000); } mmu->hwpagetable = mmu->defaultpagetable; @@ -1281,6 +1475,10 @@ static int kgsl_iommu_start(struct kgsl_mmu *mmu) kgsl_iommu_lock_rb_in_tlb(mmu); msm_iommu_unlock(); + /* For complete CFF */ + kgsl_cffdump_setmem(mmu->setstate_memory.gpuaddr + + KGSL_IOMMU_SETSTATE_NOP_OFFSET, + cp_nop_packet(1), sizeof(unsigned int)); kgsl_iommu_disable_clk_on_ts(mmu, 0, false); mmu->flags |= KGSL_FLAGS_STARTED; @@ -1299,7 +1497,7 @@ kgsl_iommu_unmap(void *mmu_specific_pt, unsigned int *tlb_flags) { int ret; - unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen); + unsigned int range = memdesc->size; struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt; /* All GPU addresses as assigned are page aligned, but some @@ -1311,6 +1509,9 @@ kgsl_iommu_unmap(void *mmu_specific_pt, if (range == 0 || gpuaddr == 0) return 0; + if (kgsl_memdesc_has_guard_page(memdesc)) + range += PAGE_SIZE; + ret = iommu_unmap_range(iommu_pt->domain, gpuaddr, range); if (ret) KGSL_CORE_ERR("iommu_unmap_range(%p, %x, %d) failed " @@ -1335,26 +1536,35 @@ kgsl_iommu_map(void *mmu_specific_pt, int ret; unsigned int iommu_virt_addr; struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt; - int size = kgsl_sg_size(memdesc->sg, memdesc->sglen); - unsigned int iommu_flags = IOMMU_READ; + int size = memdesc->size; BUG_ON(NULL 
== iommu_pt); - if (protflags & GSL_PT_PAGE_WV) - iommu_flags |= IOMMU_WRITE; - iommu_virt_addr = memdesc->gpuaddr; ret = iommu_map_range(iommu_pt->domain, iommu_virt_addr, memdesc->sg, - size, iommu_flags); + size, protflags); if (ret) { - KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %d) " - "failed with err: %d\n", iommu_pt->domain, - iommu_virt_addr, memdesc->sg, size, - iommu_flags, ret); + KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %x) err: %d\n", + iommu_pt->domain, iommu_virt_addr, memdesc->sg, size, + protflags, ret); return ret; } - + if (kgsl_memdesc_has_guard_page(memdesc)) { + ret = iommu_map(iommu_pt->domain, iommu_virt_addr + size, + page_to_phys(kgsl_guard_page), PAGE_SIZE, + protflags & ~IOMMU_WRITE); + if (ret) { + KGSL_CORE_ERR("iommu_map(%p, %x, %x, %x) err: %d\n", + iommu_pt->domain, iommu_virt_addr + size, + page_to_phys(kgsl_guard_page), + protflags & ~IOMMU_WRITE, + ret); + /* cleanup the partial mapping */ + iommu_unmap_range(iommu_pt->domain, iommu_virt_addr, + size); + } + } return ret; } @@ -1423,6 +1633,11 @@ static int kgsl_iommu_close(struct kgsl_mmu *mmu) kfree(iommu); + if (kgsl_guard_page != NULL) { + __free_page(kgsl_guard_page); + kgsl_guard_page = NULL; + } + return 0; } @@ -1458,19 +1673,22 @@ kgsl_iommu_get_current_ptbase(struct kgsl_mmu *mmu) * cpu * Return - void */ -static void kgsl_iommu_default_setstate(struct kgsl_mmu *mmu, +static int kgsl_iommu_default_setstate(struct kgsl_mmu *mmu, uint32_t flags) { struct kgsl_iommu *iommu = mmu->priv; int temp; int i; + int ret = 0; unsigned int pt_base = kgsl_iommu_get_pt_base_addr(mmu, mmu->hwpagetable); unsigned int pt_val; - if (kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER)) { + ret = kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER); + + if (ret) { KGSL_DRV_ERR(mmu->device, "Failed to enable iommu clocks\n"); - return; + return ret; } /* Mask off the lsb of the pt base address since lsb will not change */ pt_base &= (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask << @@ -1513,6 +1731,7 @@ static void kgsl_iommu_default_setstate(struct kgsl_mmu *mmu, /* Disable smmu clock */ kgsl_iommu_disable_clk_on_ts(mmu, 0, false); + return ret; } /* @@ -1554,6 +1773,7 @@ struct kgsl_mmu_ops iommu_ops = { .mmu_pagefault = NULL, .mmu_get_current_ptbase = kgsl_iommu_get_current_ptbase, .mmu_enable_clk = kgsl_iommu_enable_clk, + .mmu_disable_clk = kgsl_iommu_disable_clk, .mmu_disable_clk_on_ts = kgsl_iommu_disable_clk_on_ts, .mmu_get_pt_lsb = kgsl_iommu_get_pt_lsb, .mmu_get_reg_gpuaddr = kgsl_iommu_get_reg_gpuaddr, diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h index 83d14f79cc7f956b7e0ef70a851e06b517b5ceb1..81a35e0d20e5b784828f4eec58f7fae54b241a6d 100644 --- a/drivers/gpu/msm/kgsl_log.h +++ b/drivers/gpu/msm/kgsl_log.h @@ -103,15 +103,6 @@ KGSL_LOG_ERR(_dev->dev, _dev->pwr_log, fmt, ##args) #define KGSL_PWR_CRIT(_dev, fmt, args...) \ KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args) -#define KGSL_FT_INFO(_dev, fmt, args...) \ -KGSL_LOG_INFO(_dev->dev, _dev->ft_log, fmt, ##args) -#define KGSL_FT_WARN(_dev, fmt, args...) \ -KGSL_LOG_WARN(_dev->dev, _dev->ft_log, fmt, ##args) -#define KGSL_FT_ERR(_dev, fmt, args...) \ -KGSL_LOG_ERR(_dev->dev, _dev->ft_log, fmt, ##args) -#define KGSL_FT_CRIT(_dev, fmt, args...) 
\ -KGSL_LOG_CRIT(_dev->dev, _dev->ft_log, fmt, ##args) - /* Core error messages - these are for core KGSL functions that have no device associated with them (such as memory) */ diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c index b2507a3aec39560e2103d50ea3b3761b7a1cd4e9..ea127d266e10b6e1d3a7824eb15c2b2605755ea2 100644 --- a/drivers/gpu/msm/kgsl_mmu.c +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -560,7 +560,7 @@ void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) } EXPORT_SYMBOL(kgsl_mmu_putpagetable); -void kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id, +int kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id, uint32_t flags) { struct kgsl_device *device = mmu->device; @@ -568,14 +568,16 @@ void kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id, if (!(flags & (KGSL_MMUFLAGS_TLBFLUSH | KGSL_MMUFLAGS_PTUPDATE)) && !adreno_is_a2xx(adreno_dev)) - return; + return 0; if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) - return; + return 0; else if (device->ftbl->setstate) - device->ftbl->setstate(device, context_id, flags); + return device->ftbl->setstate(device, context_id, flags); else if (mmu->mmu_ops->mmu_device_setstate) - mmu->mmu_ops->mmu_device_setstate(mmu, flags); + return mmu->mmu_ops->mmu_device_setstate(mmu, flags); + + return 0; } EXPORT_SYMBOL(kgsl_setstate); @@ -584,7 +586,6 @@ void kgsl_mh_start(struct kgsl_device *device) struct kgsl_mh *mh = &device->mh; /* force mmu off to for now*/ kgsl_regwrite(device, MH_MMU_CONFIG, 0); - kgsl_idle(device); /* define physical memory range accessible by the core */ kgsl_regwrite(device, MH_MMU_MPU_BASE, mh->mpu_base); @@ -605,16 +606,17 @@ void kgsl_mh_start(struct kgsl_device *device) * kgsl_pwrctrl_irq() is called */ } +EXPORT_SYMBOL(kgsl_mh_start); int kgsl_mmu_map(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc, - unsigned int protflags) + struct kgsl_memdesc *memdesc) { int ret; struct gen_pool *pool = NULL; int size; int page_align = ilog2(PAGE_SIZE); + unsigned int protflags = kgsl_memdesc_protflags(memdesc); if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { if (memdesc->sglen == 1) { @@ -634,7 +636,10 @@ kgsl_mmu_map(struct kgsl_pagetable *pagetable, } } - size = kgsl_sg_size(memdesc->sg, memdesc->sglen); + /* Add space for the guard page when allocating the mmu VA. */ + size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += PAGE_SIZE; pool = pagetable->pool; @@ -732,7 +737,10 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, return 0; } - size = kgsl_sg_size(memdesc->sg, memdesc->sglen); + /* Add space for the guard page when freeing the mmu VA. 
*/ + size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += PAGE_SIZE; start_addr = memdesc->gpuaddr; end_addr = (memdesc->gpuaddr + size); @@ -777,7 +785,7 @@ kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, EXPORT_SYMBOL(kgsl_mmu_unmap); int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc, unsigned int protflags) + struct kgsl_memdesc *memdesc) { int result = -EINVAL; unsigned int gpuaddr = 0; @@ -789,11 +797,10 @@ int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, /* Not all global mappings are needed for all MMU types */ if (!memdesc->size) return 0; - gpuaddr = memdesc->gpuaddr; memdesc->priv |= KGSL_MEMDESC_GLOBAL; - result = kgsl_mmu_map(pagetable, memdesc, protflags); + result = kgsl_mmu_map(pagetable, memdesc); if (result) goto error; diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h index 9d1bffa945454b1d59e73c61778c42f8a8ab717a..fe1b2ee58eadd6a14aee76c0b72847bf8dedd35e 100644 --- a/drivers/gpu/msm/kgsl_mmu.h +++ b/drivers/gpu/msm/kgsl_mmu.h @@ -125,10 +125,10 @@ struct kgsl_mmu_ops { int (*mmu_close) (struct kgsl_mmu *mmu); int (*mmu_start) (struct kgsl_mmu *mmu); void (*mmu_stop) (struct kgsl_mmu *mmu); - void (*mmu_setstate) (struct kgsl_mmu *mmu, + int (*mmu_setstate) (struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable, unsigned int context_id); - void (*mmu_device_setstate) (struct kgsl_mmu *mmu, + int (*mmu_device_setstate) (struct kgsl_mmu *mmu, uint32_t flags); void (*mmu_pagefault) (struct kgsl_mmu *mmu); unsigned int (*mmu_get_current_ptbase) @@ -137,6 +137,8 @@ struct kgsl_mmu_ops { (struct kgsl_mmu *mmu, uint32_t ts, bool ts_valid); int (*mmu_enable_clk) (struct kgsl_mmu *mmu, int ctx_id); + void (*mmu_disable_clk) + (struct kgsl_mmu *mmu); int (*mmu_get_pt_lsb)(struct kgsl_mmu *mmu, unsigned int unit_id, enum kgsl_iommu_context_id ctx_id); @@ -204,14 +206,13 @@ int kgsl_mmu_init(struct kgsl_device *device); int kgsl_mmu_start(struct kgsl_device *device); int kgsl_mmu_close(struct kgsl_device *device); int kgsl_mmu_map(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc, - unsigned int protflags); + struct kgsl_memdesc *memdesc); int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, - struct kgsl_memdesc *memdesc, unsigned int protflags); + struct kgsl_memdesc *memdesc); int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, struct kgsl_memdesc *memdesc); unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr); -void kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id, +int kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id, uint32_t flags); int kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, unsigned int pt_base); @@ -240,19 +241,23 @@ static inline unsigned int kgsl_mmu_get_current_ptbase(struct kgsl_mmu *mmu) return 0; } -static inline void kgsl_mmu_setstate(struct kgsl_mmu *mmu, +static inline int kgsl_mmu_setstate(struct kgsl_mmu *mmu, struct kgsl_pagetable *pagetable, unsigned int context_id) { if (mmu->mmu_ops && mmu->mmu_ops->mmu_setstate) - mmu->mmu_ops->mmu_setstate(mmu, pagetable, context_id); + return mmu->mmu_ops->mmu_setstate(mmu, pagetable, context_id); + + return 0; } -static inline void kgsl_mmu_device_setstate(struct kgsl_mmu *mmu, +static inline int kgsl_mmu_device_setstate(struct kgsl_mmu *mmu, uint32_t flags) { if (mmu->mmu_ops && mmu->mmu_ops->mmu_device_setstate) - mmu->mmu_ops->mmu_device_setstate(mmu, flags); + return mmu->mmu_ops->mmu_device_setstate(mmu, flags); + + return 0; } static inline void kgsl_mmu_stop(struct 
kgsl_mmu *mmu) @@ -299,6 +304,12 @@ static inline int kgsl_mmu_enable_clk(struct kgsl_mmu *mmu, return 0; } +static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_disable_clk) + mmu->mmu_ops->mmu_disable_clk(mmu); +} + static inline void kgsl_mmu_disable_clk_on_ts(struct kgsl_mmu *mmu, unsigned int ts, bool ts_valid) { diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c index 452d8a3180913bb725418b63a83ae533c9672954..bcdce85b24b315b2fad01bb3c1755df0a9f93300 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.c +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -1012,6 +1012,14 @@ void kgsl_pwrctrl_close(struct kgsl_device *device) pwr->power_flags = 0; } +/** + * kgsl_idle_check() - Work function for GPU interrupts and idle timeouts. + * @device: The device + * + * This function is called for work that is queued by the interrupt + * handler or the idle timer. It attempts to transition to a clocks + * off state if the active_cnt is 0 and the hardware is idle. + */ void kgsl_idle_check(struct work_struct *work) { struct kgsl_device *device = container_of(work, struct kgsl_device, @@ -1021,15 +1029,24 @@ void kgsl_idle_check(struct work_struct *work) return; mutex_lock(&device->mutex); - if (device->state & (KGSL_STATE_ACTIVE | KGSL_STATE_NAP)) { - kgsl_pwrscale_idle(device); + kgsl_pwrscale_idle(device); + + if (device->state == KGSL_STATE_ACTIVE + || device->state == KGSL_STATE_NAP) { + + /* If we failed to sleep then reset the timer and try again */ if (kgsl_pwrctrl_sleep(device) != 0) { + + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + mod_timer(&device->idle_timer, jiffies + device->pwrctrl.interval_timeout); - /* If the GPU has been too busy to sleep, make sure * - * that is acurately reflected in the % busy numbers. */ + /* + * If the GPU has been too busy to sleep, make sure + * that is accurately reflected in the % busy numbers. + */ device->pwrctrl.clk_stats.no_nap_cnt++; if (device->pwrctrl.clk_stats.no_nap_cnt > UPDATE_BUSY) { @@ -1037,13 +1054,11 @@ void kgsl_idle_check(struct work_struct *work) device->pwrctrl.clk_stats.no_nap_cnt = 0; } } - } else if (device->state & (KGSL_STATE_HUNG | - KGSL_STATE_DUMP_AND_FT)) { - kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); } mutex_unlock(&device->mutex); } +EXPORT_SYMBOL(kgsl_idle_check); void kgsl_timer(unsigned long data) { @@ -1061,54 +1076,26 @@ void kgsl_timer(unsigned long data) } } + +/** + * kgsl_pre_hwaccess - Enforce preconditions for touching registers + * @device: The device + * + * This function ensures that the correct lock is held and that the GPU + * clock is on immediately before a register is read or written. Note + * that this function does not check active_cnt because the registers + * must be accessed during device start and stop, when the active_cnt + * may legitimately be 0.
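+ *
+ * Illustrative calling pattern (the register offset and value below are
+ * placeholders for whatever access the caller needs): both preconditions
+ * are normally satisfied by taking the device mutex and an active count
+ * reference around the hardware access:
+ *
+ *	mutex_lock(&device->mutex);
+ *	if (kgsl_active_count_get(device) == 0) {
+ *		kgsl_regwrite(device, offset, value);
+ *		kgsl_active_count_put(device);
+ *	}
+ *	mutex_unlock(&device->mutex);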
+ */ void kgsl_pre_hwaccess(struct kgsl_device *device) { + /* In order to touch a register you must hold the device mutex...*/ BUG_ON(!mutex_is_locked(&device->mutex)); - switch (device->state) { - case KGSL_STATE_ACTIVE: - return; - case KGSL_STATE_NAP: - case KGSL_STATE_SLEEP: - case KGSL_STATE_SLUMBER: - kgsl_pwrctrl_wake(device); - break; - case KGSL_STATE_SUSPEND: - kgsl_check_suspended(device); - break; - case KGSL_STATE_INIT: - case KGSL_STATE_HUNG: - case KGSL_STATE_DUMP_AND_FT: - if (test_bit(KGSL_PWRFLAGS_CLK_ON, - &device->pwrctrl.power_flags)) - break; - else - KGSL_PWR_ERR(device, - "hw access while clocks off from state %d\n", - device->state); - break; - default: - KGSL_PWR_ERR(device, "hw access while in unknown state %d\n", - device->state); - break; - } + /* and have the clock on! */ + BUG_ON(!test_bit(KGSL_PWRFLAGS_CLK_ON, &device->pwrctrl.power_flags)); } EXPORT_SYMBOL(kgsl_pre_hwaccess); -void kgsl_check_suspended(struct kgsl_device *device) -{ - if (device->requested_state == KGSL_STATE_SUSPEND || - device->state == KGSL_STATE_SUSPEND) { - mutex_unlock(&device->mutex); - wait_for_completion(&device->hwaccess_gate); - mutex_lock(&device->mutex); - } else if (device->state == KGSL_STATE_DUMP_AND_FT) { - mutex_unlock(&device->mutex); - wait_for_completion(&device->ft_gate); - mutex_lock(&device->mutex); - } else if (device->state == KGSL_STATE_SLUMBER) - kgsl_pwrctrl_wake(device); -} - static int _nap(struct kgsl_device *device) { @@ -1187,6 +1174,8 @@ _slumber(struct kgsl_device *device) case KGSL_STATE_NAP: case KGSL_STATE_SLEEP: del_timer_sync(&device->idle_timer); + /* make sure power is on to stop the device*/ + kgsl_pwrctrl_enable(device); device->ftbl->suspend_context(device); device->ftbl->stop(device); _sleep_accounting(device); @@ -1236,9 +1225,9 @@ EXPORT_SYMBOL(kgsl_pwrctrl_sleep); /******************************************************************/ /* Caller must hold the device mutex. 
*/ -void kgsl_pwrctrl_wake(struct kgsl_device *device) +int kgsl_pwrctrl_wake(struct kgsl_device *device) { - int status; + int status = 0; unsigned int context_id; unsigned int state = device->state; unsigned int ts_processed = 0xdeaddead; @@ -1247,7 +1236,7 @@ void kgsl_pwrctrl_wake(struct kgsl_device *device) kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE); switch (device->state) { case KGSL_STATE_SLUMBER: - status = device->ftbl->start(device, 0); + status = device->ftbl->start(device); if (status) { kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); KGSL_DRV_ERR(device, "start failed %d\n", status); @@ -1276,9 +1265,6 @@ void kgsl_pwrctrl_wake(struct kgsl_device *device) /* Enable state before turning on irq */ kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); - /* Re-enable HW access */ - mod_timer(&device->idle_timer, - jiffies + device->pwrctrl.interval_timeout); pm_qos_update_request(&device->pm_qos_req_dma, GPU_SWFI_LATENCY); case KGSL_STATE_ACTIVE: @@ -1288,8 +1274,10 @@ void kgsl_pwrctrl_wake(struct kgsl_device *device) KGSL_PWR_WARN(device, "unhandled state %s\n", kgsl_pwrstate_to_str(device->state)); kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; break; } + return status; } EXPORT_SYMBOL(kgsl_pwrctrl_wake); @@ -1342,10 +1330,6 @@ const char *kgsl_pwrstate_to_str(unsigned int state) return "SLEEP"; case KGSL_STATE_SUSPEND: return "SUSPEND"; - case KGSL_STATE_HUNG: - return "HUNG"; - case KGSL_STATE_DUMP_AND_FT: - return "DNR"; case KGSL_STATE_SLUMBER: return "SLUMBER"; default: @@ -1355,3 +1339,118 @@ const char *kgsl_pwrstate_to_str(unsigned int state) } EXPORT_SYMBOL(kgsl_pwrstate_to_str); + +/** + * kgsl_active_count_get() - Increase the device active count + * @device: Pointer to a KGSL device + * + * Increase the active count for the KGSL device and turn on + * clocks if this is the first reference. Code paths that need + * to touch the hardware or wait for the hardware to complete + * an operation must hold an active count reference until they + * are finished. An error code will be returned if waking the + * device fails. The device mutex must be held while *calling + * this function. + */ +int kgsl_active_count_get(struct kgsl_device *device) +{ + int ret = 0; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (atomic_read(&device->active_cnt) == 0) { + if (device->requested_state == KGSL_STATE_SUSPEND || + device->state == KGSL_STATE_SUSPEND) { + mutex_unlock(&device->mutex); + wait_for_completion(&device->hwaccess_gate); + mutex_lock(&device->mutex); + } + + /* Stop the idle timer */ + del_timer_sync(&device->idle_timer); + + ret = kgsl_pwrctrl_wake(device); + } + if (ret == 0) + atomic_inc(&device->active_cnt); + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + return ret; +} +EXPORT_SYMBOL(kgsl_active_count_get); + +/** + * kgsl_active_count_get_light() - Increase the device active count + * @device: Pointer to a KGSL device + * + * Increase the active count for the KGSL device WITHOUT + * turning on the clocks based on the assumption that the clocks are already + * on from a previous active_count_get(). Currently this is only used for + * creating kgsl_events. 
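+ *
+ * Illustrative use, with device->mutex already held and with
+ * kgsl_queue_my_event() standing in for the caller's own work (it is not
+ * a function in this driver): the clocks are assumed to be on, so the
+ * reference is taken without waking the device and is released with
+ * kgsl_active_count_put() as usual:
+ *
+ *	if (kgsl_active_count_get_light(device) == 0) {
+ *		kgsl_queue_my_event(device);
+ *		kgsl_active_count_put(device);
+ *	}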
+ */ +int kgsl_active_count_get_light(struct kgsl_device *device) +{ + if (atomic_inc_not_zero(&device->active_cnt) == 0) { + dev_WARN_ONCE(device->dev, 1, "active count is 0!\n"); + return -EINVAL; + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + return 0; +} +EXPORT_SYMBOL(kgsl_active_count_get_light); + +/** + * kgsl_active_count_put() - Decrease the device active count + * @device: Pointer to a KGSL device + * + * Decrease the active count for the KGSL device and turn off + * clocks if there are no remaining references. This function will + * transition the device to NAP if there are no other pending state + * changes. It also completes the suspend gate. The device mutex must + * be held while calling this function. + */ +void kgsl_active_count_put(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + BUG_ON(atomic_read(&device->active_cnt) == 0); + + kgsl_pwrscale_idle(device); + + if (atomic_dec_and_test(&device->active_cnt)) { + INIT_COMPLETION(device->suspend_gate); + + if (device->pwrctrl.nap_allowed == true) { + /* Request nap */ + kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); + kgsl_pwrctrl_sleep(device); + } + + mod_timer(&device->idle_timer, + jiffies + device->pwrctrl.interval_timeout); + + complete(&device->suspend_gate); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); +} +EXPORT_SYMBOL(kgsl_active_count_put); + +/** + * kgsl_active_count_wait() - Wait for activity to finish. + * @device: Pointer to a KGSL device + * + * Block until all active_cnt users put() their reference. + */ +void kgsl_active_count_wait(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (atomic_read(&device->active_cnt) != 0) { + mutex_unlock(&device->mutex); + wait_for_completion(&device->suspend_gate); + mutex_lock(&device->mutex); + } +} +EXPORT_SYMBOL(kgsl_active_count_wait); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h index 8d66505cd5dbf4d5705d9d0924cc02a737f8dcc0..94cd8eb5b52e2efa09bc247e97fa730e0523338c 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.h +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -93,9 +93,8 @@ void kgsl_pwrctrl_close(struct kgsl_device *device); void kgsl_timer(unsigned long data); void kgsl_idle_check(struct work_struct *work); void kgsl_pre_hwaccess(struct kgsl_device *device); -void kgsl_check_suspended(struct kgsl_device *device); int kgsl_pwrctrl_sleep(struct kgsl_device *device); -void kgsl_pwrctrl_wake(struct kgsl_device *device); +int kgsl_pwrctrl_wake(struct kgsl_device *device); void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, unsigned int level); int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device); @@ -109,4 +108,10 @@ static inline unsigned long kgsl_get_clkrate(struct clk *clk) void kgsl_pwrctrl_set_state(struct kgsl_device *device, unsigned int state); void kgsl_pwrctrl_request_state(struct kgsl_device *device, unsigned int state); + +int kgsl_active_count_get(struct kgsl_device *device); +int kgsl_active_count_get_light(struct kgsl_device *device); +void kgsl_active_count_put(struct kgsl_device *device); +void kgsl_active_count_wait(struct kgsl_device *device); + #endif /* __KGSL_PWRCTRL_H */ diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c index dffae7016e162c343db44100f14370dfd3ca9b8b..4f7dc5cc2717ae35ed22545efbeaaf0afb7f5636 100644 --- a/drivers/gpu/msm/kgsl_pwrscale.c +++ b/drivers/gpu/msm/kgsl_pwrscale.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-2012, The 
Linux Foundation. All rights reserved. +/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -241,6 +241,7 @@ void kgsl_pwrscale_busy(struct kgsl_device *device) device->pwrscale.policy->busy(device, &device->pwrscale); } +EXPORT_SYMBOL(kgsl_pwrscale_busy); void kgsl_pwrscale_idle(struct kgsl_device *device) { diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index bbab3c2f0fb7d855571c626a594b862e10726a5d..9329846bac80325967279bbc40b57c13608c6e33 100755 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -65,14 +65,6 @@ struct mem_entry_stats { mem_entry_max_show), \ } - -/* - * One page allocation for a guard region to protect against over-zealous - * GPU pre-fetch - */ - -static struct page *kgsl_guard_page; - /** * Given a kobj, find the process structure attached to it */ @@ -244,6 +236,29 @@ static int kgsl_drv_histogram_show(struct device *dev, return len; } +static int kgsl_drv_full_cache_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret; + unsigned int thresh; + ret = sscanf(buf, "%d", &thresh); + if (ret != 1) + return count; + + kgsl_driver.full_cache_threshold = thresh; + + return count; +} + +static int kgsl_drv_full_cache_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", + kgsl_driver.full_cache_threshold); +} + DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); @@ -253,6 +268,9 @@ DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(mapped_max, 0444, kgsl_drv_memstat_show, NULL); DEVICE_ATTR(histogram, 0444, kgsl_drv_histogram_show, NULL); +DEVICE_ATTR(full_cache_threshold, 0644, + kgsl_drv_full_cache_threshold_show, + kgsl_drv_full_cache_threshold_store); static const struct device_attribute *drv_attr_list[] = { &dev_attr_vmalloc, @@ -264,6 +282,7 @@ static const struct device_attribute *drv_attr_list[] = { &dev_attr_mapped, &dev_attr_mapped_max, &dev_attr_histogram, + &dev_attr_full_cache_threshold, NULL }; @@ -366,10 +385,6 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) struct scatterlist *sg; int sglen = memdesc->sglen; - /* Don't free the guard page if it was used */ - if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) - sglen--; - kgsl_driver.stats.page_alloc -= memdesc->size; if (memdesc->hostptr) { @@ -407,10 +422,6 @@ static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) int sglen = memdesc->sglen; int i, count = 0; - /* Don't map the guard page if it exists */ - if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) - sglen--; - /* create a list of pages to call vmap */ pages = vmalloc(npages * sizeof(struct page *)); if (!pages) { @@ -568,14 +579,6 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, sglen_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT; - /* - * Add guard page to the end of the allocation when the - * IOMMU is in use. 
- */ - - if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) - sglen_alloc++; - memdesc->size = size; memdesc->pagetable = pagetable; memdesc->ops = &kgsl_page_alloc_ops; @@ -648,26 +651,6 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, len -= page_size; } - /* Add the guard page to the end of the sglist */ - - if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) { - /* - * It doesn't matter if we use GFP_ZERO here, this never - * gets mapped, and we only allocate it once in the life - * of the system - */ - - if (kgsl_guard_page == NULL) - kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | - __GFP_HIGHMEM); - - if (kgsl_guard_page != NULL) { - sg_set_page(&memdesc->sg[sglen++], kgsl_guard_page, - PAGE_SIZE, 0); - memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE; - } - } - memdesc->sglen = sglen; /* diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index e31b8f3d88eac2e038c2603cf57d22c0e025c7da..c000cbb6df63552f1672478d332a43b3918dcbbb 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -19,6 +19,7 @@ #include "kgsl_mmu.h" #include <linux/slab.h> #include <linux/kmemleak.h> +#include <linux/iommu.h> #include "kgsl_log.h" @@ -200,15 +201,24 @@ kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc) /* * kgsl_memdesc_protflags - get mmu protection flags * @memdesc - the memdesc - * Returns a mask of GSL_PT_PAGE* values based on the - * memdesc flags. + * Returns a mask of GSL_PT_PAGE* or IOMMU* values based + * on the memdesc flags. */ static inline unsigned int kgsl_memdesc_protflags(const struct kgsl_memdesc *memdesc) { - unsigned int protflags = GSL_PT_PAGE_RV; - if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)) - protflags |= GSL_PT_PAGE_WV; + unsigned int protflags = 0; + enum kgsl_mmutype mmutype = kgsl_mmu_get_mmutype(); + + if (mmutype == KGSL_MMU_TYPE_GPU) { + protflags = GSL_PT_PAGE_RV; + if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)) + protflags |= GSL_PT_PAGE_WV; + } else if (mmutype == KGSL_MMU_TYPE_IOMMU) { + protflags = IOMMU_READ; + if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)) + protflags |= IOMMU_WRITE; + } return protflags; } @@ -253,8 +263,7 @@ kgsl_allocate(struct kgsl_memdesc *memdesc, ret = kgsl_sharedmem_page_alloc(memdesc, pagetable, size); if (ret) return ret; - ret = kgsl_mmu_map(pagetable, memdesc, - kgsl_memdesc_protflags(memdesc)); + ret = kgsl_mmu_map(pagetable, memdesc); if (ret) kgsl_sharedmem_free(memdesc); return ret; @@ -291,15 +300,4 @@ kgsl_allocate_contiguous(struct kgsl_memdesc *memdesc, size_t size) return ret; } -static inline int kgsl_sg_size(struct scatterlist *sg, int sglen) -{ - int i, size = 0; - struct scatterlist *s; - - for_each_sg(sg, s, sglen, i) { - size += s->length; - } - - return size; -} #endif /* __KGSL_SHAREDMEM_H */ diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c index e20029453c45346da9067acc82d5eda363813031..50ac9c1bd3417fe32c4bb776d0dee30028a96176 100644 --- a/drivers/gpu/msm/kgsl_snapshot.c +++ b/drivers/gpu/msm/kgsl_snapshot.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -106,7 +106,12 @@ static int snapshot_context_info(int id, void *ptr, void *data) { struct kgsl_snapshot_linux_context *header = _ctxtptr; struct kgsl_context *context = ptr; - struct kgsl_device *device = context->dev_priv->device; + struct kgsl_device *device; + + if (context) + device = context->device; + else + device = (struct kgsl_device *)data; header->id = id; @@ -139,9 +144,12 @@ static int snapshot_os(struct kgsl_device *device, /* Figure out how many active contexts there are - these will * be appended on the end of the structure */ - rcu_read_lock(); + read_lock(&device->context_lock); idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount); - rcu_read_unlock(); + read_unlock(&device->context_lock); + + /* Increment ctxcount for the global memstore */ + ctxtcount++; size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context); @@ -171,8 +179,9 @@ static int snapshot_os(struct kgsl_device *device, header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]); header->busclk = kgsl_get_clkrate(pwr->ebi1_clk); - /* Future proof for per-context timestamps */ - header->current_context = -1; + /* Save the last active context */ + kgsl_sharedmem_readl(&device->memstore, &header->current_context, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context)); /* Get the current PT base */ header->ptbase = kgsl_mmu_get_current_ptbase(&device->mmu); @@ -187,11 +196,17 @@ static int snapshot_os(struct kgsl_device *device, header->ctxtcount = ctxtcount; - /* append information for each context */ _ctxtptr = snapshot + sizeof(*header); - rcu_read_lock(); + + /* append information for the global context */ + snapshot_context_info(KGSL_MEMSTORE_GLOBAL, NULL, device); + + /* append information for each context */ + + read_lock(&device->context_lock); idr_for_each(&device->context_idr, snapshot_context_info, NULL); - rcu_read_unlock(); + read_unlock(&device->context_lock); + /* Return the size of the data segment */ return size; } @@ -286,7 +301,7 @@ static void kgsl_snapshot_put_object(struct kgsl_device *device, { list_del(&obj->node); - obj->entry->flags &= ~KGSL_MEM_ENTRY_FROZEN; + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN; kgsl_mem_entry_put(obj->entry); kfree(obj); @@ -317,6 +332,7 @@ int kgsl_snapshot_have_object(struct kgsl_device *device, unsigned int ptbase, return 0; } +EXPORT_SYMBOL(kgsl_snapshot_have_object); /* kgsl_snapshot_get_object - Mark a GPU buffer to be frozen * @device - the device that is being snapshotted @@ -336,6 +352,10 @@ int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase, struct kgsl_mem_entry *entry; struct kgsl_snapshot_object *obj; int offset; + int ret = -EINVAL; + + if (!gpuaddr) + return 0; entry = kgsl_get_mem_entry(device, ptbase, gpuaddr, size); @@ -349,7 +369,7 @@ int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase, if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) { KGSL_DRV_ERR(device, "Only internal GPU buffers can be frozen\n"); - return -EINVAL; + goto err_put; } /* @@ -372,36 +392,33 @@ int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase, if (size + offset > entry->memdesc.size) { KGSL_DRV_ERR(device, "Invalid size for GPU buffer %8.8X\n", gpuaddr); - return -EINVAL; + goto err_put; } /* If the buffer is already on the list, skip it */ list_for_each_entry(obj, &device->snapshot_obj_list, node) { if (obj->gpuaddr == gpuaddr 
&& obj->ptbase == ptbase) { - /* If the size is different, use the new size */ - if (obj->size != size) + /* If the size is different, use the bigger size */ + if (obj->size < size) obj->size = size; - - return 0; + ret = 0; + goto err_put; } } if (kgsl_memdesc_map(&entry->memdesc) == NULL) { KGSL_DRV_ERR(device, "Unable to map GPU buffer %X\n", gpuaddr); - return -EINVAL; + goto err_put; } obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (obj == NULL) { KGSL_DRV_ERR(device, "Unable to allocate memory\n"); - return -EINVAL; + goto err_put; } - /* Ref count the mem entry */ - kgsl_mem_entry_get(entry); - obj->type = type; obj->entry = entry; obj->gpuaddr = gpuaddr; @@ -419,12 +436,15 @@ int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase, * 0 so it doesn't get counted twice */ - if (entry->flags & KGSL_MEM_ENTRY_FROZEN) - return 0; + ret = (entry->memdesc.priv & KGSL_MEMDESC_FROZEN) ? 0 + : entry->memdesc.size; - entry->flags |= KGSL_MEM_ENTRY_FROZEN; + entry->memdesc.priv |= KGSL_MEMDESC_FROZEN; - return entry->memdesc.size; + return ret; +err_put: + kgsl_mem_entry_put(entry); + return ret; } EXPORT_SYMBOL(kgsl_snapshot_get_object); diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c index 8ee076d8e47fb462d8698a0e6d06f0d39bf09729..b74d4604d14ac22cff946d8205776ff1fda837ef 100644 --- a/drivers/gpu/msm/kgsl_sync.c +++ b/drivers/gpu/msm/kgsl_sync.c @@ -11,6 +11,7 @@ * */ +#include <linux/err.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/uaccess.h> @@ -225,3 +226,65 @@ void kgsl_sync_timeline_destroy(struct kgsl_context *context) { sync_timeline_destroy(context->timeline); } + +static void kgsl_sync_callback(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct kgsl_sync_fence_waiter *kwaiter = + (struct kgsl_sync_fence_waiter *) waiter; + kwaiter->func(kwaiter->priv); + sync_fence_put(kwaiter->fence); + kfree(kwaiter); +} + +struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv) +{ + struct kgsl_sync_fence_waiter *kwaiter; + struct sync_fence *fence; + int status; + + fence = sync_fence_fdget(fd); + if (fence == NULL) + return ERR_PTR(-EINVAL); + + /* create the waiter */ + kwaiter = kzalloc(sizeof(*kwaiter), GFP_KERNEL); + if (kwaiter == NULL) { + sync_fence_put(fence); + return ERR_PTR(-ENOMEM); + } + kwaiter->fence = fence; + kwaiter->priv = priv; + kwaiter->func = func; + sync_fence_waiter_init((struct sync_fence_waiter *) kwaiter, + kgsl_sync_callback); + + /* if status then error or signaled */ + status = sync_fence_wait_async(fence, + (struct sync_fence_waiter *) kwaiter); + if (status) { + kfree(kwaiter); + sync_fence_put(fence); + if (status < 0) + kwaiter = ERR_PTR(status); + else + kwaiter = NULL; + } + + return kwaiter; +} + +int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *kwaiter) +{ + if (kwaiter == NULL) + return 0; + + if(sync_fence_cancel_async(kwaiter->fence, + (struct sync_fence_waiter *) kwaiter) == 0) { + sync_fence_put(kwaiter->fence); + kfree(kwaiter); + return 1; + } + return 0; +} diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h index 06b3ad0d89188316ddb2d61ed99381f63b5cf171..2f28b21fc6dd84fb48736ed0a24a773e4dc56020 100644 --- a/drivers/gpu/msm/kgsl_sync.h +++ b/drivers/gpu/msm/kgsl_sync.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -26,6 +26,13 @@ struct kgsl_sync_pt { unsigned int timestamp; }; +struct kgsl_sync_fence_waiter { + struct sync_fence_waiter waiter; + struct sync_fence *fence; + void (*func)(void *priv); + void *priv; +}; + #if defined(CONFIG_SYNC) struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline, unsigned int timestamp); @@ -37,6 +44,9 @@ int kgsl_sync_timeline_create(struct kgsl_context *context); void kgsl_sync_timeline_signal(struct sync_timeline *timeline, unsigned int timestamp); void kgsl_sync_timeline_destroy(struct kgsl_context *context); +struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv); +int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter); #else static inline struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline, unsigned int timestamp) @@ -70,6 +80,20 @@ kgsl_sync_timeline_signal(struct sync_timeline *timeline, static inline void kgsl_sync_timeline_destroy(struct kgsl_context *context) { } + +static inline struct +kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv) +{ + return NULL; +} + +static inline int +kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter) +{ + return 1; +} + #endif #endif /* __KGSL_SYNC_H */ diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h index 8c62739eed995ffb1d64f81155ce5fb7ee92581a..b55075935db729f44f1a2db669bcbe5265b9d132 100644 --- a/drivers/gpu/msm/kgsl_trace.h +++ b/drivers/gpu/msm/kgsl_trace.h @@ -37,14 +37,13 @@ TRACE_EVENT(kgsl_issueibcmds, TP_PROTO(struct kgsl_device *device, int drawctxt_id, - struct kgsl_ibdesc *ibdesc, - int numibs, + struct kgsl_cmdbatch *cmdbatch, int timestamp, int flags, int result, unsigned int type), - TP_ARGS(device, drawctxt_id, ibdesc, numibs, timestamp, flags, + TP_ARGS(device, drawctxt_id, cmdbatch, timestamp, flags, result, type), TP_STRUCT__entry( @@ -61,8 +60,8 @@ TRACE_EVENT(kgsl_issueibcmds, TP_fast_assign( __assign_str(device_name, device->name); __entry->drawctxt_id = drawctxt_id; - __entry->ibdesc_addr = ibdesc[0].gpuaddr; - __entry->numibs = numibs; + __entry->ibdesc_addr = cmdbatch->ibdesc[0].gpuaddr; + __entry->numibs = cmdbatch->ibcount; __entry->timestamp = timestamp; __entry->flags = flags; __entry->result = result; @@ -479,6 +478,67 @@ TRACE_EVENT(kgsl_mem_free, ) ); +TRACE_EVENT(kgsl_mem_sync_cache, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int op), + + TP_ARGS(mem_entry, op), + + TP_STRUCT__entry( + __field(unsigned int, gpuaddr) + __field(unsigned int, size) + __array(char, usage, 16) + __field(unsigned int, tgid) + __field(unsigned int, id) + __field(unsigned int, op) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->tgid = mem_entry->priv->pid; + __entry->id = mem_entry->id; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->op = op; + ), + + TP_printk( + "gpuaddr=0x%08x size=%d tgid=%d usage=%s id=%d op=%c%c", + __entry->gpuaddr, __entry->size, __entry->tgid, __entry->usage, + __entry->id, + (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.', + (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 'i' : '.' 
+ ) +); + +TRACE_EVENT(kgsl_mem_sync_full_cache, + + TP_PROTO(unsigned int num_bufs, unsigned int bulk_size, + unsigned int op), + + TP_ARGS(num_bufs, bulk_size, op), + + TP_STRUCT__entry( + __field(unsigned int, num_bufs) + __field(unsigned int, bulk_size) + __field(unsigned int, op) + ), + + TP_fast_assign( + __entry->num_bufs = num_bufs; + __entry->bulk_size = bulk_size; + __entry->op = op; + ), + + TP_printk( + "num_bufs=%d bulk_size=%d op=%c%c", + __entry->num_bufs, __entry->bulk_size, + (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.', + (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 'i' : '.' + ) +); + DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template, TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, @@ -591,6 +651,28 @@ TRACE_EVENT(kgsl_context_detach, ) ); +TRACE_EVENT(kgsl_context_destroy, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + TRACE_EVENT(kgsl_mmu_pagefault, TP_PROTO(struct kgsl_device *device, unsigned int page, @@ -681,6 +763,30 @@ TRACE_EVENT(kgsl_fire_event, __entry->id, __entry->ts, __entry->type, __entry->age) ); +TRACE_EVENT(kgsl_active_count, + + TP_PROTO(struct kgsl_device *device, unsigned long ip), + + TP_ARGS(device, ip), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, count) + __field(unsigned long, ip) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->count = atomic_read(&device->active_cnt); + __entry->ip = ip; + ), + + TP_printk( + "d_name=%s active_cnt=%x func=%pf", + __get_str(device_name), __entry->count, (void *) __entry->ip + ) +); + #endif /* _KGSL_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c index c62f67b4a5c68fad7af6e29969e66091b8ccae88..9cebacec0484c3a2c5b2a0150af3ab8fce92c972 100644 --- a/drivers/gpu/msm/z180.c +++ b/drivers/gpu/msm/z180.c @@ -17,7 +17,6 @@ #include "kgsl.h" #include "kgsl_cffdump.h" #include "kgsl_sharedmem.h" -#include "kgsl_trace.h" #include "z180.h" #include "z180_reg.h" @@ -94,7 +93,8 @@ enum z180_cmdwindow_type { #define Z180_CMDWINDOW_TARGET_SHIFT 0 #define Z180_CMDWINDOW_ADDR_SHIFT 8 -static int z180_start(struct kgsl_device *device, unsigned int init_ram); +static int z180_init(struct kgsl_device *device); +static int z180_start(struct kgsl_device *device); static int z180_stop(struct kgsl_device *device); static int z180_wait(struct kgsl_device *device, struct kgsl_context *context, @@ -245,20 +245,17 @@ static int z180_setup_pt(struct kgsl_device *device, int result = 0; struct z180_device *z180_dev = Z180_DEVICE(device); - result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory); if (result) goto error; - result = kgsl_mmu_map_global(pagetable, &device->memstore, - GSL_PT_PAGE_RV | GSL_PT_PAGE_WV); + result = kgsl_mmu_map_global(pagetable, &device->memstore); if (result) goto error_unmap_dummy; result = kgsl_mmu_map_global(pagetable, - &z180_dev->ringbuffer.cmdbufdesc, - GSL_PT_PAGE_RV); + &z180_dev->ringbuffer.cmdbufdesc); if (result) goto error_unmap_memstore; /* @@ -323,16 +320,11 @@ static void addcmd(struct 
z180_ringbuffer *rb, unsigned int timestamp, *p++ = ADDR_VGV3_LAST << 24; } -static void z180_cmdstream_start(struct kgsl_device *device, int init_ram) +static void z180_cmdstream_start(struct kgsl_device *device) { struct z180_device *z180_dev = Z180_DEVICE(device); unsigned int cmd = VGV3_NEXTCMD_JUMP << VGV3_NEXTCMD_NEXTCMD_FSHIFT; - if (init_ram) { - z180_dev->timestamp = 0; - z180_dev->current_timestamp = 0; - } - addmarker(&z180_dev->ringbuffer, 0); z180_cmdwindow_write(device, ADDR_VGV3_MODE, 4); @@ -362,7 +354,13 @@ static int room_in_rb(struct z180_device *device) return ts_diff < Z180_PACKET_COUNT; } -static int z180_idle(struct kgsl_device *device) +/** + * z180_idle() - Idle the 2D device + * @device: Pointer to the KGSL device struct for the Z180 + * + * wait until the z180 submission queue is idle + */ +int z180_idle(struct kgsl_device *device) { int status = 0; struct z180_device *z180_dev = Z180_DEVICE(device); @@ -382,10 +380,8 @@ static int z180_idle(struct kgsl_device *device) int z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_context *context, - struct kgsl_ibdesc *ibdesc, - unsigned int numibs, - uint32_t *timestamp, - unsigned int ctrl) + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) { long result = 0; unsigned int ofs = PACKETSIZE_STATESTREAM * sizeof(unsigned int); @@ -398,6 +394,20 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, struct kgsl_pagetable *pagetable = dev_priv->process_priv->pagetable; struct z180_device *z180_dev = Z180_DEVICE(device); unsigned int sizedwords; + unsigned int numibs; + struct kgsl_ibdesc *ibdesc; + + mutex_lock(&device->mutex); + + kgsl_active_count_get(device); + + if (cmdbatch == NULL) { + result = EINVAL; + goto error; + } + + ibdesc = cmdbatch->ibdesc; + numibs = cmdbatch->ibcount; if (device->state & KGSL_STATE_HUNG) { result = -EINVAL; @@ -439,7 +449,7 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, context->id, cmd, sizedwords); /* context switch */ if ((context->id != (int)z180_dev->ringbuffer.prevctx) || - (ctrl & KGSL_CONTEXT_CTX_SWITCH)) { + (cmdbatch->flags & KGSL_CONTEXT_CTX_SWITCH)) { KGSL_CMD_INFO(device, "context switch %d -> %d\n", context->id, z180_dev->ringbuffer.prevctx); kgsl_mmu_setstate(&device->mmu, pagetable, @@ -447,10 +457,13 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, cnt = PACKETSIZE_STATESTREAM; ofs = 0; } - kgsl_setstate(&device->mmu, + + result = kgsl_setstate(&device->mmu, KGSL_MEMSTORE_GLOBAL, kgsl_mmu_pt_get_flags(device->mmu.hwpagetable, device->id)); + if (result < 0) + goto error; result = wait_event_interruptible_timeout(device->wait_queue, room_in_rb(z180_dev), @@ -491,9 +504,12 @@ z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv, z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, cmd); z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0); error: + kgsl_trace_issueibcmds(device, context->id, cmdbatch, + *timestamp, cmdbatch->flags, result, 0); - trace_kgsl_issueibcmds(device, context->id, ibdesc, numibs, - *timestamp, ctrl, result, 0); + kgsl_active_count_put(device); + + mutex_unlock(&device->mutex); return (int)result; } @@ -503,6 +519,7 @@ static int z180_ringbuffer_init(struct kgsl_device *device) struct z180_device *z180_dev = Z180_DEVICE(device); memset(&z180_dev->ringbuffer, 0, sizeof(struct z180_ringbuffer)); z180_dev->ringbuffer.prevctx = Z180_INVALID_CONTEXT; + z180_dev->ringbuffer.cmdbufdesc.flags = KGSL_MEMFLAGS_GPUREADONLY; return 
kgsl_allocate_contiguous(&z180_dev->ringbuffer.cmdbufdesc, Z180_RB_SIZE); } @@ -559,7 +576,17 @@ static int __devexit z180_remove(struct platform_device *pdev) return 0; } -static int z180_start(struct kgsl_device *device, unsigned int init_ram) +static int z180_init(struct kgsl_device *device) +{ + struct z180_device *z180_dev = Z180_DEVICE(device); + + z180_dev->timestamp = 0; + z180_dev->current_timestamp = 0; + + return 0; +} + +static int z180_start(struct kgsl_device *device) { int status = 0; @@ -576,7 +603,7 @@ static int z180_start(struct kgsl_device *device, unsigned int init_ram) if (status) goto error_clk_off; - z180_cmdstream_start(device, init_ram); + z180_cmdstream_start(device); mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT); kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); @@ -661,7 +688,7 @@ static int z180_getproperty(struct kgsl_device *device, return status; } -static unsigned int z180_isidle(struct kgsl_device *device) +static bool z180_isidle(struct kgsl_device *device) { struct z180_device *z180_dev = Z180_DEVICE(device); @@ -822,9 +849,9 @@ static int z180_waittimestamp(struct kgsl_device *device, { int status = -EINVAL; - /* Don't wait forever, set a max (10 sec) value for now */ + /* Don't wait forever, set a max of Z180_IDLE_TIMEOUT */ if (msecs == -1) - msecs = 10 * MSEC_PER_SEC; + msecs = Z180_IDLE_TIMEOUT; mutex_unlock(&device->mutex); status = z180_wait(device, context, timestamp, msecs); @@ -858,11 +885,30 @@ static int z180_wait(struct kgsl_device *device, return status; } -static void -z180_drawctxt_destroy(struct kgsl_device *device, - struct kgsl_context *context) +struct kgsl_context * +z180_drawctxt_create(struct kgsl_device_private *dev_priv, + uint32_t *flags) { - struct z180_device *z180_dev = Z180_DEVICE(device); + int ret; + struct kgsl_context *context = kzalloc(sizeof(*context), GFP_KERNEL); + if (context == NULL) + return ERR_PTR(-ENOMEM); + ret = kgsl_context_init(dev_priv, context); + if (ret != 0) { + kfree(context); + return ERR_PTR(ret); + } + return context; +} + +static int +z180_drawctxt_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + struct z180_device *z180_dev; + + device = context->device; + z180_dev = Z180_DEVICE(device); z180_idle(device); @@ -872,6 +918,14 @@ z180_drawctxt_destroy(struct kgsl_device *device, kgsl_setstate(&device->mmu, KGSL_MEMSTORE_GLOBAL, KGSL_MMUFLAGS_PTUPDATE); } + + return 0; +} + +static void +z180_drawctxt_destroy(struct kgsl_context *context) +{ + kfree(context); } static void z180_power_stats(struct kgsl_device *device, @@ -926,6 +980,7 @@ static const struct kgsl_functable z180_functable = { .idle = z180_idle, .isidle = z180_isidle, .suspend_context = z180_suspend_context, + .init = z180_init, .start = z180_start, .stop = z180_stop, .getproperty = z180_getproperty, @@ -938,8 +993,10 @@ static const struct kgsl_functable z180_functable = { .irqctrl = z180_irqctrl, .gpuid = z180_gpuid, .irq_handler = z180_irq_handler, + .drain = z180_idle, /* drain == idle for the z180 */ /* Optional functions */ - .drawctxt_create = NULL, + .drawctxt_create = z180_drawctxt_create, + .drawctxt_detach = z180_drawctxt_detach, .drawctxt_destroy = z180_drawctxt_destroy, .ioctl = NULL, .postmortem_dump = z180_dump, diff --git a/drivers/gpu/msm/z180.h b/drivers/gpu/msm/z180.h index 268aac3efe686b2d1ba12a2740e0dd7240608df1..a36e92d864fd47c2e93b3a4c7203e72b346d63a1 100644 --- a/drivers/gpu/msm/z180.h +++ b/drivers/gpu/msm/z180.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-2012, The Linux Foundation. 
All rights reserved. +/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -29,7 +29,7 @@ #define Z180_DEFAULT_PWRSCALE_POLICY NULL /* Wait a maximum of 10 seconds when trying to idle the core */ -#define Z180_IDLE_TIMEOUT (10 * 1000) +#define Z180_IDLE_TIMEOUT (20 * 1000) struct z180_ringbuffer { unsigned int prevctx; @@ -45,5 +45,6 @@ struct z180_device { }; int z180_dump(struct kgsl_device *, int); +int z180_idle(struct kgsl_device *); #endif /* __Z180_H */ diff --git a/drivers/gpu/msm/z180_postmortem.c b/drivers/gpu/msm/z180_postmortem.c index c1e5f07cf0897b0b70badf5483eaae4cadf24842..03ebdb572d8bcd13421808cdfefe68f486039c57 100644 --- a/drivers/gpu/msm/z180_postmortem.c +++ b/drivers/gpu/msm/z180_postmortem.c @@ -58,6 +58,8 @@ static void z180_dump_regs(struct kgsl_device *device) unsigned int i; unsigned int reg_val; + z180_idle(device); + KGSL_LOG_DUMP(device, "Z180 Register Dump\n"); for (i = 0; i < ARRAY_SIZE(regs_to_dump); i++) { kgsl_regread(device, @@ -168,6 +170,7 @@ static void z180_dump_ib(struct kgsl_device *device) KGSL_LOG_DUMP(device, "Could not map IB to kernel memory, Ringbuffer Slot: %d\n", rb_slot_num); + kgsl_mem_entry_put(entry); continue; } @@ -190,6 +193,7 @@ static void z180_dump_ib(struct kgsl_device *device) linebuf); } KGSL_LOG_DUMP(device, "IB Dump Finished\n"); + kgsl_mem_entry_put(entry); } } } diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6200095bc5c404ef2220e02aa9376630241cdbeb..66bfac2458eb298a20d2ac1432f851b516be51bd 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -1,5 +1,4 @@ -/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved. - * +/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and * only version 2 as published by the Free Software Foundation. 
@@ -50,6 +49,9 @@ __asm__ __volatile__ ( \ #define MSM_IOMMU_ATTR_CACHED_WT 0x3 +static int msm_iommu_unmap_range(struct iommu_domain *domain, unsigned int va, + unsigned int len); + static inline void clean_pte(unsigned long *start, unsigned long *end, int redirect) { @@ -907,6 +909,7 @@ static int msm_iommu_map_range(struct iommu_domain *domain, unsigned int va, int prot) { unsigned int pa; + unsigned int start_va = va; unsigned int offset = 0; unsigned long *fl_table; unsigned long *fl_pte; @@ -978,12 +981,6 @@ static int msm_iommu_map_range(struct iommu_domain *domain, unsigned int va, chunk_offset = 0; sg = sg_next(sg); pa = get_phys_addr(sg); - if (pa == 0) { - pr_debug("No dma address for sg %p\n", - sg); - ret = -EINVAL; - goto fail; - } } continue; } @@ -1037,12 +1034,6 @@ static int msm_iommu_map_range(struct iommu_domain *domain, unsigned int va, chunk_offset = 0; sg = sg_next(sg); pa = get_phys_addr(sg); - if (pa == 0) { - pr_debug("No dma address for sg %p\n", - sg); - ret = -EINVAL; - goto fail; - } } } @@ -1055,6 +1046,8 @@ static int msm_iommu_map_range(struct iommu_domain *domain, unsigned int va, __flush_iotlb(domain); fail: mutex_unlock(&msm_iommu_lock); + if (ret && offset > 0) + msm_iommu_unmap_range(domain, start_va, offset); return ret; } diff --git a/drivers/iommu/msm_iommu_pagetable.c b/drivers/iommu/msm_iommu_pagetable.c index 2ee9ba6a27bdf6fbdccac31616cf343c2217c56b..34bbddc7caff64613dec8dbf2e8c741da3cfe81c 100644 --- a/drivers/iommu/msm_iommu_pagetable.c +++ b/drivers/iommu/msm_iommu_pagetable.c @@ -351,11 +351,6 @@ int msm_iommu_pagetable_map_range(struct iommu_pt *pt, unsigned int va, sl_offset = SL_OFFSET(va); chunk_pa = get_phys_addr(sg); - if (chunk_pa == 0) { - pr_debug("No dma address for sg %p\n", sg); - ret = -EINVAL; - goto fail; - } while (offset < len) { /* Set up a 2nd level page table if one doesn't exist */ @@ -399,12 +394,6 @@ int msm_iommu_pagetable_map_range(struct iommu_pt *pt, unsigned int va, chunk_offset = 0; sg = sg_next(sg); chunk_pa = get_phys_addr(sg); - if (chunk_pa == 0) { - pr_debug("No dma address for sg %p\n", - sg); - ret = -EINVAL; - goto fail; - } } } diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h index 29a44de41c7473d29cafdad0a49d0d2a753f76dd..0af811c86ab8f1dc6adfc473b4e4416ae73c1cf8 100644 --- a/include/linux/msm_kgsl.h +++ b/include/linux/msm_kgsl.h @@ -12,25 +12,27 @@ #define KGSL_VERSION_MINOR 14 /*context flags */ -#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 -#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 -#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 -#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 -#define KGSL_CONTEXT_PREAMBLE 0x00000010 -#define KGSL_CONTEXT_TRASH_STATE 0x00000020 -#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 -#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 -#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 -#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 +#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 +#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 + +#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +#define KGSL_CONTEXT_SYNC 0x00000400 /* bits [12:15] are reserved for future use */ -#define KGSL_CONTEXT_TYPE_MASK 0x01F00000 -#define KGSL_CONTEXT_TYPE_SHIFT 20 +#define 
KGSL_CONTEXT_TYPE_MASK 0x01F00000 +#define KGSL_CONTEXT_TYPE_SHIFT 20 -#define KGSL_CONTEXT_TYPE_ANY 0 -#define KGSL_CONTEXT_TYPE_GL 1 -#define KGSL_CONTEXT_TYPE_CL 2 -#define KGSL_CONTEXT_TYPE_C2D 3 -#define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_ANY 0 +#define KGSL_CONTEXT_TYPE_GL 1 +#define KGSL_CONTEXT_TYPE_CL 2 +#define KGSL_CONTEXT_TYPE_C2D 3 +#define KGSL_CONTEXT_TYPE_RS 4 #define KGSL_CONTEXT_INVALID 0xffffffff @@ -194,31 +196,6 @@ enum kgsl_property_type { KGSL_PROP_VERSION = 0x00000008, KGSL_PROP_GPU_RESET_STAT = 0x00000009, KGSL_PROP_PWRCTRL = 0x0000000E, - KGSL_PROP_FAULT_TOLERANCE = 0x00000011, -}; - -/* Fault Tolerance policy flags */ -#define KGSL_FT_DISABLE 0x00000001 -#define KGSL_FT_REPLAY 0x00000002 -#define KGSL_FT_SKIPIB 0x00000004 -#define KGSL_FT_SKIPFRAME 0x00000008 -#define KGSL_FT_DEFAULT_POLICY (KGSL_FT_REPLAY + KGSL_FT_SKIPIB) - -/* Pagefault policy flags */ -#define KGSL_FT_PAGEFAULT_INT_ENABLE 0x00000001 -#define KGSL_FT_PAGEFAULT_GPUHALT_ENABLE 0x00000002 -#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE 0x00000004 -#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT 0x00000008 -#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY (KGSL_FT_PAGEFAULT_INT_ENABLE + \ - KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE) - -/* Fault tolerance config */ -struct kgsl_ft_config { - unsigned int ft_policy; /* Fault Tolerance policy flags */ - unsigned int ft_pf_policy; /* Pagefault policy flags */ - unsigned int ft_pm_dump; /* KGSL enable postmortem dump */ - unsigned int ft_detect_ms; - unsigned int ft_dos_timeout_ms; }; struct kgsl_shadowprop { @@ -234,6 +211,26 @@ struct kgsl_version { unsigned int dev_minor; }; +/* Performance counter groups */ + +#define KGSL_PERFCOUNTER_GROUP_CP 0x0 +#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1 +#define KGSL_PERFCOUNTER_GROUP_PC 0x2 +#define KGSL_PERFCOUNTER_GROUP_VFD 0x3 +#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4 +#define KGSL_PERFCOUNTER_GROUP_VPC 0x5 +#define KGSL_PERFCOUNTER_GROUP_TSE 0x6 +#define KGSL_PERFCOUNTER_GROUP_RAS 0x7 +#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8 +#define KGSL_PERFCOUNTER_GROUP_TP 0x9 +#define KGSL_PERFCOUNTER_GROUP_SP 0xA +#define KGSL_PERFCOUNTER_GROUP_RB 0xB +#define KGSL_PERFCOUNTER_GROUP_PWR 0xC +#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD +#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE + +#define KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF + /* structure holds list of ibs */ struct kgsl_ibdesc { unsigned int gpuaddr; @@ -287,7 +284,7 @@ struct kgsl_device_waittimestamp_ctxtid { #define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \ _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid) -/* issue indirect commands to the GPU. +/* DEPRECATED: issue indirect commands to the GPU. * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE * ibaddr and sizedwords must specify a subset of a buffer created * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM @@ -295,6 +292,9 @@ struct kgsl_device_waittimestamp_ctxtid { * timestamp is a returned counter value which can be passed to * other ioctls to determine when the commands have been executed by * the GPU. 
+ * + * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS + * instead */ struct kgsl_ringbuffer_issueibcmds { unsigned int drawctxt_id; @@ -684,6 +684,202 @@ struct kgsl_gpumem_sync_cache { #define IOCTL_KGSL_GPUMEM_SYNC_CACHE \ _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache) +/** + * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET + * @groupid: Performance counter group ID + * @countable: Countable to select within the group + * @offset: Return offset of the reserved counter + * + * Get an available performance counter from a specified groupid. The offset + * of the performance counter will be returned after successfully assigning + * the countable to the counter for the specified group. An error will be + * returned and an offset of 0 if the groupid is invalid or there are no + * more counters left. After successfully getting a perfcounter, the user + * must call kgsl_perfcounter_put(groupid, countable) when finished with + * the perfcounter to clear up perfcounter resources. + * + */ +struct kgsl_perfcounter_get { + unsigned int groupid; + unsigned int countable; + unsigned int offset; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_GET \ + _IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get) + +/** + * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT + * @groupid: Performance counter group ID + * @countable: Countable to release within the group + * + * Put an allocated performance counter to allow others to have access to the + * resource that was previously taken. This is only to be called after + * successfully getting a performance counter from kgsl_perfcounter_get(). + * + */ +struct kgsl_perfcounter_put { + unsigned int groupid; + unsigned int countable; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_PUT \ + _IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group ID + * @countables: Return array of the currently active countables + * @count: Size of the countables array + * @max_counters: Return total number of counters for the group ID + * + * Query the available performance counters given a groupid. The array + * *countables is used to return the current active countables in counters. + * The size of the array is passed in @count so the kernel will write at most + * @count entries for the group ID. The total number of available + * counters for the group ID is returned in max_counters. + * If the array or size passed in is invalid, only the maximum number + * of counters will be returned and no data will be written to *countables. + * If the groupid is invalid an error code will be returned.
+ * + */ +struct kgsl_perfcounter_query { + unsigned int groupid; + /* Array to return the current countable for up to count counters */ + unsigned int *countables; + unsigned int count; + unsigned int max_counters; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query) + +/** + * struct kgsl_perfcounter_read - argument to IOCTL_KGSL_PERFCOUNTER_READ + * @groupid: Performance counter group IDs + * @countable: Performance counter countable IDs + * @value: Return performance counter reads + * @count: Number of groupid/countable pairs in the reads array + * + * Read in the current value of a performance counter given by the groupid + * and countable. + * + */ + +struct kgsl_perfcounter_read_group { + unsigned int groupid; + unsigned int countable; + unsigned long long value; +}; + +struct kgsl_perfcounter_read { + struct kgsl_perfcounter_read_group *reads; + unsigned int count; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read) +/* + * struct kgsl_gpumem_sync_cache_bulk - argument to + * IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK + * @id_list: list of GPU buffer ids of the buffers to sync + * @count: number of GPU buffer ids in id_list + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * + * Sync the cache for memory headed to and from the GPU. Certain + * optimizations can be made on the cache operation based on the total + * size of the working set of memory to be managed. + */ +struct kgsl_gpumem_sync_cache_bulk { + unsigned int *id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk) + +/* + * struct kgsl_cmd_syncpoint_timestamp + * @context_id: ID of a KGSL context + * @timestamp: GPU timestamp + * + * This structure defines a syncpoint comprising a context/timestamp pair. A + * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define + * dependencies that must be met before the command can be submitted to the + * hardware. + */ +struct kgsl_cmd_syncpoint_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0 + +struct kgsl_cmd_syncpoint_fence { + int fd; +}; + +#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1 + +/** + * struct kgsl_cmd_syncpoint - Define a sync point for a command batch + * @type: type of sync point defined here + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * + * This structure contains pointers defining a specific command sync point. + * The pointer and size should point to a type-appropriate structure.
+ */ +struct kgsl_cmd_syncpoint { + int type; + void __user *priv; + unsigned int size; +}; + +/** + * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS + * @context_id: KGSL context ID that owns the commands + * @flags: + * @cmdlist: User pointer to a list of kgsl_ibdesc structures + * @numcmds: Number of commands listed in cmdlist + * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures + * @numsyncs: Number of sync points listed in synclist + * @timestamp: On entry, a user defined timestamp; on exit, the timestamp + * assigned to the command batch + * + * This structure specifies a command to send to the GPU hardware. This is + * similar to kgsl_issueibcmds except that it doesn't support the legacy way to + * submit IB lists and it adds sync points to block the IB until the + * dependencies are satisfied. This entry point is the new and preferred way + * to submit commands to the GPU. + */ + +struct kgsl_submit_commands { + unsigned int context_id; + unsigned int flags; + struct kgsl_ibdesc __user *cmdlist; + unsigned int numcmds; + struct kgsl_cmd_syncpoint __user *synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands) + #ifdef __KERNEL__ #ifdef CONFIG_MSM_KGSL_DRM int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,
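/*
 * Editorial sketch (not part of the patch): minimal user-space calls into two
 * of the new ioctls defined above.  These assume a device fd opened on the
 * KGSL node (e.g. /dev/kgsl-3d0), a context already created with
 * IOCTL_KGSL_DRAWCTXT_CREATE, and an indirect buffer already mapped for the
 * GPU whose kgsl_ibdesc carries gpuaddr and sizedwords.  The function names
 * and inputs are placeholders, and the header is assumed to be a user-space
 * copy of msm_kgsl.h in which __user expands to nothing.
 */
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>

/* Submit one IB through IOCTL_KGSL_SUBMIT_COMMANDS, gated on a prior
 * (context, timestamp) pair by way of a timestamp sync point. */
static int submit_ib(int fd, unsigned int ctxt_id, unsigned int ib_gpuaddr,
		     unsigned int ib_dwords, unsigned int wait_ctxt,
		     unsigned int wait_ts, unsigned int *out_ts)
{
	struct kgsl_ibdesc ib = {
		.gpuaddr = ib_gpuaddr,
		.sizedwords = ib_dwords,
	};
	struct kgsl_cmd_syncpoint_timestamp ts_sync = {
		.context_id = wait_ctxt,
		.timestamp = wait_ts,
	};
	struct kgsl_cmd_syncpoint sync = {
		.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP,
		.priv = &ts_sync,
		.size = sizeof(ts_sync),
	};
	struct kgsl_submit_commands cmds = {
		.context_id = ctxt_id,
		.flags = 0,		/* kernel assigns the timestamp */
		.cmdlist = &ib,
		.numcmds = 1,
		.synclist = &sync,
		.numsyncs = 1,
	};
	int ret = ioctl(fd, IOCTL_KGSL_SUBMIT_COMMANDS, &cmds);

	if (ret == 0 && out_ts != NULL)
		*out_ts = cmds.timestamp;	/* timestamp assigned to the batch */
	return ret;
}

/* Reserve, read and release one CP counter through the new perfcounter
 * ioctls, pairing every successful GET with a PUT. */
static int read_cp_counter(int fd, unsigned int countable,
			   unsigned long long *value)
{
	struct kgsl_perfcounter_get get = {
		.groupid = KGSL_PERFCOUNTER_GROUP_CP,
		.countable = countable,
	};
	struct kgsl_perfcounter_put put = {
		.groupid = KGSL_PERFCOUNTER_GROUP_CP,
		.countable = countable,
	};
	struct kgsl_perfcounter_read_group grp = {
		.groupid = KGSL_PERFCOUNTER_GROUP_CP,
		.countable = countable,
	};
	struct kgsl_perfcounter_read rd = {
		.reads = &grp,
		.count = 1,
	};
	int ret;

	ret = ioctl(fd, IOCTL_KGSL_PERFCOUNTER_GET, &get);
	if (ret)
		return ret;

	ret = ioctl(fd, IOCTL_KGSL_PERFCOUNTER_READ, &rd);
	if (ret == 0)
		*value = grp.value;

	ioctl(fd, IOCTL_KGSL_PERFCOUNTER_PUT, &put);
	return ret;
}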