#include <stdio.h>
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <CL/cl.h>
#include <omp.h>
#define MAXGPU 3
#define MAXN 1024
#define MAXM 32
#define MAXMID 32
#define CheckErr(status) { gpuAssert((status), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, int abort=true) {
    if (code != cudaSuccess) {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
uint32_t hostMtx[MAXM][MAXN*MAXN];
uint32_t hostMid[MAXGPU][MAXN*MAXN];
int N = MAXN, M, Q;
int clNeedDevCnt = 3; 
__global__ void matrixMul(uint32_t A[], uint32_t B[], uint32_t C[], int N) {
    int r = blockIdx.x * blockDim.x + threadIdx.x;
    int x = r / N, y = r % N;
    uint32_t sum = 0;
    for (int i = 0; i < N; i++)
        sum += A[x*N + i] * B[i*N + y];
    C[x * N + y] = sum;
}
__global__ void matrixAdd(uint32_t A[], uint32_t B[], uint32_t C[]) {
    int r = blockIdx.x * blockDim.x + threadIdx.x;
    C[r] = A[r] + B[r];
}
void matrix_multiply(uint32_t *cuMtxA, uint32_t *cuMtxB, uint32_t *cuMtxC) {
    int localSz = 1;
    for (int i = 1; i <= 1024; i++) {
        if (N*N % i == 0)
            localSz = i;
    }
    dim3 cuBlock(localSz);
    dim3 cuGrid(N*N/localSz);
    matrixMul<<<cuGrid, cuBlock>>>(cuMtxA, cuMtxB, cuMtxC, N);
    CheckErr(cudaGetLastError());
}
void matrix_add(uint32_t *cuMtxA, uint32_t *cuMtxB, uint32_t *cuMtxC) {
    int localSz = 1;
    for (int i = 1; i <= 1024; i++) {
        if (N*N % i == 0)
            localSz = i;
    }
    dim3 cuBlock(localSz);
    dim3 cuGrid(N*N/localSz);
    matrixAdd<<<cuGrid, cuBlock>>>(cuMtxA, cuMtxB, cuMtxC);
    CheckErr(cudaGetLastError());
}
char expr[1024];
typedef struct Node {
    struct Node *l, *r;
    int opcode;
    uint32_t *hostV, *cuV;
    int regNeed, regId;
    int pid, mid;
    long long h;
} Node;
void replaceReg(Node *u, int a, int b) {
    if (u->l == NULL)	return ;
    if (u->regId == a)
        u->regId = b;
    replaceReg(u->l, a, b);
    replaceReg(u->r, a, b);
}
void updateNode(Node *u, Node *l, Node *r, int opcode) {
    u->l = l, u->r = r, u->opcode = opcode;
    if (opcode == '+') {
        u->h = u->l->h + u->r->h + N;
        
        if (u->l->regNeed == u->r->regNeed) {
            u->regNeed = u->l->regNeed + 1;
            u->regId = u->regNeed;
            replaceReg(u->r, u->r->regId, u->regId);
        } else {
            u->regNeed = u->l->regNeed > u->r->regNeed ? u->l->regNeed : u->r->regNeed;
            u->regId = u->regNeed;
        }
    } else if (opcode == '*') {
        u->h = u->l->h + u->r->h + N*N;
        
        if (abs(u->l->regNeed - u->r->regNeed) == 1) {
            u->regNeed = u->l->regNeed + 1;
            u->regId = u->regNeed;
        } else if (u->l->regNeed == u->r->regNeed) {
            u->regNeed = u->l->regNeed + 2;
            u->regId = u->regNeed;
            replaceReg(u->r, u->r->regId, u->regId-1);
        } else {
            u->regNeed = u->l->regNeed > u->r->regNeed ? u->l->regNeed : u->r->regNeed;
            u->regId = u->regNeed;
            int a, b;
            if (u->l->regId == u->regId) {
                a = u->l->regId, b = u->l->regId-1;
                replaceReg(u->l, a, -1);
                replaceReg(u->l, b, a);
                replaceReg(u->l, -1, b);
            } else {
                a = u->r->regId, b = u->r->regId-1;
                replaceReg(u->r, a, -1);
                replaceReg(u->r, b, a);
                replaceReg(u->r, -1, b);
            }
        }
    }
    assert(u->regId < MAXMID);
}
Node* parseExpr(int l, int r, char expr[], int procId) {
    Node *u = (Node *) calloc(1, sizeof(Node));
    u->pid = procId;
    if (l == r) {
        int idx = expr[l] - 'A';
        u->mid = idx;
        u->h = 0;
        return u;
    }
    int cnt = 0;
    for (int i = l; i <= r; i++) {
        if (expr[i] == '(') {
            cnt++;
        } else if (expr[i] == ')') {
            cnt--;
        } else if (expr[i] == '+' && cnt == 0) {
            updateNode(u, parseExpr(l, i-1, expr, procId), parseExpr(i+1, r, expr, procId), '+');
            return u;
        }
    }
    for (int i = l; i <= r; i++) {
        if (expr[i] == '(') {
            if (cnt == 0 && i != l) {
                updateNode(u, parseExpr(l, i-1, expr, procId), parseExpr(i, r, expr, procId), '*');
                return u;
            }
            cnt++;
        } else if (expr[i] == ')') {
            cnt--;
        } else if (expr[i] >= 'A' && expr[i] <= 'Z' && cnt == 0 && i != l) {
            updateNode(u, parseExpr(l, i-1, expr, procId), parseExpr(i, r, expr, procId), '*');
            return u;
        }
    }
    free(u);
    return parseExpr(l+1, r-1, expr, procId);
}
uint32_t writeOut(uint32_t *hostC) {
    uint32_t h = 0;
    uint32_t *Cend = hostC + N*N, *C = hostC;
    for (; C != Cend; C++)
        h = (h + *C) * 2654435761LU;
    return h;
}
uint32_t *cuMemMid[MAXGPU][MAXMID];
uint32_t *cuMemIn[MAXGPU][MAXM];
void memRelocation(Node *u, int did, Node *nodes[], int *offset) {
    if (u->l == NULL) {
        nodes[*offset] = u, (*offset)++;
        return ;
    }
    u->cuV = cuMemMid[did][u->regId];
    if (u->l->regNeed > u->r->regNeed) {
        memRelocation(u->l, did, nodes, offset);
        memRelocation(u->r, did, nodes, offset);
    } else {
        memRelocation(u->r, did, nodes, offset);
        memRelocation(u->l, did, nodes, offset);
    }
    fprintf(stderr, "reg%d = %s ", u->regId, u->opcode == '+' ? "add" : "mul");
    if (u->l->l == NULL)	fprintf(stderr, "%c ", u->l->mid + 'A');
    else					fprintf(stderr, "reg%d ", u->l->regId);
    if (u->r->l == NULL)	fprintf(stderr, "%c\n", u->r->mid + 'A');
    else					fprintf(stderr, "reg%d\n", u->r->regId);
    nodes[*offset] = u, (*offset)++;
    return ;
}
int executeGPU(Node *workQue[][128], int workQueSz[], uint32_t resultBuff[]) {
    Node* nodes[MAXGPU][128];
    int offset[MAXGPU] = {};
    uint32_t memSz = N*N*sizeof(uint32_t);
    int memDeploy[MAXGPU][MAXM] = {};
    int regDeploy[MAXGPU][MAXMID] = {};
    
#pragma omp parallel for
    for (int p = 0; p < clNeedDevCnt; p++) {
        cudaSetDevice(p);
        for (int q = 0; q < workQueSz[p]; q++) {
            
            offset[p] = 0;
            memRelocation(workQue[p][q], p, nodes[p], &offset[p]);
            
            for (int i = 0; i < offset[p]; i++) {
                Node *u = nodes[p][i];
                
                if (u->l == NULL) {
                    if (!memDeploy[p][u->mid]) {
                        cudaMalloc((void **) &cuMemIn[p][u->mid], memSz);
                        cudaMemcpy(cuMemIn[p][u->mid], hostMtx[u->mid], memSz, cudaMemcpyHostToDevice); 
                        memDeploy[p][u->mid] = 1;
                    }
                    u->cuV = cuMemIn[p][u->mid];
                    continue;
                }
                
                if (!regDeploy[p][u->regId])
                    cudaMalloc((void **) &cuMemMid[p][u->regId], memSz);
                if (u->cuV == NULL)	
                    u->cuV = cuMemMid[p][u->regId];
                if (u->opcode == '*')
                    matrix_multiply(u->l->cuV, u->r->cuV, u->cuV);	
                else
                    matrix_add(u->l->cuV, u->r->cuV, u->cuV);
            }
            
            Node *root = workQue[p][q];
            fprintf(stderr, "register need %d\n", root->regNeed);
            cudaMemcpy(hostMid[p], root->cuV, memSz, cudaMemcpyDeviceToHost);
            uint32_t ret = writeOut(hostMid[p]);
            resultBuff[root->pid] = ret;
            
            for (int i = 0; i < offset[p]; i++) {
                Node *u = nodes[p][i];
                if (u->l != NULL && u->hostV)
                    free(u->hostV);
                free(u);
            }
        }
        
        cudaSetDevice(p);
        for (int i = 0; i < MAXMID; i++) {
            cudaFree(cuMemMid[p][i]);
        }
        for (int i = 0; i < M; i++) {
            cudaFree(cuMemIn[p][i]);
        }
    }
    return 1;
}
int readIn() {
    if (scanf("%s", expr) != 1)
        return 0;
    return 1;
}
int balance_cmp(const void *a, const void *b) {
    Node *x = *(Node **) a;
    Node *y = *(Node **) b;
    if (x->h == y->h)	return 0;
    if (x->h < y->h)	return 1;
    return -1;
}
void onStart() {
    int S[64];
    assert(scanf("%d %d", &M, &N) == 2);
    for (int i = 0; i < M; i++)
        assert(scanf("%d", &S[i]) == 1);
#pragma omp parallel for
    for (int p = 0; p < M; p++) {
        uint32_t x = 2, n = N*N;
        uint32_t c = S[p];
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < N; j++) {
                x = (x * x + c + i + j)%n;
                hostMtx[p][i*N+j] = x;
            }
        }
    }
    Node *procBuff[128];
    if (scanf("%d", &Q) != 1)
        return ;
    for (int i = 0; i < Q; i++) {
        readIn();
        int expr_len = strlen(expr);
        procBuff[i] = parseExpr(0, expr_len-1, expr, i);
    }
    qsort(procBuff, Q, sizeof(Node*), balance_cmp);
    float gpuSpeed[MAXGPU] = {1.f, 1.8f, 2.0f};
    long long workload[MAXGPU] = {};
    int workQueSz[MAXGPU] = {};
    uint32_t resultBuff[MAXGPU] = {};
    Node *workQue[MAXGPU][128];
    for (int i = 0; i < Q; i++) {
        int mn = 0;
        for (int j = 0; j < clNeedDevCnt; j++) {
            if (workload[j]*gpuSpeed[j] < workload[mn]*gpuSpeed[mn])
                mn = j;
        }
        workload[mn] += procBuff[i]->h;
        workQue[mn][workQueSz[mn]++] = procBuff[i];
    }
    executeGPU(workQue, workQueSz, resultBuff);
    for (int i = 0; i < Q; i++)
        printf("%u\n", resultBuff[i]);
}
int main(int argc, char *argv[]) {
    onStart();
    return 0;
}