from pyqpanda import *
from pyqpanda import exp as vexp
from pyqpanda import log as vlog
from scipy.optimize import minimize
from functools import partial
from matplotlib.pyplot import *
import sklearn.datasets as datasets
import numpy as np
import random
import math
import copy
import time
# Unused module-level counter — TODO confirm it can be removed.
p = 0
# Grid-world dimensions: the agent moves on a MAX_ROW x MAX_COLUMN board.
MAX_ROW = 4
MAX_COLUMN = 4
class state:
    """A position on the grid world, identified by (row, column)."""

    def __init__(self, row, column):
        self.r = row      # row index, 0-based
        self.c = column   # column index, 0-based

    def __eq__(self, other):
        # Two states are equal when they denote the same grid cell.
        return self.r == other.r and self.c == other.c

    # Defining __eq__ sets __hash__ to None; restore hashability so states
    # can also be used in sets/dicts, not only in lists via `in`.
    def __hash__(self):
        return hash((self.r, self.c))

    def __repr__(self):
        return f"state({self.r}, {self.c})"
class rstate(state):
    """A grid state carrying an associated reward value."""

    def __init__(self, row, column, reward):
        super().__init__(row, column)
        self.rw = reward  # reward attached to this cell
# Goal cell: reaching it ends the episode with reward +1.
terminalset = [state(3, 3)]
# "Abyss" cells: stepping onto any of them ends the episode with reward -1.
abyssset = [state(0, 3), state(1, 1), state(1, 2), state(2, 1), state(2, 2)]
def reward(currentstate):
    """Return the immediate reward for entering *currentstate*.

    +1 at the goal, -1 in an abyss cell, 0 everywhere else.
    """
    if currentstate in terminalset:
        return 1
    return -1 if currentstate in abyssset else 0
def end_or_not(currentstate):
    """Return True when *currentstate* terminates the episode."""
    return currentstate in terminalset or currentstate in abyssset
def epsilon_greedy(currentstate, epsilon, Q):
    """Pick the next state with an epsilon-greedy policy over four moves.

    Actions are numbered 1=up, 2=right, 3=down, 4=left.  With probability
    *epsilon* a uniformly random action is taken; otherwise the action with
    the highest Q-value wins (ties broken randomly by shuffling first).
    A move that would leave the grid keeps the agent in place.

    Returns (nextstate, choice).
    """
    if np.random.random() < epsilon:
        # Explore.  BUG FIX: the original used permutation(4)[0], which
        # yields 0-3 -- action 4 (left) was never explored and 0 mapped to
        # "stay in place".  Shift into the 1-4 action range.
        choice = np.random.permutation(4)[0] + 1
    else:
        # Exploit: argmax over Q with random tie-breaking.
        candidates = [{'order': i + 1, 'value': Q[i]} for i in range(4)]
        np.random.shuffle(candidates)
        choice = max(candidates, key=lambda x: x['value'])['order']
    if choice == 1 and currentstate.r > 0:
        nextstate = state(currentstate.r - 1, currentstate.c)
    elif choice == 2 and currentstate.c < MAX_COLUMN - 1:
        nextstate = state(currentstate.r, currentstate.c + 1)
    elif choice == 3 and currentstate.r < MAX_ROW - 1:
        nextstate = state(currentstate.r + 1, currentstate.c)
    elif choice == 4 and currentstate.c > 0:
        nextstate = state(currentstate.r, currentstate.c - 1)
    else:
        # Blocked by a wall: stay where we are.
        nextstate = state(currentstate.r, currentstate.c)
    return nextstate, choice
def new_experience(exp_set, currentstate, nextstate, choice):
    """Append a transition [s, s', reward, done, action] to the replay buffer."""
    exp_set.append([
        currentstate,
        nextstate,
        reward(nextstate),
        end_or_not(nextstate),
        choice,
    ])
def sample_from_exp_set(exp_set, num):
    """Draw up to *num* transitions from the replay buffer without replacement.

    When the buffer holds *num* or fewer entries the whole buffer is
    returned unchanged.
    """
    if len(exp_set) > num:
        return random.sample(exp_set, num)
    return exp_set
def initial_state(qubitlist, currentstate):
    """Encode the (row, column) grid position into RY rotations.

    Row and column (each 0-3) are written as two binary digits apiece;
    a '1' bit becomes an RY(pi) rotation on the matching qubit.
    """
    circuit = VariationalQuantumCircuit()
    # bin(x + 4)[3:] is a two-character binary rendering of x in 0..3.
    bits = bin(currentstate.r + 4)[3:] + bin(currentstate.c + 4)[3:]
    for qubit, bit in zip(qubitlist, bits):
        circuit.insert(VariationalQuantumGate_RY(qubit, var(math.pi * int(bit))))
    return circuit
def unitary_local(qubitlist, theta_y, theta_z):
    """One layer of per-qubit RY followed by RZ parameterised rotations."""
    circuit = VariationalQuantumCircuit()
    for idx in range(len(qubitlist)):
        circuit.insert(VariationalQuantumGate_RY(qubitlist[idx], theta_y[idx]))
        circuit.insert(VariationalQuantumGate_RZ(qubitlist[idx], theta_z[idx]))
    return circuit
def unitary_entangling(qubitlist):
    """Ring of CZ gates over neighbouring qubits, closed with a wrap-around."""
    circuit = VariationalQuantumCircuit()
    n = len(qubitlist)
    for k in range(n - 1):
        circuit.insert(VariationalQuantumGate_CZ(qubitlist[k], qubitlist[k + 1]))
    # Close the ring: entangle the first qubit with the last one.
    circuit.insert(VariationalQuantumGate_CZ(qubitlist[0], qubitlist[n - 1]))
    return circuit
def classifier_circuit(qubitlist, currentstate, depth, theta_y, theta_z):
    """Full variational circuit: state encoding plus *depth* trainable layers.

    theta_y / theta_z are indexed per layer; layers after the first are
    preceded by an entangling block.
    """
    circuit = VariationalQuantumCircuit()
    circuit.insert(initial_state(qubitlist, currentstate))
    circuit.insert(unitary_local(qubitlist, theta_y[0], theta_z[0]))
    for layer in range(1, depth):
        circuit.insert(unitary_entangling(qubitlist))
        circuit.insert(unitary_local(qubitlist, theta_y[layer], theta_z[layer]))
    return circuit
def q_meas(vqc, Z_str, machine, qubitlist):
    """Variational expectation of the Pauli observable *Z_str* (e.g. 'Z0')."""
    return qop(vqc, PauliOperator({Z_str: 1}), machine, qubitlist)
def Q_2(sample, gamma, qubit_num, depth, theta_y, theta_z, machine):
    """Compute the Bellman target Q-value for one replay sample.

    sample = [state, nextstate, reward, done, action].  For a terminal
    transition the target is just the reward; otherwise it is
    reward + gamma * max_a Q_target(nextstate, a), where the target
    network is the circuit parameterised by (theta_y, theta_z).
    """
    if sample[3]:  # 'done' flag -- do not bootstrap from a terminal state
        return sample[2]
    qubitlist_2 = machine.qAlloc_many(qubit_num)
    vqc_2 = classifier_circuit(qubitlist_2, sample[1], depth, theta_y, theta_z)
    expectation_set = []
    # Q(s', a) is the expectation of Z on qubit a (one qubit per action).
    for i in range(4):
        Z_str = 'Z' + str(i)
        expectation_v = q_meas(vqc_2, Z_str, machine, qubitlist_2)
        expectation_c = eval(expectation_v, True)
        expectation_set.append(expectation_c[0][0])
    machine.qFree_all(qubitlist_2)
    # NOTE(review): shuffling cannot change max() over plain numbers; it is
    # kept only to preserve the original np.random stream.
    np.random.shuffle(expectation_set)
    return sample[2] + gamma * max(expectation_set)
def naturedqn(qubit_num, depth, epsilon):
    """Train the variational-circuit DQN on the grid world.

    Runs *t* training episodes.  Each step acts epsilon-greedily with the
    acting network (theta_*_3), stores the transition in a replay buffer,
    samples a minibatch, and applies one momentum-SGD update to the online
    parameters.  Every *period* episodes the target-network parameters
    (theta_*_2) are synchronised with the online ones.

    Returns the trained (theta_y, theta_z) parameter arrays.
    """
    count = 1
    gamma = 0.9            # discount factor
    t = 20                 # number of training episodes
    sample_num = 5         # minibatch size
    learning_rate = 0.3
    coefficient = 0.9      # momentum coefficient
    period = 5             # target-network sync period (in episodes)
    exp_set = []           # replay buffer
    theta_y = 2 * math.pi * np.random.random((depth, qubit_num))
    theta_z = 2 * math.pi * np.random.random((depth, qubit_num))
    theta_y_2 = theta_y    # target-network parameters
    theta_z_2 = theta_z
    theta_y_3 = theta_y    # acting-network parameters
    theta_z_3 = theta_z
    machine = init_quantum_machine(QMachineType.CPU_SINGLE_THREAD)
    for episode in range(t):
        currentstate = state(0, 0)
        while not ((currentstate in terminalset) or (currentstate in abyssset)):
            theta_y_3_v = var(theta_y_3)
            theta_z_3_v = var(theta_z_3)
            qubitlist_3 = machine.qAlloc_many(qubit_num)
            vqc_3 = classifier_circuit(qubitlist_3, currentstate, depth, theta_y_3_v, theta_z_3_v)
            expection_set = []
            # BUG FIX: this loop previously reused the episode index `i`,
            # leaving i == 3 afterwards so the target-network sync below
            # (`i % period == 0` with period 5) never fired.
            for action in range(4):
                Z_str = 'Z' + str(action)
                expection_v = q_meas(vqc_3, Z_str, machine, qubitlist_3)
                expection_c = eval(expection_v, True)
                expection_set.append(expection_c[0][0])
            machine.qFree_all(qubitlist_3)
            Q_set = expection_set
            nextstate, choice = epsilon_greedy(currentstate, epsilon, Q_set)
            new_experience(exp_set, currentstate, nextstate, choice)
            currentstate = nextstate
            sample_set = sample_from_exp_set(exp_set, sample_num)
            theta_y_v = var(theta_y)
            theta_z_v = var(theta_z)
            theta_y_2_v = var(theta_y_2)
            theta_z_2_v = var(theta_z_2)
            grad = {theta_y_v: np.zeros((depth, qubit_num)),
                    theta_z_v: np.zeros((depth, qubit_num))}
            velocity = {theta_y_v: np.zeros((depth, qubit_num)),
                        theta_z_v: np.zeros((depth, qubit_num))}
            Q1_set = []
            Q2_set = []
            loss_v = var(0)
            qubitlist = machine.qAlloc_many(qubit_num)
            for j in range(len(sample_set)):
                vqc = classifier_circuit(qubitlist, sample_set[j][0], depth, theta_y_v, theta_z_v)
                # The Q-value of the taken action is the Z expectation on
                # the qubit matching that action (actions are 1-based).
                Z_str = 'Z' + str(sample_set[j][4] - 1)
                Q1_set.append(q_meas(vqc, Z_str, machine, qubitlist))
                Q2_set.append(Q_2(sample_set[j], gamma, qubit_num, depth, theta_y_2_v, theta_z_2_v, machine))
            # Mean squared TD error over the minibatch.
            for j in range(len(sample_set)):
                loss_v = loss_v + (Q1_set[j] - Q2_set[j]) * (Q1_set[j] - Q2_set[j])
            loss_v = loss_v / len(sample_set)
            loss_c = eval(loss_v, True)
            print(count, ':', loss_c)
            count += 1
            back(expression(loss_v), grad)
            # Momentum SGD update applied directly to the variational vars.
            for variable in grad:
                raw_value = variable.get_value()
                velocity[variable] = - grad[variable] * learning_rate + velocity[variable] * coefficient
                new_value = raw_value + velocity[variable]
                variable.set_value(new_value)
            theta_y = eval(theta_y_v, True)
            theta_z = eval(theta_z_v, True)
            theta_y_3 = theta_y
            theta_z_3 = theta_z
            del grad
            del velocity
            if episode % period == 0:
                # Periodically refresh the target network.
                theta_y_2 = theta_y
                theta_z_2 = theta_z
            machine.qFree_all(qubitlist)
    print(theta_y)
    print(theta_z)
    destroy_quantum_machine(machine)
    return theta_y, theta_z
def strategy(currentstate, qubit_num, depth, epsilon, theta_y, theta_z):
    """Act epsilon-greedily from *currentstate* with trained parameters.

    Returns the resulting next state.

    Fixes over the original: the quantum machine was initialised twice
    (leaking the first instance), the qubits were freed and the machine
    destroyed twice (the second pair on an already-destroyed machine),
    and the raw eval() result was appended instead of the scalar
    value [0][0] used everywhere else in this file.
    """
    machine = init_quantum_machine(QMachineType.CPU_SINGLE_THREAD)
    qubitlist = machine.qAlloc_many(qubit_num)
    vqc = classifier_circuit(qubitlist, currentstate, depth, var(theta_y), var(theta_z))
    expectation_set = []
    for i in range(4):
        Z_str = 'Z' + str(i)
        expectation_v = q_meas(vqc, Z_str, machine, qubitlist)
        expectation_c = eval(expectation_v, True)
        expectation_set.append(expectation_c[0][0])
    machine.qFree_all(qubitlist)
    destroy_quantum_machine(machine)
    nextstate, _choice = epsilon_greedy(currentstate, epsilon, expectation_set)
    return nextstate
# ---- driver: train the agent, then print the greedy move from test states ----
qubit_num = 4
depth = 8
epsilon = 0.1
theta_y, theta_z = naturedqn(qubit_num, depth, epsilon)
test_state_set = [state(0, 0), state(1, 0), state(2, 0), state(3, 0),
                  state(3, 1), state(3, 2), state(1, 3)]
for test_state in test_state_set:
    nextstate = strategy(test_state, qubit_num, depth, epsilon, theta_y, theta_z)
    print([test_state.r, test_state.c], ':', [nextstate.r, nextstate.c])