I create a CSV file with a single column of data (named "c0") containing 20 values. When I run ANALYZE, an exception is thrown:
import bayeslite
import bayeslite.crosscat
import numpy as np
import math
import random
import sys
from crosscat.MultiprocessingEngine import MultiprocessingEngine
from bayeslite.shell.pretty import pp_cursor
def pprint(cursor):
    """Pretty-print ``cursor`` to standard output.

    Hands ``cursor`` to ``pp_cursor`` with ``sys.stdout`` as the output
    stream and returns whatever ``pp_cursor`` returns.
    """
    out = sys.stdout
    return pp_cursor(out, cursor)
if __name__ == '__main__':
    # Build a (t, 1) array of uniform random values and save it to data.csv
    # with the single header "c0" (comments='' keeps the header unprefixed).
    t = 20
    data = np.random.rand(t).reshape(t, 1)
    np.savetxt('data.csv', data, header='c0', comments='')

    # Table and generator names are derived from the number of rows.
    btable = "table{}".format(t)
    generator = "table{}_cc".format(t)

    # Open a BayesDB handle and register a crosscat metamodel that wraps a
    # MultiprocessingEngine.
    bdb = bayeslite.bayesdb_open()
    engine = bayeslite.crosscat.CrosscatMetamodel(MultiprocessingEngine())
    bayeslite.bayesdb_register_metamodel(bdb, engine)

    # Load the CSV into a newly created table.
    bayeslite.bayesdb_read_csv_file(
        bdb, btable, "data.csv", header=True, create=True)

    # Print the loaded table.
    bql = '''
SELECT * FROM {}
'''.format(btable)
    c = bdb.execute(bql)
    pprint(c)

    # Create a crosscat generator over the single numerical column c0.
    bql = '''
CREATE GENERATOR {} FOR {}
USING crosscat (
c0 NUMERICAL
);
'''.format(generator, btable)
    bdb.execute(bql)

    # Initialize 10 models for the generator.
    bql = '''
INITIALIZE {} MODELS FOR {};
'''.format(10, generator)
    bdb.execute(bql)

    # Run 10 iterations of analysis. Per the traceback in this report, the
    # IndexError is raised from this bdb.execute() call.
    bql = '''
ANALYZE {} for {} ITERATIONS WAIT;
'''.format(generator, 10)
    bdb.execute(bql)

    # Simulate 15 values of c0 into the temporary table simres.
    bql = '''
CREATE TEMP TABLE simres AS
SIMULATE c0 FROM {}
LIMIT {};
'''.format(generator, 15)
    bdb.execute(bql)

    # Read the simulated rows back into a numpy array inside a savepoint.
    bql = 'SELECT * FROM simres;'
    simdata = None
    with bdb.savepoint():
        c = bdb.execute(bql)
        simdata = np.array(c.fetchall())
In [19]: run one_col.py
c0
---------------
0.21819395493
0.930373567089
0.725379439808
0.691447842751
0.261562572085
0.948943970262
0.46605176487
0.0151432877238
0.441854759811
0.665655889346
0.0765081395686
0.447978645136
0.825578309208
0.500403070452
0.658746843184
0.843358329166
0.248048357726
0.79623218477
0.526216988005
0.875729646947
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/home/fsaad/Documents/pcp/bayeslite/experiments/one_col.py in <module>()
55 ANALYZE {} for {} ITERATIONS WAIT;
56 '''.format(generator, 10)
---> 57 bdb.execute(bql)
58
59 bql = '''
/usr/local/lib/python2.7/dist-packages/bayeslite-0.1.dev-py2.7.egg/bayeslite/bayesdb.pyc in execute(self, string, bindings)
149 if more:
150 raise ValueError('>1 phrase in string')
--> 151 return bql.execute_phrase(self, phrase, bindings)
152
153 def sql_execute(self, string, bindings=None):
/usr/local/lib/python2.7/dist-packages/bayeslite-0.1.dev-py2.7.egg/bayeslite/bql.pyc in execute_phrase(bdb, phrase, bindings)
553 max_seconds=phrase.seconds,
554 ckpt_iterations=phrase.ckpt_iterations,
--> 555 ckpt_seconds=phrase.ckpt_seconds)
556 return empty_cursor(bdb)
557
/usr/local/lib/python2.7/dist-packages/bayeslite-0.1.dev-py2.7.egg/bayeslite/crosscat.pyc in analyze_models(self, bdb, generator_id, modelnos, iterations, max_seconds, ckpt_iterations, ckpt_seconds)
588 X_L=X_L_list,
589 X_D=X_D_list,
--> 590 n_steps=n_steps,
591 )
592 if iterations is not None:
/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/LocalEngine.pyc in analyze(self, M_c, T, X_L, X_D, kernel_list, n_steps, c, r, max_iterations, max_time, do_diagnostics, diagnostics_every_N, ROW_CRP_ALPHA_GRID, COLUMN_CRP_ALPHA_GRID, S_GRID, MU_GRID, N_GRID, do_timing, CT_KERNEL)
267 diagnostics_dict = munge_diagnostics(diagnostics_dict_list)
268 if reprocess_diagnostics_func is not None:
--> 269 diagnostics_dict = reprocess_diagnostics_func(diagnostics_dict)
270 ret_tuple = ret_tuple + (diagnostics_dict, )
271 if do_timing:
/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in default_reprocess_diagnostics_func(diagnostics_arr_dict)
50 # column_paritition_assignments are column, iter, chain
51 D = column_partition_assignments.shape[0] - 1
---> 52 f_z_statistic_0_1 = column_partition_assignments_to_f_z_statistic(column_partition_assignments, 1, 0)
53 f_z_statistic_0_D = column_partition_assignments_to_f_z_statistic(column_partition_assignments, D, 0)
54 diagnostics_arr_dict['f_z[0, 1]'] = f_z_statistic_0_1
/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in column_partition_assignments_to_f_z_statistic(column_partition_assignments, j, i)
43 iter_column_chain_arr = column_partition_assignments.transpose((1, 0, 2))
44 helper = lambda column_chain_arr: column_chain_to_ratio(column_chain_arr, j, i)
---> 45 as_list = map(helper, iter_column_chain_arr)
46 return numpy.array(as_list)[:, numpy.newaxis]
47
/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in <lambda>(column_chain_arr)
42 j, i=0):
43 iter_column_chain_arr = column_partition_assignments.transpose((1, 0, 2))
---> 44 helper = lambda column_chain_arr: column_chain_to_ratio(column_chain_arr, j, i)
45 as_list = map(helper, iter_column_chain_arr)
46 return numpy.array(as_list)[:, numpy.newaxis]
/usr/local/lib/python2.7/dist-packages/CrossCat-0.1.8-py2.7-linux-x86_64.egg/crosscat/utils/diagnostic_utils.pyc in column_chain_to_ratio(column_chain_arr, j, i)
32
33 def column_chain_to_ratio(column_chain_arr, j, i=0):
---> 34 chain_i_j = column_chain_arr[[i, j], :]
35 is_same = numpy.diff(chain_i_j, axis=0)[0] == 0
36 n_chains = len(is_same)
IndexError: index 1 is out of bounds for axis 0 with size 1