kanaries / rath Goto Github PK
View Code? Open in Web Editor NEW
Next generation of automated data exploratory analysis and visualization platform.
Home Page: https://kanaries.net
License: GNU Affero General Public License v3.0
Next generation of automated data exploratory analysis and visualization platform.
Home Page: https://kanaries.net
License: GNU Affero General Public License v3.0
effect_modifiers=effect_modifiers, # causes of outcome other than treatment
graph=graph,
evaluate_effect_strength=True,
graph = constructPAG(fields, causalModel)
print('treat:', treatment)
results = []
def testModel(results, model):
# model.view_model()
estimand = model.identify_effect(proceed_when_unidentifiable=True)
methods = {
'psm': 'backdoor.propensity_score_matching',
'pss': 'backdoor.propensity_score_stratification',
'psw': 'backdoor.propensity_score_weighting',
'lr': 'backdoor.linear_regression',
'glm': 'backdoor.generalized_linear_model',
'iv': 'iv.instrumental_variable',
'iv/rd': 'iv.regression_discontinuity'
}
tmp = lambda df: satisfy(df, groups.current)
satCurrent, satOther = satisfy(dataSource, groups.current), satisfy(dataSource, groups.other)
method = 'lr'
if methods[method].startswith('backdoor.propensity_score_'):
for treat in model._treatment:
filters = [f for f in groups.current.predicates if f.fid == treat]
tmp = IDoWhy.IRInsightExplainSubspace(predicates=filters)
model._data = model._data.assign(**{treat: satisfy(transData, tmp) })
estimate = model.estimate_effect(
estimand,
methods[method],
target_units=lambda df: inferDiff(satCurrent, satOther), # satisfy(self.dataSource, groups.current), satisfy(df, groups.other)),
# evaluate_effect_strength=True,
)
results.append(IDoWhy.LinkInfo(
src=f.fid,
tar=measures[0].fid,
src_type=2,
tar_type=1,
description=IDoWhy.LinkInfoDescription(key='', data={'estimate': str(estimate)}),
responsibility=significance_value(estimate.value, var=1.)
))
# TODO: params
if estimate.value > 0:
print("f===========", f.fid)
print("target_units=\n", dataSource[tmp(transData)])
print('unobserved f = ', f, '\n', estimate)
for e in adj[f_ind[measures[0].fid]]:
if e['src_type'] in [-1, 2]:
# TODO:
pass
# General: use origin graph
# Fallback: without graph, any variable can be used as common_cause
for f in fields:
if f.fid not in dimensions and f.fid not in [f.fid for f in measures]:
# common_causes = [f.fid]
# effect_modifiers = [f.fid]
effect_modifiers = [f.fid]
# TODO: if edges in graph
model = dowhy.CausalModel(
data=transData,
# treatment=[d for d in dimensions if flipped or not compare(current.get(d, None), other.get(d, None))],
common_causes=[f.fid],
treatment=treatment,
outcome=[measures[0].fid],
# instruments=[], # Z, causes of treatment, no confounding for the effect of Z on outcome
# effect_modifiers=effect_modifiers, # causes of outcome other than treatment
# graph=graph,
identify_vars=True
)
testModel(results, model)
return results
def significance_value(x: float, var: float = 1.):
    """Map a deviation to a two-sided significance-like score in [0, 1).

    Computes ``2 * Phi(|x| / var) - 1``, i.e. the probability mass of a
    normal distribution N(0, var^2) lying within ``[-|x|, |x|]``.  The score
    is 0 at ``x == 0`` and approaches 1 as ``|x|`` grows; it is symmetric
    in ``x``.

    Fixes: the original placed the docstring *after* the import statement,
    so it was a discarded string constant, not a docstring; leftover debug
    ``print`` calls removed.

    Args:
        x: deviation from the mean, i.e. X - E[X].
        var: scale passed to the normal CDF.
             NOTE(review): despite the name, this is used as the *standard
             deviation* (``scale``), not the variance -- confirm with callers.

    Returns:
        float in [0, 1): probability that |N(0, var^2)| <= |x|.
    """
    import scipy.stats as st
    return 2 * st.norm.cdf(abs(x), scale=var) - 1
def ExplainData(props: IDoWhy.IRInsightExplainProps) -> tp.List[IDoWhy.IRInsightExplainResult]:
    """Estimate the causal effect for the requested view and wrap it as LinkInfo.

    Builds an ``ExplainDataSession`` from the request, identifies and
    estimates the causal effect of the view's first dimension on its first
    measure, and returns an ``IRInsightExplainResult`` describing the
    estimate.  On any failure, falls back to ``explainData(props)``.

    Fixes relative to the original: the 'confidence interval' line used a
    comma instead of ``=`` (it built and discarded a tuple, never assigning
    the key); leftover debug ``print`` removed.
    """
    session = ExplainDataSession(props.data, props.fields)
    session.g_gml = constructPAG(props.fields, props.causalModel)
    session.updateModel(props.view.dimensions, props.view.measures, props.groups)
    # NOTE(review): method name is misspelled in the session API ("identitify");
    # kept as-is because it must match the session object's definition.
    session.identitifyEstimand()
    session.estimateEffect(props.groups)
    results = []
    try:
        descrip_data = {
            'data': inferInfo(session),
            'target estimand': session.estimate.target_estimand.__str__(),
            'realized estimand': session.estimate.realized_estimand_expr,
            'target units': session.estimate.estimator.target_units_tostr() if hasattr(session.estimate, "estimator") else None,
            'mean value of estimation': session.estimate.value,
            'effect estimates': session.estimate.cate_estimates if hasattr(session.estimate, "cate_estimates") else None,
        }
        if hasattr(session.estimate, "estimator"):
            if session.estimate.estimator._significance_test:
                descrip_data['p-value'] = session.estimate.test_stat_significance()
            if session.estimate.estimator._confidence_intervals:
                # BUG FIX: was `descrip_data['confidence interval'], [...]` --
                # a discarded tuple; the key was never actually assigned.
                descrip_data['confidence interval'] = [session.estimate.estimator.confidence_level,
                                                      session.estimate.get_confidence_intervals()]
        if session.estimate.conditional_estimates is not None:
            descrip_data['conditional estimates'] = str(session.estimate.conditional_estimates)
        if session.estimate.effect_strength is not None:
            descrip_data['change in outcome attributable to treatment'] = session.estimate.effect_strength["fraction-effect"]
        descrip_data['desc_by'] = 'ExplainData'
        results.append(IDoWhy.LinkInfo(
            src=props.view.dimensions[0], tar=props.view.measures[0].fid, src_type=-1, tar_type=1,
            description=IDoWhy.LinkInfoDescription(key='', data=descrip_data),
            responsibility=significance_value(session.estimate.value, var=1.)
        ))
    except Exception as e:
        # Best-effort boundary: report the failure and fall back to the
        # non-causal explainer rather than surfacing the error to the caller.
        print(str(e), file=sys.stderr)
        results.extend(explainData(props))
    return IDoWhy.IRInsightExplainResult(
        causalEffects=results
    )
type InfoArrayType = keyof DateTimeInfoArray
export type DateTimeInfoType = keyof DateTimeInfo
export function parseDateTimeArray(dateTime: string[]): DateTimeInfoArray {
// TODO: [refactor] Polyfills: 中文格式等
// TODO: [feat] assume the same dateTime format or support different format in one column
let infoArray = {} as DateTimeInfoArray
let reg_id: number | undefined, max_cnt = 0;
for (let i = 0; i < analyzer.rules.length; ++i) {
eslint-disable-next-line no-console
import { Checkbox, DefaultButton, DetailsList, Dropdown, IColumn, Icon, Label, Pivot, PivotItem, SelectionMode, Spinner } from "@fluentui/react";
import produce from "immer";
import { observer } from "mobx-react-lite";
import { nanoid } from "nanoid";
import { forwardRef, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react";
import styled from "styled-components";
import type { IFieldMeta } from "../../interfaces";
import { useGlobalStore } from "../../store";
import { execPredict, IPredictProps, IPredictResult, PredictAlgorithm, PredictAlgorithms, TrainTestSplitFlag } from "./predict";
// Root layout of the prediction panel: a vertical flex column that clips its
// own overflow; the inner `.content` area is the scrollable region, and its
// direct children keep their natural (non-growing) size.
const Container = styled.div`
    flex-grow: 1;
    flex-shrink: 1;
    display: flex;
    flex-direction: column;
    overflow: hidden;
    > .content {
        flex-grow: 1;
        flex-shrink: 1;
        display: flex;
        flex-direction: column;
        padding: 0.5em;
        overflow: auto;
        > * {
            flex-grow: 0;
            flex-shrink: 0;
        }
    }
`;
// Fixed-size, independently scrollable wrapper for the DetailsList tables.
const TableContainer = styled.div`
    flex-grow: 0;
    flex-shrink: 0;
    overflow: auto;
`;
// Table-row wrapper that tints the row by its selection role: amber for a
// feature ('attribution'), blue for a 'target', and slightly dimmed when the
// field is not selected at all (restored to full opacity on hover).
const Row = styled.div<{ selected: 'attribution' | 'target' | false }>`
    > div {
        background-color: ${({ selected }) => (
            selected === 'attribution' ? 'rgba(194,132,2,0.2)' : selected === 'target' ? 'rgba(66,121,242,0.2)' : undefined
        )};
        filter: ${({ selected }) => selected ? 'unset' : 'opacity(0.8)'};
        cursor: pointer;
        :hover {
            filter: unset;
        }
    }
`;
// Prediction task modes offered in the split-button menu
// (keys are IPredictProps['mode'] values; labels are user-facing Chinese text).
const ModeOptions = [
    { key: 'classification', text: '分类' },
    { key: 'regression', text: '回归' },
] as const;
// FIXME: to avoid losing prediction results when switching to another flow,
// stash them in this module-level cache for now; decide whether they should
// be kept and where this state should live, then migrate promptly.
const predictCache: {
    id: string; algo: PredictAlgorithm; startTime: number; completeTime: number; data: IPredictResult;
}[] = [];
/**
 * Panel for configuring and running a prediction over the currently selected
 * causal fields: pick feature/target fields, choose an algorithm and mode,
 * run `execPredict`, and browse/compare the accumulated results.
 * Exposes `updateInput` through the forwarded ref so a parent can preset the
 * feature/target field sets.
 */
const PredictPanel = forwardRef<{
    updateInput?: (input: { features: IFieldMeta[]; targets: IFieldMeta[] }) => void;
}, {}>(function PredictPanel (_, ref) {
    const { causalStore, dataSourceStore } = useGlobalStore();
    const { selectedFields } = causalStore;
    const { cleanedData, fieldMetas } = dataSourceStore;
    // Fields chosen as model inputs (features) and prediction targets.
    const [predictInput, setPredictInput] = useState<{ features: IFieldMeta[]; targets: IFieldMeta[] }>({
        features: [],
        targets: [],
    });
    const [algo, setAlgo] = useState<PredictAlgorithm>('decisionTree');
    const [mode, setMode] = useState<IPredictProps['mode']>('classification');
    useImperativeHandle(ref, () => ({
        updateInput: input => setPredictInput(input),
    }));
    // Keep the chosen fields consistent with the current selection: drop any
    // field that is no longer selected; when nothing has been chosen yet,
    // default to "first selected field = target, the rest = features".
    useEffect(() => {
        setPredictInput(before => {
            if (before.features.length || before.targets.length) {
                return {
                    features: selectedFields.filter(f => before.features.some(feat => feat.fid === f.fid)),
                    targets: selectedFields.filter(f => before.targets.some(tar => tar.fid === f.fid)),
                };
            }
            return {
                features: selectedFields.slice(1).map(f => f),
                targets: selectedFields.slice(0, 1),
            };
        });
    }, [selectedFields]);
    const [running, setRunning] = useState(false);
    // Columns of the field-selection table: a "feature" checkbox, a "target"
    // checkbox, and the field name.  Checking one role removes the field
    // from both lists first, so a field is never feature and target at once.
    const fieldsTableCols = useMemo<IColumn[]>(() => {
        return [
            {
                key: 'selectedAsFeature',
                name: `特征 (${predictInput.features.length} / ${selectedFields.length})`,
                onRender: (item) => {
                    const field = item as IFieldMeta;
                    const checked = predictInput.features.some(f => f.fid === field.fid);
                    return (
                        <Checkbox
                            checked={checked}
                            disabled={running}
                            onChange={(_, ok) => {
                                if (running) {
                                    return;
                                }
                                setPredictInput(produce(predictInput, draft => {
                                    draft.features = draft.features.filter(f => f.fid !== field.fid);
                                    draft.targets = draft.targets.filter(f => f.fid !== field.fid);
                                    if (ok) {
                                        draft.features.push(field);
                                    }
                                }));
                            }}
                        />
                    );
                },
                isResizable: false,
                minWidth: 90,
                maxWidth: 90,
            },
            {
                key: 'selectedAsTarget',
                name: `目标 (${predictInput.targets.length} / ${selectedFields.length})`,
                onRender: (item) => {
                    const field = item as IFieldMeta;
                    const checked = predictInput.targets.some(f => f.fid === field.fid);
                    return (
                        <Checkbox
                            checked={checked}
                            disabled={running}
                            onChange={(_, ok) => {
                                if (running) {
                                    return;
                                }
                                setPredictInput(produce(predictInput, draft => {
                                    draft.features = draft.features.filter(f => f.fid !== field.fid);
                                    draft.targets = draft.targets.filter(f => f.fid !== field.fid);
                                    if (ok) {
                                        draft.targets.push(field);
                                    }
                                }));
                            }}
                        />
                    );
                },
                isResizable: false,
                minWidth: 90,
                maxWidth: 90,
            },
            {
                key: 'name',
                name: '因素',
                onRender: (item) => {
                    const field = item as IFieldMeta;
                    return (
                        <span style={{ overflow: 'hidden', textOverflow: 'ellipsis' }}>
                            {field.name || field.fid}
                        </span>
                    );
                },
                minWidth: 120,
            },
        ];
    }, [selectedFields, predictInput, running]);
    const canExecute = predictInput.features.length > 0 && predictInput.targets.length > 0;
    // Handle of the in-flight prediction task; results of a task that is no
    // longer the current one are ignored (stale-response guard).
    const pendingRef = useRef<Promise<unknown>>();
    useEffect(() => {
        // Changing the input invalidates any run in progress.
        pendingRef.current = undefined;
        setRunning(false);
    }, [predictInput]);
    // Refs mirror the latest data so the async callbacks read fresh values
    // without re-subscribing.
    const dataSourceRef = useRef(cleanedData);
    dataSourceRef.current = cleanedData;
    const allFieldsRef = useRef(fieldMetas);
    allFieldsRef.current = fieldMetas;
    const [results, setResults] = useState<{
        id: string; algo: PredictAlgorithm; startTime: number; completeTime: number; data: IPredictResult;
    }[]>([]);
    // FIXME: to avoid losing prediction results when switching to another
    // flow, restore from / flush to the module-level cache for now; decide
    // whether to keep them and where this state should live, then migrate.
    useEffect(() => {
        setResults(predictCache);
        return () => {
            setResults(res => {
                predictCache.splice(0, Infinity, ...res);
                return [];
            });
        };
    }, [cleanedData, fieldMetas]);
    const [tab, setTab] = useState<'config' | 'result'>('config');
    // Randomly flag TRAIN_RATE of the rows as the train split, rest as test.
    // NOTE(review): the constant is named TRAIN_RATE and equals 0.2 -- confirm
    // whether 20% train / 80% test is the intended split direction.
    const trainTestSplitIndices = useMemo<TrainTestSplitFlag[]>(() => {
        const TRAIN_RATE = 0.2;
        const indices = cleanedData.map((_, i) => i);
        const trainSetIndices = new Map<number, 1>();
        const trainSetTargetSize = Math.floor(cleanedData.length * TRAIN_RATE);
        while (trainSetIndices.size < trainSetTargetSize && indices.length) {
            const [index] = indices.splice(Math.floor(indices.length * Math.random()), 1);
            trainSetIndices.set(index, 1);
        }
        return cleanedData.map((_, i) => trainSetIndices.has(i) ? TrainTestSplitFlag.train : TrainTestSplitFlag.test);
    }, [cleanedData]);
    const trainTestSplitIndicesRef = useRef(trainTestSplitIndices);
    trainTestSplitIndicesRef.current = trainTestSplitIndices;
    // Launch a prediction run.  When it completes and is still the current
    // task: append the record (or restart the list if the algorithm changed)
    // and switch to the result tab.
    const handleClickExec = useCallback(() => {
        const startTime = Date.now();
        setRunning(true);
        const task = execPredict({
            dataSource: dataSourceRef.current,
            fields: allFieldsRef.current,
            model: {
                algorithm: algo,
                features: predictInput.features.map(f => f.fid),
                targets: predictInput.targets.map(f => f.fid),
            },
            trainTestSplitIndices: trainTestSplitIndicesRef.current,
            mode,
        });
        pendingRef.current = task;
        task.then(res => {
            if (task === pendingRef.current && res) {
                const completeTime = Date.now();
                setResults(list => {
                    const record = {
                        id: nanoid(8),
                        algo,
                        startTime,
                        completeTime,
                        data: res,
                    };
                    if (list.length > 0 && list[0].algo !== algo) {
                        return [record];
                    }
                    return list.concat([record]);
                });
                setTab('result');
            }
        }).finally(() => {
            pendingRef.current = undefined;
            setRunning(false);
        });
    }, [predictInput, algo, mode]);
    // Newest run first.
    const sortedResults = useMemo(() => {
        return results.slice(0).sort((a, b) => b.completeTime - a.completeTime);
    }, [results]);
    // Ids of up to two result records selected for comparison.
    const [comparison, setComparison] = useState<null | [string] | [string, string]>(null);
    useEffect(() => {
        // Drop comparison entries whose result records no longer exist.
        setComparison(group => {
            if (!group) {
                return null;
            }
            const next = group.filter(id => results.some(rec => rec.id === id));
            if (next.length === 0) {
                return null;
            }
            return next as [string] | [string, string];
        });
    }, [results]);
    // Columns of the result table: comparison checkbox, run index, algorithm
    // name, and accuracy (with an up/down/equal marker vs. the previous run).
    const resultTableCols = useMemo<IColumn[]>(() => {
        return [
            {
                key: 'selected',
                name: '对比',
                onRender: (item) => {
                    const record = item as typeof sortedResults[number];
                    const selected = (comparison ?? [] as string[]).includes(record.id);
                    return (
                        <Checkbox
                            checked={selected}
                            onChange={(_, checked) => {
                                if (checked) {
                                    setComparison(group => {
                                        if (group === null) {
                                            return [record.id];
                                        }
                                        // Keep the first pick, replace the second.
                                        return [group[0], record.id];
                                    });
                                } else if (selected) {
                                    setComparison(group => {
                                        if (group?.some(id => id === record.id)) {
                                            return group.length === 1 ? null : group.filter(id => id !== record.id) as [string];
                                        }
                                        return null;
                                    });
                                }
                            }}
                        />
                    );
                },
                isResizable: false,
                minWidth: 30,
                maxWidth: 30,
            },
            {
                key: 'index',
                name: '运行次数',
                minWidth: 70,
                maxWidth: 70,
                isResizable: false,
                onRender(_, index) {
                    // List is newest-first, so display a descending run number.
                    return <>{index !== undefined ? (sortedResults.length - index) : ''}</>;
                },
            },
            {
                key: 'algo',
                name: '预测模型',
                minWidth: 70,
                onRender(item) {
                    const record = item as typeof sortedResults[number];
                    return <>{PredictAlgorithms.find(which => which.key === record.algo)?.text}</>
                },
            },
            {
                key: 'accuracy',
                name: '准确率',
                minWidth: 150,
                onRender(item, index) {
                    if (!item || index === undefined) {
                        return <></>;
                    }
                    const record = item as typeof sortedResults[number];
                    // "Previous" run chronologically = next row (newest-first order).
                    const previous = sortedResults[index + 1];
                    const comparison: 'better' | 'worse' | 'same' | null = previous ? (
                        previous.data.accuracy === record.data.accuracy ? 'same'
                            : record.data.accuracy > previous.data.accuracy ? 'better' : 'worse'
                    ) : null;
                    return (
                        <span
                            style={{
                                color: {
                                    better: '#0b5a08',
                                    worse: '#6e0811',
                                    same: '#7a7574',
                                }[comparison!],
                                display: 'flex',
                                alignItems: 'center',
                            }}
                        >
                            {comparison && (
                                <Icon
                                    iconName={{
                                        better: 'CaretSolidUp',
                                        worse: 'CaretSolidDown',
                                        same: 'ChromeMinimize',
                                    }[comparison]}
                                    style={{
                                        transform: 'scale(0.8)',
                                        transformOrigin: '0 50%',
                                        marginRight: '0.2em',
                                    }}
                                />
                            )}
                            {record.data.accuracy}
                        </span>
                    );
                },
            },
        ];
    }, [sortedResults, comparison]);
    // Rows whose prediction flipped 0 -> 1 between the two compared runs,
    // keyed by human-readable field names (falling back to the fid).
    const diff = useMemo(() => {
        if (comparison?.length === 2) {
            const before = sortedResults.find(res => res.id === comparison[0]);
            const after = sortedResults.find(res => res.id === comparison[1]);
            if (before && after) {
                const temp: unknown[] = [];
                for (let i = 0; i < before.data.result.length; i += 1) {
                    const row = dataSourceRef.current[before.data.result[i][0]];
                    const prev = before.data.result[i][1];
                    const next = after.data.result[i][1];
                    if (next === 1 && prev === 0) {
                        temp.push(Object.fromEntries(Object.entries(row).map(([k, v]) => [
                            allFieldsRef.current.find(f => f.fid === k)?.name ?? k,
                            v,
                        ])));
                    }
                }
                return temp;
            }
        }
    }, [sortedResults, comparison]);
    useEffect(() => {
        if (diff) {
            // TODO: implement a diff view in the UI to replace this console output
            // eslint-disable-next-line no-console
            console.table(diff);
        }
    }, [diff]);
    return (
        <Container>
            <DefaultButton
                primary
                iconProps={{ iconName: 'Trending12' }}
                disabled={!canExecute || running}
                onClick={running ? undefined : handleClickExec}
                onRenderIcon={() => running ? <Spinner style={{ transform: 'scale(0.75)' }} /> : <Icon iconName="Play" />}
                style={{ width: 'max-content', flexGrow: 0, flexShrink: 0, marginLeft: '0.6em' }}
                split
                menuProps={{
                    items: ModeOptions.map(opt => opt),
                    onItemClick: (_e, item) => {
                        if (item) {
                            setMode(item.key as typeof mode);
                        }
                    },
                }}
            >
                {`${ModeOptions.find(m => m.key === mode)?.text}预测`}
            </DefaultButton>
            <Pivot
                selectedKey={tab}
                onLinkClick={(item) => {
                    item && setTab(item.props.itemKey as typeof tab);
                }}
                style={{ marginTop: '0.5em' }}
            >
                <PivotItem itemKey="config" headerText="模型设置" />
                <PivotItem itemKey="result" headerText="预测结果" />
            </Pivot>
            <div className="content">
                {{
                    config: (
                        <>
                            <Dropdown
                                label="模型选择"
                                options={PredictAlgorithms.map(algo => ({ key: algo.key, text: algo.text }))}
                                selectedKey={algo}
                                onChange={(_, option) => {
                                    const item = PredictAlgorithms.find(which => which.key === option?.key);
                                    if (item) {
                                        setAlgo(item.key);
                                    }
                                }}
                                style={{ width: 'max-content' }}
                            />
                            <Label style={{ marginTop: '1em' }}>分析空间</Label>
                            <TableContainer>
                                <DetailsList
                                    items={selectedFields}
                                    columns={fieldsTableCols}
                                    selectionMode={SelectionMode.none}
                                    onRenderRow={(props, defaultRender) => {
                                        const field = props?.item as IFieldMeta;
                                        const checkedAsAttr = predictInput.features.some(f => f.fid === field.fid);
                                        const checkedAsTar = predictInput.targets.some(f => f.fid === field.fid);
                                        return (
                                            <Row selected={checkedAsAttr ? 'attribution' : checkedAsTar ? 'target' : false}>
                                                {defaultRender?.(props)}
                                            </Row>
                                        );
                                    }}
                                />
                            </TableContainer>
                        </>
                    ),
                    result: (
                        <>
                            <DefaultButton
                                iconProps={{ iconName: 'Delete' }}
                                disabled={results.length === 0}
                                onClick={() => setResults([])}
                                style={{ width: 'max-content' }}
                            >
                                清空记录
                            </DefaultButton>
                            <TableContainer>
                                <DetailsList
                                    items={sortedResults}
                                    columns={resultTableCols}
                                    selectionMode={SelectionMode.none}
                                />
                            </TableContainer>
                        </>
                    ),
                }[tab]}
            </div>
        </Container>
    );
});
export default observer(PredictPanel);
allow user-specified filter for the data view.
when the elements are small. view size can be controlled by stepsize.
when the elements (influencing axis) are large, view size should be controlled by fixed width and height.
anc = []
S = []
G_fd = set()
# for (u, v) in background_knowledge.required_rules_specs:
# src, dest = NodeId.get(u.get_name(), None), NodeId.get(v.get_name(), None)
# if src is None:
# src = NodeId[u.get_name()] = cur_id
# FDNodes.append(u)
# G_fd.add(u.get_attribute('id'))
# adj.append(set())
# anc.append(set())
# attr_id.append(u.get_attribute('id'))
# cur_id += 1
# if dest is None:
# dest = NodeId[v.get_name()] = cur_id
# FDNodes.append(v)
# G_fd.add(v.get_attribute('id'))
# adj.append(set())
# anc.append(set())
# attr_id.append(v.get_attribute('id'))
# cur_id += 1
# adj[src].add(dest)
# anc[dest].add(src)
"""
NodeId: Dict[int, int] 原始图中对应点的局域编号
FDNode: List[int]: 在Gfd中的causallearn格式的graphnodes,全局编号
attr_id: Gfd中每个点在原始图中对应的点编号
adj, anc: Gfd的邻接表
G_fd: Gfd中的点集,原图编号
"""
for dep in functional_dependencies:
if len(dep.params) == 1: # TODO: dep.fid depends only on dep.params[0]:
param, f = dep.params[0].fid, dep.fid
u, v = f_ind[dep.params[0].fid], f_ind[dep.fid]
src, dest = NodeId.get(u, None), NodeId.get(v, None)
if src is None:
src = NodeId[u] = cur_id
node = FCI.GraphNode(f"X{u+1}")
node.add_attribute('id', u)
G_fd.add(u), adj.append(set()), anc.append(set()), FDNodes.append(node)
attr_id.append(u)
cur_id += 1
if dest is None:
dest = NodeId[v] = cur_id
node = FCI.GraphNode(f"X{v+1}")
node.add_attribute('id', v)
G_fd.add(v), adj.append(set()), anc.append(set()), FDNodes.append(node)
attr_id.append(v)
cur_id += 1
adj[src].add(dest)
anc[dest].add(src)
else:
# TODO: should be treated the same as bgKnowledge
pass
topo = toposort(adj)
fake_knowledge = BackgroundKnowledge()
skeleton_knowledge = set()
for t in topo[::-1]:
mxvcnt, y = 0, -1
for a in anc[t]:
print("a = ", a, attr_id[a])
vcnt = np.unique(dataset[:, attr_id[a]]).size
if vcnt > mxvcnt:
y = a
mxvcnt = vcnt
if y == -1: continue
# S.append((attr_id[t], attr_id[y]))
# fake_knowledge.add_required_by_node(FDNodes[t], FDNodes[y])
fake_knowledge.add_required_by_node(FDNodes[y], FDNodes[t])
skeleton_knowledge.add((attr_id[y], attr_id[t]))
# remove X and connected edges from G_FD
G_fd.remove(attr_id[t])
for a in anc[t]:
adj[a].remove(t)
GfdNodes = []
for i, v in enumerate(G_fd):
node = FCI.GraphNode(f"X{v + 1}")
node.add_attribute("id", v)
GfdNodes.append(node)
FDgraph, FD_sep_sets = FCI.fas(dataset, GfdNodes, independence_test_method=independence_test_method, alpha=alpha,
knowledge=None, depth=depth, verbose=verbose)
After installing all packages and dependencies, running yarn workspace backend dev
throws error:
Rath/node_modules/visual-insights/build/esm"' has no exported member 'getInsightViews'.
import { getInsightViews } from 'visual-insights';
Rath/packages/rath-client/src/components/fieldFilter/setSelection.tsx
Lines 16 to 24 in 7148167
filter for nominal (currently non-quantitative) column
Rath/packages/visual-insights/src/insights/subspaces.ts
Lines 33 to 38 in 168bb0e
todo
comment in 168bb0e. It's been assigned to @ObservedObserver because they committed the code.
The currently designed notebook breaks the continuity of the algorithm. The result of each step of the algorithm has to be transported between the front end and the backend, which may incur costs in traffic and efficiency.
It also increases the cost for the front end of maintaining the algorithm's state: the front end has to adjust the algorithm's output for the next stage, which leaves the computation logic maintained separately in different places.
Suggestions:
dev模块内存溢出,此时explore模块和dashboard模块都可以正常工作。
排查一下内存溢出的原因
console.log(app_path)
Line 2 in e6a6406
const { app, BrowserWindow } = require('electron')
// TODO: test todo bot
const path = require('path');
// const app_path = path.resolve(__dirname, './build/index.html')
// console.log(app_path)
connection configs (such as host, port, user) of clickhouse are fixed in the connector. It needs to support dynamic config for both proxy server and clickhouse server.
For visualization dark theme, we are going to replace opacity
channel with lightness
in hsl color space. There might be shadow and default opacity for this design.
view data using const data view from default recommendation. when users set a customized aggregator, the data view is not changed.
graph = constructPAG(fields, causalModel)
print('treat:', treatment)
results = []
def testModel(results, model):
# model.view_model()
estimand = model.identify_effect(proceed_when_unidentifiable=True)
methods = {
'psm': 'backdoor.propensity_score_matching',
'pss': 'backdoor.propensity_score_stratification',
'psw': 'backdoor.propensity_score_weighting',
'lr': 'backdoor.linear_regression',
'glm': 'backdoor.generalized_linear_model',
'iv': 'iv.instrumental_variable',
'iv/rd': 'iv.regression_discontinuity'
}
tmp = lambda df: satisfy(df, groups.current)
satCurrent, satOther = satisfy(dataSource, groups.current), satisfy(dataSource, groups.other)
method = 'lr'
if methods[method].startswith('backdoor.propensity_score_'):
for treat in model._treatment:
filters = [f for f in groups.current.predicates if f.fid == treat]
tmp = IDoWhy.IRInsightExplainSubspace(predicates=filters)
model._data = model._data.assign(**{treat: satisfy(transData, tmp) })
estimate = model.estimate_effect(
estimand,
methods[method],
target_units=lambda df: inferDiff(satCurrent, satOther), # satisfy(self.dataSource, groups.current), satisfy(df, groups.other)),
# evaluate_effect_strength=True,
)
results.append(IDoWhy.LinkInfo(
src=f.fid,
tar=measures[0].fid,
src_type=2,
tar_type=1,
description=IDoWhy.LinkInfoDescription(key='', data={'estimate': str(estimate)}),
responsibility=significance_value(estimate.value, var=1.)
))
# TODO: params
if estimate.value > 0:
print("f===========", f.fid)
print("target_units=\n", dataSource[tmp(transData)])
print('unobserved f = ', f, '\n', estimate)
for e in adj[f_ind[measures[0].fid]]:
if e['src_type'] in [-1, 2]:
# TODO:
pass
# General: use origin graph
# Fallback: without graph, any variable can be used as common_cause
for f in fields:
if f.fid not in dimensions and f.fid not in [f.fid for f in measures]:
# common_causes = [f.fid]
# effect_modifiers = [f.fid]
effect_modifiers = [f.fid]
# TODO: if edges in graph
model = dowhy.CausalModel(
data=transData,
# treatment=[d for d in dimensions if flipped or not compare(current.get(d, None), other.get(d, None))],
common_causes=[f.fid],
treatment=treatment,
outcome=[measures[0].fid],
# instruments=[], # Z, causes of treatment, no confounding for the effect of Z on outcome
# effect_modifiers=effect_modifiers, # causes of outcome other than treatment
# graph=graph,
identify_vars=True
)
testModel(results, model)
return results
def significance_value(x: float, var: float = 1.):
    """Map a deviation to a two-sided significance-like score in [0, 1).

    Computes ``2 * Phi(|x| / var) - 1``, i.e. the probability mass of a
    normal distribution N(0, var^2) lying within ``[-|x|, |x|]``.  The score
    is 0 at ``x == 0`` and approaches 1 as ``|x|`` grows; it is symmetric
    in ``x``.

    Fixes: the original placed the docstring *after* the import statement,
    so it was a discarded string constant, not a docstring; leftover debug
    ``print`` calls removed.

    Args:
        x: deviation from the mean, i.e. X - E[X].
        var: scale passed to the normal CDF.
             NOTE(review): despite the name, this is used as the *standard
             deviation* (``scale``), not the variance -- confirm with callers.

    Returns:
        float in [0, 1): probability that |N(0, var^2)| <= |x|.
    """
    import scipy.stats as st
    return 2 * st.norm.cdf(abs(x), scale=var) - 1
def ExplainData(props: IDoWhy.IRInsightExplainProps) -> tp.List[IDoWhy.IRInsightExplainResult]:
    """Estimate the causal effect for the requested view and wrap it as LinkInfo.

    Builds an ``ExplainDataSession`` from the request, identifies and
    estimates the causal effect of the view's first dimension on its first
    measure, and returns an ``IRInsightExplainResult`` describing the
    estimate.  On any failure, falls back to ``explainData(props)``.

    Fixes relative to the original: the 'confidence interval' line used a
    comma instead of ``=`` (it built and discarded a tuple, never assigning
    the key); leftover debug ``print`` removed.
    """
    session = ExplainDataSession(props.data, props.fields)
    session.g_gml = constructPAG(props.fields, props.causalModel)
    session.updateModel(props.view.dimensions, props.view.measures, props.groups)
    # NOTE(review): method name is misspelled in the session API ("identitify");
    # kept as-is because it must match the session object's definition.
    session.identitifyEstimand()
    session.estimateEffect(props.groups)
    results = []
    try:
        descrip_data = {
            'data': inferInfo(session),
            'target estimand': session.estimate.target_estimand.__str__(),
            'realized estimand': session.estimate.realized_estimand_expr,
            'target units': session.estimate.estimator.target_units_tostr() if hasattr(session.estimate, "estimator") else None,
            'mean value of estimation': session.estimate.value,
            'effect estimates': session.estimate.cate_estimates if hasattr(session.estimate, "cate_estimates") else None,
        }
        if hasattr(session.estimate, "estimator"):
            if session.estimate.estimator._significance_test:
                descrip_data['p-value'] = session.estimate.test_stat_significance()
            if session.estimate.estimator._confidence_intervals:
                # BUG FIX: was `descrip_data['confidence interval'], [...]` --
                # a discarded tuple; the key was never actually assigned.
                descrip_data['confidence interval'] = [session.estimate.estimator.confidence_level,
                                                      session.estimate.get_confidence_intervals()]
        if session.estimate.conditional_estimates is not None:
            descrip_data['conditional estimates'] = str(session.estimate.conditional_estimates)
        if session.estimate.effect_strength is not None:
            descrip_data['change in outcome attributable to treatment'] = session.estimate.effect_strength["fraction-effect"]
        descrip_data['desc_by'] = 'ExplainData'
        results.append(IDoWhy.LinkInfo(
            src=props.view.dimensions[0], tar=props.view.measures[0].fid, src_type=-1, tar_type=1,
            description=IDoWhy.LinkInfoDescription(key='', data=descrip_data),
            responsibility=significance_value(session.estimate.value, var=1.)
        ))
    except Exception as e:
        # Best-effort boundary: report the failure and fall back to the
        # non-causal explainer rather than surfacing the error to the caller.
        print(str(e), file=sys.stderr)
        results.extend(explainData(props))
    return IDoWhy.IRInsightExplainResult(
        causalEffects=results
    )
anc = []
S = []
G_fd = set()
# for (u, v) in background_knowledge.required_rules_specs:
# src, dest = NodeId.get(u.get_name(), None), NodeId.get(v.get_name(), None)
# if src is None:
# src = NodeId[u.get_name()] = cur_id
# FDNodes.append(u)
# G_fd.add(u.get_attribute('id'))
# adj.append(set())
# anc.append(set())
# attr_id.append(u.get_attribute('id'))
# cur_id += 1
# if dest is None:
# dest = NodeId[v.get_name()] = cur_id
# FDNodes.append(v)
# G_fd.add(v.get_attribute('id'))
# adj.append(set())
# anc.append(set())
# attr_id.append(v.get_attribute('id'))
# cur_id += 1
# adj[src].add(dest)
# anc[dest].add(src)
"""
NodeId: Dict[int, int] 原始图中对应点的局域编号
FDNode: List[int]: 在Gfd中的causallearn格式的graphnodes,全局编号
attr_id: Gfd中每个点在原始图中对应的点编号
adj, anc: Gfd的邻接表
G_fd: Gfd中的点集,原图编号
"""
for dep in functional_dependencies:
if len(dep.params) == 1: # TODO: dep.fid depends only on dep.params[0]:
param, f = dep.params[0].fid, dep.fid
u, v = f_ind[dep.params[0].fid], f_ind[dep.fid]
src, dest = NodeId.get(u, None), NodeId.get(v, None)
if src is None:
src = NodeId[u] = cur_id
node = FCI.GraphNode(f"X{u+1}")
node.add_attribute('id', u)
G_fd.add(u), adj.append(set()), anc.append(set()), FDNodes.append(node)
attr_id.append(u)
cur_id += 1
if dest is None:
dest = NodeId[v] = cur_id
node = FCI.GraphNode(f"X{v+1}")
node.add_attribute('id', v)
G_fd.add(v), adj.append(set()), anc.append(set()), FDNodes.append(node)
attr_id.append(v)
cur_id += 1
adj[src].add(dest)
anc[dest].add(src)
else:
# TODO: should be treated the same as bgKnowledge
pass
topo = toposort(adj)
fake_knowledge = BackgroundKnowledge()
skeleton_knowledge = set()
for t in topo[::-1]:
mxvcnt, y = 0, -1
for a in anc[t]:
print("a = ", a, attr_id[a])
vcnt = np.unique(dataset[:, attr_id[a]]).size
if vcnt > mxvcnt:
y = a
mxvcnt = vcnt
if y == -1: continue
# S.append((attr_id[t], attr_id[y]))
# fake_knowledge.add_required_by_node(FDNodes[t], FDNodes[y])
fake_knowledge.add_required_by_node(FDNodes[y], FDNodes[t])
skeleton_knowledge.add((attr_id[y], attr_id[t]))
# remove X and connected edges from G_FD
G_fd.remove(attr_id[t])
for a in anc[t]:
adj[a].remove(t)
GfdNodes = []
for i, v in enumerate(G_fd):
node = FCI.GraphNode(f"X{v + 1}")
node.add_attribute("id", v)
GfdNodes.append(node)
FDgraph, FD_sep_sets = FCI.fas(dataset, GfdNodes, independence_test_method=independence_test_method, alpha=alpha,
knowledge=None, depth=depth, verbose=verbose)
Rath/packages/frontend/src/components/react-vega.tsx
Lines 17 to 23 in 7f3c6f5
Add download image button.
Although rath allows you right click the image and download, there are still a lot of users not familiar with such kind of operation.
might be a bug in graphic-walker @AntoineYANG
text: '矩阵',
iconName: 'GridViewSmall',
}, {
text: '图',
iconName: 'BranchPullRequest',
}, {
// itemKey: 'matrix', // TODO: 实现矩阵编辑
// text: '矩阵',
// iconName: 'GridViewSmall',
// }, {
itemKey: 'table',
text: '表',
iconName: 'BulletedListText',
type InfoArrayType = keyof DateTimeInfoArray
export type DateTimeInfoType = keyof DateTimeInfo
export function parseDateTimeArray(dateTime: string[]): DateTimeInfoArray {
// TODO: [refactor] Polyfills: 中文格式等
// TODO: [feat] assume the same dateTime format or support different format in one column
let infoArray = {} as DateTimeInfoArray
let reg_id: number | undefined, max_cnt = 0;
for (let i = 0; i < analyzer.rules.length; ++i) {
Found some cases where a key includes '\r', possibly caused by Windows CSV input errors.
{"id": 1, "x": 818, "y\r": 2020},
{"id": 4, "x": 713, "y\r": 2013},
{"id": 33, "x": 2305, "y\r": 291},
{"id": 35, "x": 700, "y\r": 1953},
{"id": 36, "x": 949, "y\r": 2293},
{"id": 52, "x": 1796, "y\r": 1239},
public r_squared (): number {
    // Coefficient of determination R^2 = SSR / SST for the fitted simple
    // linear regression yHat = alpha + beta * x over the normalized data.
    const [, meanY] = this.mean();
    const [alpha, beta] = this.getRegressionEquation();
    let explained = 0; // SSR: regression (explained) sum of squares
    let total = 0;     // SST: total sum of squares
    for (const record of this.normalizedDataSource) {
        const predicted = record[this.X] * beta + alpha;
        explained += (predicted - meanY) ** 2;
        total += (record[this.Y] - meanY) ** 2;
    }
    return explained / total;
}
如果你使用Rath之后有什么反馈或者建议,可以发布在这里。不限任何内容,如未能完成完整的分析流程;找不到一些指引;可视化效果差;页面卡顿;网络资源加载慢等。
QQ交流群:129132269
You can write feedback or suggestions here after using Rath. (fail to finish an analysis pipeline; few instructions; bad visualization; bad performance; slow page loading etc )
When I place a field (e.g., a dimension key) onto a droppable receiver (e.g., "Columns"), I can no longer use a new draggable field with the same key to place onto another receiver. Dragging occurs on the already-placed item instead.
max_cnt = cnt
}
}
// TODO: [feat] 推荐多种不同选择
for (let i = 0; i < dateTime.length; ++i) {
let info = parseDateTime(dateTime[i], max_cnt > 0 ? reg_id : undefined)
Object.keys(info).forEach(key => {
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.