-
Notifications
You must be signed in to change notification settings - Fork 32
Expand file tree
/
Copy pathdata.ts
More file actions
124 lines (111 loc) · 4.05 KB
/
data.ts
File metadata and controls
124 lines (111 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import path from "node:path";
import { Dataset, processing } from "@epfml/discojs";
import {
DataFormat,
DataType,
Image,
Task,
} from "@epfml/discojs";
import { loadCSV, loadImage, loadImagesInDir } from "@epfml/discojs-node";
import { Repeat } from "immutable";
/**
 * Loads the `simple_face` images and keeps the share belonging to one client.
 *
 * Images found in the `adult` and `child` sub-folders of
 * `../datasets/simple_face` are each paired with the label "adult" or
 * "child" respectively, concatenated (adults first), and then reduced to the
 * elements whose position modulo `totalClient` equals `userIdx`.
 *
 * @param userIdx - Remainder that selects this client's elements.
 * @param totalClient - Divisor used to spread elements across clients.
 * @returns The labelled `[image, label]` pairs kept for this client.
 */
async function loadSimpleFaceData(userIdx: number, totalClient: number): Promise<Dataset<DataFormat.Raw["image"]>> {
  const root = path.join("..", "datasets", "simple_face");

  // The two folders are read one after the other: adults, then children.
  const adultImages = await loadImagesInDir(path.join(root, "adult"));
  const labelledAdults: Dataset<[Image, string]> = adultImages.zip(Repeat("adult"));
  const childImages = await loadImagesInDir(path.join(root, "child"));
  const labelledChildren: Dataset<[Image, string]> = childImages.zip(Repeat("child"));

  return labelledAdults
    .chain(labelledChildren)
    .filter((_, index) => index % totalClient === userIdx);
}
/**
 * Loads the `lus_covid` images and keeps the share belonging to one client.
 *
 * Images under `../datasets/lus_covid/COVID+` are labelled "COVID-Positive"
 * and those under `../datasets/lus_covid/COVID-` are labelled
 * "COVID-Negative". Positives are followed by negatives, and only the
 * elements whose position modulo `totalClient` equals `userIdx` are kept.
 *
 * @param userIdx - Remainder that selects this client's elements.
 * @param totalClient - Divisor used to spread elements across clients.
 * @returns The labelled `[image, label]` pairs kept for this client.
 */
async function loadLusCovidData(userIdx: number, totalClient: number): Promise<Dataset<DataFormat.Raw["image"]>> {
  const root = path.join("..", "datasets", "lus_covid");

  // Reads every image of one sub-folder and attaches the same label to each.
  const loadLabelled = async (
    subdir: string,
    label: string,
  ): Promise<Dataset<[Image, string]>> =>
    (await loadImagesInDir(path.join(root, subdir))).zip(Repeat(label));

  // Sequential on purpose: the positive folder is read before the negative one.
  const positives = await loadLabelled("COVID+", "COVID-Positive");
  const negatives = await loadLabelled("COVID-", "COVID-Negative");

  const everything: Dataset<[Image, string]> = positives.chain(negatives);
  return everything.filter((_, position) => position % totalClient === userIdx);
}
/**
 * Builds the labelled image dataset for one split of the `tinder_dog` task.
 *
 * Reads `labels.csv` from `../datasets/tinder_dog/<split + 1>`, takes the
 * `filename` and `label` columns of every row, and loads the matching image
 * from the same directory.
 *
 * @param split - Split number; the directory read is named `split + 1`.
 * @returns A dataset of `[image, label]` pairs.
 * @throws Error when none of `<filename>.png`, `.jpg` or `.jpeg` can be
 *   loaded for a row (raised while the dataset is mapped).
 */
function loadTinderDogData(split: number): Dataset<DataFormat.Raw["image"]> {
  const splitDir = path.join("..", "datasets", "tinder_dog", `${split + 1}`);
  const extensions = ["png", "jpg", "jpeg"];

  const entries = loadCSV(path.join(splitDir, "labels.csv")).map(
    (row) =>
      [
        processing.extractColumn(row, "filename"),
        processing.extractColumn(row, "label"),
      ] as const,
  );

  return entries.map(async ([filename, label]) => {
    try {
      // The CSV does not carry the extension: try each one, first success wins.
      const candidates = extensions.map((ext) =>
        loadImage(path.join(splitDir, `${filename}.${ext}`)),
      );
      return [await Promise.any(candidates), label];
    } catch {
      throw Error(`${filename} not found in ${splitDir}`);
    }
  });
}
/**
 * Builds the labelled image dataset stored for one client of a dataset.
 *
 * Reads `labels.csv` from `../datasets/<dataName>/client_<split>`, takes the
 * `filename` and `label` columns of every row, and loads the matching image
 * from the same directory.
 *
 * @param dataName - Name of the dataset directory under `../datasets`.
 * @param split - Client number used in the `client_<split>` directory name.
 * @returns A dataset of `[image, label]` pairs.
 * @throws Error when none of `<filename>.png`, `.jpg` or `.jpeg` can be
 *   loaded for a row (raised while the dataset is mapped).
 */
function loadData(dataName: string, split: number): Dataset<DataFormat.Raw["image"]> {
  const clientDir = path.join("..", "datasets", `${dataName}`, `client_${split}`);

  return loadCSV(path.join(clientDir, "labels.csv"))
    .map((row) => {
      const filename = processing.extractColumn(row, "filename");
      const label = processing.extractColumn(row, "label");
      return [filename, label] as const;
    })
    .map(async ([filename, label]) => {
      try {
        // Extension is unknown up front; whichever variant loads first is used.
        const attempts = ["png", "jpg", "jpeg"].map((ext) => {
          const candidatePath = path.join(clientDir, `${filename}.${ext}`);
          return loadImage(candidatePath);
        });
        const picture = await Promise.any(attempts);
        return [picture, label];
      } catch {
        throw Error(`${filename} not found in ${clientDir}`);
      }
    });
}
/**
 * Guards the `index % totalClient === userIdx` sharding used by some tasks.
 *
 * Without this check a zero `totalClient` (remainder is NaN) or a `userIdx`
 * outside `[0, totalClient)` never matches any index, which silently yields
 * an empty dataset instead of an error.
 *
 * @throws RangeError when `totalClient` is not a positive integer or when
 *   `userIdx` is not an integer in `[0, totalClient)`.
 */
function assertValidShard(userIdx: number, totalClient: number): void {
  if (!Number.isInteger(totalClient) || totalClient <= 0) {
    throw new RangeError(
      `totalClient must be a positive integer, got ${totalClient}`,
    );
  }
  if (!Number.isInteger(userIdx) || userIdx < 0 || userIdx >= totalClient) {
    throw new RangeError(
      `userIdx must be an integer in [0, ${totalClient}), got ${userIdx}`,
    );
  }
}

/**
 * Returns the raw dataset one client should use for the given task.
 *
 * Two strategies are used depending on the task:
 * - `simple_face`, `titanic*` and `lus_covid*` load a shared dataset and keep
 *   the elements whose index modulo `totalClient` equals `userIdx`;
 * - `cifar10*`, `mnist*` and `tinder_dog` read a per-client directory chosen
 *   by `userIdx` and do not use `totalClient`.
 *
 * The result is cast to `Dataset<DataFormat.Raw[D]>`; it is up to the caller
 * to pick a `D` that matches the task.
 *
 * @param taskID - Identifier of the task whose data is requested.
 * @param userIdx - Index of the client requesting data.
 * @param totalClient - Number of clients the shared datasets are split across.
 * @returns The dataset for this client.
 * @throws RangeError for index-sharded tasks when `totalClient` is not a
 *   positive integer or `userIdx` is not an integer in `[0, totalClient)`.
 * @throws Error when no loader exists for `taskID`.
 */
export async function getTaskData<D extends DataType>(
  taskID: Task.ID,
  userIdx: number,
  totalClient: number
): Promise<Dataset<DataFormat.Raw[D]>> {
  switch (taskID) {
    case "simple_face": // remove
      assertValidShard(userIdx, totalClient);
      return (await loadSimpleFaceData(userIdx, totalClient)) as Dataset<DataFormat.Raw[D]>;
    case "titanic":
    case "titanic_decentralized": {
      // Braces keep `titanicData` scoped to this clause rather than the whole switch.
      assertValidShard(userIdx, totalClient);
      const titanicData = loadCSV(
        path.join("..", "datasets", "titanic_train.csv"),
      ) as Dataset<DataFormat.Raw[D]>;
      return titanicData.filter((_, i) => i % totalClient === userIdx);
    }
    case "cifar10":
      return loadData("cifar10-agent", userIdx) as Dataset<DataFormat.Raw[D]>;
    case "cifar10_federated_simple_model":
    case "cifar10_simple_model":
      return loadData("cifar10_ext", userIdx) as Dataset<DataFormat.Raw[D]>;
    case "lus_covid":
    case "lus_covid_decentralized":
      assertValidShard(userIdx, totalClient);
      return (await loadLusCovidData(userIdx, totalClient)) as Dataset<DataFormat.Raw[D]>;
    case "tinder_dog": // remove
      return loadTinderDogData(userIdx) as Dataset<DataFormat.Raw[D]>;
    case "mnist_federated":
    case "mnist":
      return loadData("mnist", userIdx) as Dataset<DataFormat.Raw[D]>;
    default:
      throw new Error(`Data loader for ${taskID} not implemented.`);
  }
}