|
2 | 2 |
|
3 | 3 | # BSD 3-Clause License; see https://github.com/scikit-hep/uproot-methods/blob/master/LICENSE
|
4 | 4 |
|
| 5 | +import awkward.type |
| 6 | +import awkward.array.chunked |
| 7 | +import awkward.array.objects |
| 8 | + |
| 9 | +import uproot_methods.classes.TLorentzVector |
| 10 | + |
def getcontent(virtual):
    """Return the ``content`` attribute of ``virtual.array``."""
    materialized = virtual.array
    return materialized.content
| 13 | + |
def jaggedtable(rowname, counts, fields):
    """Build the jagged table of one collection from its counts and fields.

    Parameters
    ----------
    rowname
        Name passed to ``Table.named`` for the per-row table.
    counts
        Lazy array whose ``.array`` holds the counts; it must also expose
        ``Table``, ``JaggedArray``, ``VirtualArray``, ``cache`` and
        ``persistvirtual``.
    fields
        Iterable of ``(name, lazy array)`` pairs, one per column.

    Returns
    -------
    The result of ``lazyjagged`` when ``counts.array`` is a ``ChunkedArray``;
    otherwise a ``JaggedArray`` built from the counts' offsets whose content
    is a table holding one virtual column per field (plus ``p4`` when the
    ``pt``/``eta``/``phi``/``mass`` columns are all present and ``p4`` is not).
    """
    countsarray = counts.array
    if isinstance(countsarray, awkward.array.chunked.ChunkedArray):
        # The counts are still chunked: defer again, one virtual array per chunk.
        return lazyjagged(countsarray, rowname, [(n, x.array) for n, x in fields])

    Table = counts.Table
    JaggedArray = counts.JaggedArray
    VirtualArray = counts.VirtualArray

    offsets = JaggedArray.counts2offsets(countsarray)
    table = Table.named(rowname)
    for n, x in fields:
        # Each column stays virtual; getcontent reads x.array.content on demand.
        table[n] = VirtualArray(getcontent, x, type=awkward.type.ArrayType(offsets[-1], x.type.to.to), cache=counts.cache, persistvirtual=counts.persistvirtual)

    columns = table.columns
    if "pt" in columns and "eta" in columns and "phi" in columns and "mass" in columns and "p4" not in columns:
        # The mixin class is only needed here, so it is only built here.
        VirtualTLorentzVectorArray = awkward.array.objects.Methods.mixin(uproot_methods.classes.TLorentzVector.PtEtaPhiMassArrayMethods, VirtualArray)
        table["p4"] = VirtualTLorentzVectorArray(uproot_methods.classes.TLorentzVector.TLorentzVectorArray.from_ptetaphim, (table["pt"], table["eta"], table["phi"], table["mass"]), type=awkward.type.ArrayType(offsets[-1], uproot_methods.classes.TLorentzVector.PtEtaPhiMassLorentzVectorArray), cache=counts.cache, persistvirtual=counts.persistvirtual)

    return JaggedArray.fromoffsets(offsets, table)
| 33 | + |
def lazyjagged(countsarray, rowname, fields):
    """Wrap every chunk of a collection in a virtual array that calls ``jaggedtable``.

    ``countsarray`` supplies the chunks, the chunk sizes and the
    ``ChunkedArray``/``VirtualArray`` classes; ``fields`` is a sequence of
    ``(name, chunked array)`` pairs that is traversed once per chunk.  The
    returned ``ChunkedArray`` reuses ``countsarray.chunksizes``.
    """
    ChunkedArray = countsarray.ChunkedArray
    VirtualArray = countsarray.VirtualArray
    kinematics = ("pt", "eta", "phi", "mass")

    lazychunks = []
    for index, countschunk in enumerate(countsarray.chunks):
        # A fresh row type per chunk, filled from the declared field types.
        rowtype = awkward.type.TableType()
        perchunk = []
        for fieldname, field in fields:
            assert field.chunksizes[index] == countsarray.chunksizes[index]
            perchunk.append((fieldname, field.chunks[index]))
            rowtype[fieldname] = field.type.to.to

        names = rowtype.columns
        if all(key in names for key in kinematics) and "p4" not in names:
            rowtype["p4"] = uproot_methods.classes.TLorentzVector.TLorentzVectorArray.from_ptetaphim

        chunktype = awkward.type.ArrayType(len(countschunk), float("inf"), rowtype)
        lazychunks.append(VirtualArray(jaggedtable, (rowname, countschunk, perchunk), type=chunktype, cache=countschunk.cache, persistvirtual=countschunk.persistvirtual))

    return ChunkedArray(lazychunks, countsarray.chunksizes)
| 51 | + |
def crossref(fromarray, links, subj):
    """Attach cross-reference columns to the array behind ``fromarray``.

    ``links`` holds 6-tuples ``(collection, subname, i, localindex, name,
    totype)``.  When ``fromarray.array`` is a ``ChunkedArray`` the work is
    deferred: each chunk is wrapped in a ``VirtualArray`` that calls
    ``crossref`` again with the chunk index as ``subj``.  Otherwise a virtual
    ``indexedmask`` column is written into ``out.content`` for every link and
    the array itself is returned.
    """
    out = fromarray.array
    ChunkedArray = out.ChunkedArray
    VirtualArray = out.VirtualArray

    if not isinstance(out, awkward.array.chunked.ChunkedArray):
        for collection, subname, i, localindex, name, totype in links:
            toarray = collection[subname].chunks[i]
            linktype = awkward.type.ArrayType(out.offsets[-1], totype)
            out.content[name] = VirtualArray(indexedmask, (toarray, localindex, subj), type=linktype, cache=fromarray.cache, persistvirtual=fromarray.persistvirtual)
        return out

    wrapped = []
    for j, chunk in enumerate(out.chunks):
        newtype = awkward.type.ArrayType(out.chunksizes[j], float("inf"), awkward.type.TableType())
        rowtype = newtype.to.to
        oldrowtype = chunk.type.to.to
        # Carry over the existing columns, then declare one column per link.
        for n in oldrowtype.columns:
            rowtype[n] = oldrowtype[n]
        for _collection, _subname, _i, _localindex, name, totype in links:
            rowtype[name] = totype
        wrapped.append(VirtualArray(crossref, (chunk, links, j), type=newtype, cache=fromarray.cache, persistvirtual=fromarray.persistvirtual))

    return ChunkedArray(wrapped, out.chunksizes)
| 74 | + |
def indexedmask(toarray, localindex, subj):
    """Build an ``IndexedMaskedArray`` into the content of ``toarray.array``.

    The ``starts`` of the target jagged array are added to ``localindex.array``
    and every position whose local index is negative is set to ``-1``.  When
    ``subj`` is not ``None`` both arrays are first narrowed to chunk ``subj``.
    """
    target = toarray.array
    local = localindex.array
    if subj is not None:
        target = target.chunks[subj].array
        local = local.chunks[subj].array

    shifted = local + target.starts
    missing = local.content < 0
    shifted.content[missing] = -1
    return toarray.IndexedMaskedArray(shifted.content, target.content)
| 85 | + |
def _crosslink(events, array, eventtype, VirtualArray, source, links):
    """Add lazily resolved cross-reference columns to ``events[source]``.

    ``links`` is a sequence of ``(target, indexbranch, field)`` triples.  For
    each one, ``field`` is declared on the ``source`` row type as an
    ``OptionType`` of the ``target`` row type.  Every chunk of
    ``events[source]`` is then replaced by a ``VirtualArray`` that calls
    ``crossref`` with the matching chunk of ``array[indexbranch]``.
    """
    for target, _, field in links:
        eventtype.to[source].to[field] = awkward.type.OptionType(eventtype.to[target].to)
        # NOTE(review): presumably disabled because the linked row types refer
        # back to each other -- confirm against awkward.type.
        eventtype.to[source].to[field].check = False

    for i, chunk in enumerate(events[source].chunks):
        for target, _, _ in links:
            assert events[source].chunksizes[i] == events[target].chunksizes[i]
        chunklinks = [
            (events, target, i, array[indexbranch].chunks[i], field, eventtype.to[source].to[field])
            for target, indexbranch, field in links
        ]
        events[source].chunks[i] = VirtualArray(crossref, (chunk, chunklinks, None), type=awkward.type.ArrayType(events[source].chunksizes[i], eventtype.to[source]), cache=chunk.cache, persistvirtual=chunk.persistvirtual)


def transform(array):
    """Regroup the flat columns of ``array`` into a nested ``Event`` table.

    Columns are matched by prefix against a fixed table:

    * entries marked ``None`` are copied under their new name;
    * entries marked with a list become lazily built jagged collections
      (via ``lazyjagged``), using the ``n<Prefix>`` column as counts;
    * entries marked with a ``Table`` collect their columns into that table.

    A dotted new name (``"gen.jets"``) places the item in a nested table.
    Cross-reference columns are then attached to electrons, muons, taus,
    photons, jets and fatjets.  Columns that match nothing are stored under
    ``events["etc"]`` and the untouched input under ``events["raw"]``.

    Raises ``KeyError``-style lookup errors (as in the original code) when one
    of the cross-referenced collections or index columns is missing.
    """
    array._valid()
    array.check_whole_valid = False

    Table = array.Table
    VirtualArray = array.VirtualArray

    stuff = [("run", "run", None),
             ("luminosityBlock", "lumi", None),
             ("event", "event", None),
             ("Electron_", "electrons", []),
             ("Muon_", "muons", []),
             ("Tau_", "taus", []),
             ("Photon_", "photons", []),
             ("Jet_", "jets", []),
             ("FatJet_", "fatjets", []),
             ("SubJet_", "subjets", []),
             ("IsoTrack_", "isotracks", []),
             ("SoftActivityJet_", "softjets", []),
             ("SoftActivityJetHT", "softactivity.HT", None),
             ("SoftActivityJetHT2", "softactivity.HT2", None),
             ("SoftActivityJetHT5", "softactivity.HT5", None),
             ("SoftActivityJetHT10", "softactivity.HT10", None),
             ("SoftActivityJetNjets2", "softactivity.njets2", None),
             ("SoftActivityJetNjets5", "softactivity.njets5", None),
             ("SoftActivityJetNjets10", "softactivity.njets10", None),
             ("fixedGridRhoFastjetAll", "fixedGridRhoFastjet.everything", None),
             ("fixedGridRhoFastjetCentralCalo", "fixedGridRhoFastjet.centralcalo", None),
             ("fixedGridRhoFastjetCentralNeutral", "fixedGridRhoFastjet.centralneutral", None),
             ("MET_", "MET", Table.named("MET")),
             ("RawMET_", "rawMET", Table.named("RawMET")),
             ("CaloMET_", "caloMET", Table.named("CaloMET")),
             ("PuppiMET_", "puppiMET", Table.named("PuppiMET")),
             ("TkMET_", "tkMET", Table.named("TkMET")),
             ("PV_", "PV", Table.named("PV")),
             ("SV_", "SVs", []),
             ("OtherPV_", "otherPVs", []),
             ("Pileup_", "pileup", Table.named("Pileup")),
             ("Flag_", "flags", Table.named("Flags")),
             ("TrigObj_", "trigobjs", []),
             ("HLT_", "HLT", Table.named("HLT")),
             ("HLTriggerFirstPath", "HLT.firstpath", None),
             ("HLTriggerFinalPath", "HLT.finalpath", None),
             ("Generator_", "gen", Table.named("Generator")),
             ("GenDressedLepton_", "gen.dressedleptons", []),
             ("GenPart_", "gen.partons", []),
             ("GenJet_", "gen.jets", []),
             ("GenJetAK8_", "gen.jetsAK8", []),
             ("SubGenJetAK8_", "gen.subjetsAK8", []),
             ("GenVisTau_", "gen.vistaus", []),
             ("GenMET_", "gen.MET", Table.named("GenMET")),
             ("LHE_", "gen.LHE", Table.named("LHE")),
             ("LHEPart_", "gen.LHEpartons", []),
             ("genWeight", "gen.genweight", None),
             ("LHEPdfWeight", "gen.LHEpdfweight", None),
             ("LHEScaleWeight", "gen.LHEscaleweight", None),
             ("LHEWeight_originalXWGTUP", "gen.LHEweight_originalXWGTUP", None),
             ]

    # First pass: sort every input column into the list/table of the first
    # matching prefix; count columns ("n" + prefix) are consumed silently and
    # anything unmatched is remembered for the "etc" table.
    others = []
    for n in array.columns:
        for prefix, rename, data in stuff:
            if n.startswith(prefix):
                if data is None:
                    pass
                elif isinstance(data, list):
                    data.append((n[len(prefix):], array[n]))
                else:
                    data[n[len(prefix):]] = array[n]
                break
            elif n == "n" + prefix.rstrip("_"):
                break
        else:
            others.append(n)

    events = Table.named("Event")

    def makecollection(rename):
        # "outer.inner" names live in a nested table that is created on demand.
        if "." in rename:
            outer, inner = rename.split(".")
            if outer not in events.columns:
                events[outer] = Table.named(outer.capitalize())
            return events[outer], inner
        else:
            return events, rename

    # Second pass: place each non-empty group under its new name.
    for prefix, rename, data in stuff:
        if data is None:
            if prefix in array.columns:
                collection, rename = makecollection(rename)
                collection[rename] = array[prefix]
        elif isinstance(data, list):
            rowname = prefix[:-1]
            countname = "n" + rowname
            if len(data) > 0 and countname in array.columns:
                collection, rename = makecollection(rename)
                collection[rename] = lazyjagged(array[countname], rowname, data)
        else:
            if len(data.columns) > 0:
                collection, rename = makecollection(rename)
                collection[rename] = data

    eventtype = events.type

    # Cross-references, in the original order.  The taus section used to be
    # written out twice, which wrapped every taus chunk in crossref two times;
    # it is applied once here.
    _crosslink(events, array, eventtype, VirtualArray, "electrons", [
        ("photons", "Electron_photonIdx", "photon"),
        ("jets", "Electron_jetIdx", "jet"),
    ])
    _crosslink(events, array, eventtype, VirtualArray, "muons", [
        ("jets", "Muon_jetIdx", "jet"),
    ])
    _crosslink(events, array, eventtype, VirtualArray, "taus", [
        ("jets", "Tau_jetIdx", "jet"),
    ])
    _crosslink(events, array, eventtype, VirtualArray, "photons", [
        ("electrons", "Photon_electronIdx", "electron"),
        ("jets", "Photon_jetIdx", "jet"),
    ])
    _crosslink(events, array, eventtype, VirtualArray, "jets", [
        ("electrons", "Jet_electronIdx1", "electron1"),
        ("electrons", "Jet_electronIdx2", "electron2"),
        ("muons", "Jet_muonIdx1", "muon1"),
        ("muons", "Jet_muonIdx2", "muon2"),
    ])
    # NOTE(review): the subJetIdx branches are resolved against "jets" here, as
    # in the original code; the branch names suggest "subjets" may have been
    # intended -- confirm against the NanoAOD branch documentation.
    _crosslink(events, array, eventtype, VirtualArray, "fatjets", [
        ("jets", "FatJet_subJetIdx1", "subjet1"),
        ("jets", "FatJet_subJetIdx2", "subjet2"),
    ])

    if len(others) > 0:
        etc = events["etc"] = Table.named("OtherFields")
        for n in others:
            etc[n] = array[n]
    events["raw"] = array

    return events
0 commit comments