Skip to content

Commit 9b37fd5

Browse files
committed
set hierarchy in getters, leads to big gains in performance, raise version to 2.7
1 parent fa4479f commit 9b37fd5

File tree

6 files changed

+49
-71
lines changed

6 files changed

+49
-71
lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import sbt.Keys._
44
lazy val commonSettings = Seq(
55
name := "archivespark",
66
organization := "com.github.helgeho",
7-
version := "2.6.2",
7+
version := "2.7",
88
scalaVersion := "2.11.7",
99
fork := true
1010
)

src/main/scala/de/l3s/archivespark/enrich/EnrichRoot.scala

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,10 @@ import de.l3s.archivespark.utils.Json._
2929
trait TypedEnrichRoot[+Meta] extends EnrichRoot with TypedEnrichable[Meta]
3030

3131
trait EnrichRoot extends Enrichable { this: TypedEnrichRoot[_] =>
32-
setHierarchy(null, null, this)
33-
34-
override def copy() = {
35-
val copy = super.copy().asInstanceOf[EnrichRoot]
36-
copy.setHierarchy(null, null, copy)
37-
copy
38-
}
39-
4032
def metaKey: String = "record"
4133

34+
override def root[A]: TypedEnrichRoot[A] = this.asInstanceOf[TypedEnrichRoot[A]]
35+
4236
def toJson: Map[String, Any] = Map(
4337
metaKey -> json(this.get)
4438
) ++ enrichments.map{e => (e, mapToJsonValue(enrichment(e).get.toJson))}.filter{ case (_, field) => field != null }

src/main/scala/de/l3s/archivespark/enrich/Enrichable.scala

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,6 @@ trait Enrichable extends Serializable with Copyable[Enrichable] with JsonConvert
3737
def get: Any
3838
def typed[T]: TypedEnrichable[T] = this.asInstanceOf[TypedEnrichable[T]]
3939

40-
override def copy(): Enrichable = copy(Map.empty)
41-
42-
protected[enrich] def copy(cloned: Map[String, Enrichable]): Enrichable = {
43-
val copy = super.copy()
44-
copy._enrichments = (_enrichments.keySet ++ cloned.keySet).map{field =>
45-
val enrichable = cloned.getOrElse(field, _enrichments(field).copy())
46-
enrichable.setHierarchy(copy, field, _root)
47-
(field, enrichable)
48-
}.toMap
49-
copy
50-
}
51-
5240
private var excludeFromOutput: Option[Boolean] = None
5341
def isExcludedFromOutput: Boolean = excludeFromOutput match {
5442
case Some(value) => value
@@ -63,19 +51,19 @@ trait Enrichable extends Serializable with Copyable[Enrichable] with JsonConvert
6351
case None => excludeFromOutput = Some(value)
6452
}
6553

66-
private var _field: String = _
54+
@transient private var _field: String = _
6755
def field: String = _field
6856

69-
private var _parent: Enrichable = _
57+
@transient private var _parent: Enrichable = _
7058
def parent[A]: TypedEnrichable[A] = _parent.asInstanceOf[TypedEnrichable[A]]
7159

72-
private var _root: EnrichRoot = _
60+
@transient private var _root: EnrichRoot = _
7361
def root[A]: TypedEnrichRoot[A] = _root.asInstanceOf[TypedEnrichRoot[A]]
7462

7563
def path: Seq[String] = if (_parent == null) Seq.empty else _parent.path :+ _field
7664
def chain: Seq[Enrichable] = if (_parent == null) Seq(this) else _parent.chain :+ this
7765

78-
protected[enrich] def setHierarchy(parent: Enrichable, field: String, root: EnrichRoot = null): Unit = {
66+
protected[enrich] def setHierarchy(parent: Enrichable, field: String, root: EnrichRoot): Unit = {
7967
_field = field
8068
_parent = parent
8169
_root = root
@@ -92,21 +80,29 @@ trait Enrichable extends Serializable with Copyable[Enrichable] with JsonConvert
9280
clone
9381
}
9482

95-
def enrichment[D : ClassTag](key: String): Option[TypedEnrichable[D]] = _enrichments.get(field(key)).map(_.asInstanceOf[TypedEnrichable[D]])
83+
def enrichment[D : ClassTag](key: String): Option[TypedEnrichable[D]] = {
84+
val fieldname = field(key)
85+
_enrichments.get(fieldname) match {
86+
case Some(enrichable) =>
87+
enrichable.setHierarchy(this, fieldname, root)
88+
Some(enrichable.asInstanceOf[TypedEnrichable[D]])
89+
case None => None
90+
}
91+
}
9692

9793
def enrich(fieldName: String, enrichment: Enrichable): Enrichable = {
98-
val clone = copy(Map(fieldName -> enrichment))
94+
val clone = copy()
95+
clone._enrichments = clone._enrichments.updated(fieldName, enrichment)
9996
clone._lastException = enrichment._lastException
10097
clone._aliases -= fieldName
10198
clone
10299
}
103100

104101
def enrichValue[Value](fieldName: String, value: Value): Enrichable = {
105-
val enrichable = SingleValueEnrichable[Value](value, this, fieldName, _root)
106-
enrich(fieldName, enrichable)
102+
enrich(fieldName, SingleValueEnrichable[Value](value))
107103
}
108104

109-
private[enrich] def enrich[D](func: EnrichFunc[_, D], excludeFromOutput: Boolean = false): Enrichable = {
105+
private def enrich[D](func: EnrichFunc[_, D], excludeFromOutput: Boolean = false): Enrichable = {
110106
if (!func.exists(this)) {
111107
val derivatives = new Derivatives(func.fields, func.aliases)
112108
var lastException: Option[Exception] = None
@@ -118,20 +114,28 @@ trait Enrichable extends Serializable with Copyable[Enrichable] with JsonConvert
118114
// if (ArchiveSpark.conf.catchExceptions) lastException = Some(exception)
119115
// else throw exception
120116
}
121-
val clone = copy(derivatives.get)
117+
val clone = copy()
122118
clone._lastException = lastException
123119
clone._aliases ++= derivatives.aliases
124120
for ((field, enrichment) <- derivatives.get) {
125121
enrichment.excludeFromOutput(excludeFromOutput, overwrite = false)
122+
clone._enrichments = clone._enrichments.updated(field, enrichment)
126123
clone._aliases -= field
127124
}
128125
clone
129-
} else if (!excludeFromOutput && func.fields.exists(f => enrichment(f).get.isExcludedFromOutput)) {
130-
val clone = copy()
131-
for (field <- func.fields if enrichment(field).get.isExcludedFromOutput) {
132-
clone.enrichment(field).get.excludeFromOutput(value = false)
126+
} else if (!excludeFromOutput) {
127+
val excluded = func.fields.map(enrichment).filter(_.isDefined).map(_.get).filter(_.isExcludedFromOutput)
128+
if (excluded.nonEmpty) {
129+
val clone = copy()
130+
for (enrichment <- excluded) {
131+
val enrichmentClone = enrichment.copy()
132+
enrichmentClone.excludeFromOutput(value = false)
133+
clone._enrichments = clone._enrichments.updated(enrichment.field, enrichmentClone)
134+
}
135+
clone
136+
} else {
137+
this
133138
}
134-
clone
135139
} else {
136140
this
137141
}
@@ -158,10 +162,11 @@ trait Enrichable extends Serializable with Copyable[Enrichable] with JsonConvert
158162
val remaining = path.tail
159163
enrichment(remaining.head) match {
160164
case Some(child) => child(remaining.tail)
161-
case None => for (child <- _enrichments.values) {
162-
val target: Option[TypedEnrichable[D]] = child[D](path)
163-
if (target.isDefined) return target
164-
}
165+
case None =>
166+
for (child <- _enrichments.values) {
167+
val target: Option[TypedEnrichable[D]] = child[D](path)
168+
if (target.isDefined) return target
169+
}
165170
None
166171
}
167172
} else if (path.head.matches("\\[\\d+\\]")) {

src/main/scala/de/l3s/archivespark/enrich/MultiValueEnrichable.scala

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,28 +32,9 @@ class MultiValueEnrichable[T] private (private var _children: Seq[TypedEnrichabl
3232
def children: Seq[TypedEnrichable[T]] = _children
3333
def get: Seq[T] = children.map(e => e.get)
3434

35-
override protected[enrich] def copy(cloned: Map[String, Enrichable]): Enrichable = {
36-
val copy = super.copy(cloned).asInstanceOf[MultiValueEnrichable[T]]
37-
copy._children = _children.zipWithIndex.map{case (c, i) =>
38-
val child = c.copy().asInstanceOf[TypedEnrichable[T]]
39-
child.setHierarchy(copy, s"[$i]", root)
40-
child
41-
}
42-
copy
43-
}
44-
45-
protected[enrich] def copy(children: Seq[TypedEnrichable[T]]): Enrichable = {
46-
val copy = super.copy().asInstanceOf[MultiValueEnrichable[T]]
47-
copy._children = children.zipWithIndex.map{case (c, i) =>
48-
c.setHierarchy(copy, s"[$i]", root)
49-
c
50-
}
51-
copy
52-
}
53-
5435
override protected[enrich] def setHierarchy(parent: Enrichable, field: String, root: EnrichRoot): Unit = {
5536
super.setHierarchy(parent, field, root)
56-
for ((child, i) <- _children.zipWithIndex) child.setHierarchy(this, s"[$i]", root)
37+
for ((c, i) <- _children.zipWithIndex) c.setHierarchy(this, s"[$i]", root)
5738
}
5839

5940
override protected[archivespark] def excludeFromOutput(value: Boolean, overwrite: Boolean): Unit = {
@@ -70,13 +51,12 @@ class MultiValueEnrichable[T] private (private var _children: Seq[TypedEnrichabl
7051
if (enriched != c) {
7152
hasEnriched = true
7253
lastException = enriched._lastException.orElse(lastException)
73-
(enriched.asInstanceOf[TypedEnrichable[T]], true)
74-
} else {
75-
(c, false)
76-
}
54+
enriched.asInstanceOf[TypedEnrichable[T]]
55+
} else c
7756
}
7857
if (hasEnriched) {
79-
val clone = copy(enriched.map{case (enriched, cloned) => if (cloned) enriched else enriched.copy().asInstanceOf[TypedEnrichable[T]]})
58+
val clone = copy().asInstanceOf[MultiValueEnrichable[T]]
59+
clone._children = enriched
8060
clone._lastException = lastException
8161
clone
8262
} else this
@@ -86,7 +66,8 @@ class MultiValueEnrichable[T] private (private var _children: Seq[TypedEnrichabl
8666
val enriched = children(index).enrich(path.tail, func, excludeFromOutput).asInstanceOf[TypedEnrichable[T]]
8767
if (children(index) == enriched) this
8868
else {
89-
val clone = copy(children.zipWithIndex.map{case (c, i) => if (i == index) enriched else c.copy().asInstanceOf[TypedEnrichable[T]]})
69+
val clone = copy().asInstanceOf[MultiValueEnrichable[T]]
70+
clone._children = children.zipWithIndex.map{case (c, i) => if (i == index) enriched else c}
9071
clone._lastException = enriched.lastException
9172
clone
9273
}

src/main/scala/de/l3s/archivespark/enrich/SingleValueEnrichable.scala

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,12 @@ package de.l3s.archivespark.enrich
2626

2727
import de.l3s.archivespark.utils.Json._
2828

29-
class SingleValueEnrichable[T] private (override val get: T, parentEnrichable: Enrichable = null, field: String = null, enrichRoot: EnrichRoot = null) extends TypedEnrichable[T] {
30-
setHierarchy(parentEnrichable, field, enrichRoot)
31-
29+
class SingleValueEnrichable[T] private (override val get: T) extends TypedEnrichable[T] {
3230
def toJson: Map[String, Any] = (if (isExcludedFromOutput) Map() else Map(
3331
null.asInstanceOf[String] -> json(this.get)
3432
)) ++ enrichments.map{e => (e, mapToJsonValue(enrichment(e).get.toJson))}.filter{ case (_, field) => field != null }
3533
}
3634

3735
object SingleValueEnrichable {
38-
def apply[T](value: T, parent: Enrichable = null, field: String = null, root: EnrichRoot = null): SingleValueEnrichable[T] = new SingleValueEnrichable[T](value, parent, field, root)
36+
def apply[T](value: T): SingleValueEnrichable[T] = new SingleValueEnrichable[T](value)
3937
}

src/main/scala/de/l3s/archivespark/utils/Json.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ object Json extends Serializable {
5555

5656
def mapToEnrichable(jsonMap: Map[String, Any], parent: Enrichable, field: String): Enrichable = {
5757
val json = de.l3s.archivespark.utils.Json.mapToJson(jsonMap)
58-
var enrichable: Enrichable = SingleValueEnrichable[String](json, parent, field, if (parent != null) parent.root else null)
58+
var enrichable: Enrichable = SingleValueEnrichable[String](json)
5959
enrichable.excludeFromOutput()
6060
for ((key, value) <- jsonMap) {
6161
enrichable = Try{value.asInstanceOf[Map[String, Any]]} match {

0 commit comments

Comments
 (0)