Fix HTTP header type as expected for string contents

helgeho · helgeho · commit 6e430135cf46 · 2024-04-04T10:01:23.000+02:00
diff --git a/src/main/scala/org/archive/webservices/archivespark/functions/Entities.scala b/src/main/scala/org/archive/webservices/archivespark/functions/Entities.scala
@@ -24,18 +24,14 @@
 
 package org.archive.webservices.archivespark.functions
 
-import java.util.Properties
-import edu.stanford.nlp.ling.CoreAnnotations.{NamedEntityTagAnnotation, SentencesAnnotation, TextAnnotation, TokensAnnotation}
-import edu.stanford.nlp.ling.CoreLabel
-import edu.stanford.nlp.pipeline.{Annotation, CoreDocument, StanfordCoreNLP}
-import edu.stanford.nlp.util.CoreMap
+import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP}
 import org.archive.webservices.archivespark.model._
 import org.archive.webservices.archivespark.model.dataloads.ByteLoad
 import org.archive.webservices.archivespark.model.pointers.DependentFieldPointer
 import org.archive.webservices.sparkling.ars.WANE
 
+import java.util.Properties
 import scala.collection.JavaConverters._
-import scala.collection.mutable
 
 object EntitiesNamespace {
   def get: DependentFieldPointer[ByteLoad.Root, String] = HtmlText.mapIdentity("entities").get[String]("entities")
@@ -66,7 +62,9 @@ class Entities private (properties: Properties, tagFieldMapping: Seq[(String, St
       case _: Exception => Map.empty[String, Set[String]]
     }
 
-    for ((tag, _) <- tagFieldMapping) derivatives.setNext(MultiValueEnrichable(mentions.getOrElse(tag, Set.empty).toSeq))
+    for ((tag, _) <- tagFieldMapping) {
+      derivatives.setNext(MultiValueEnrichable(mentions.getOrElse(tag, Set.empty).toSeq))
+    }
   }
 }
 
diff --git a/src/main/scala/org/archive/webservices/archivespark/specific/warc/functions/WarcPayload.scala b/src/main/scala/org/archive/webservices/archivespark/specific/warc/functions/WarcPayload.scala
@@ -49,7 +49,7 @@ class WarcPayload private (http: Boolean = true)
       if (http) {
         for (msg <- record.http) {
           derivatives << msg.statusLine
-          derivatives << msg.headers.toMap
+          derivatives << msg.headers
           derivatives << IOUtil.bytes(msg.payload)
         }
       } else {

Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ class WarcPayload private (http: Boolean = true)`
`49`	`49`	`if (http) {`
`50`	`50`	`for (msg <- record.http) {`
`51`	`51`	`derivatives << msg.statusLine`
`52`		`- derivatives << msg.headers.toMap`
	`52`	`+ derivatives << msg.headers`
`53`	`53`	`derivatives << IOUtil.bytes(msg.payload)`
`54`	`54`	`}`
`55`	`55`	`} else {`