Let's connect
Let's connect

How to mine Scala 3 compiler metadata using TASTy files

Picture of Andrzej Ratajczak, Kotlin, Scala Developer

Andrzej Ratajczak

Kotlin, Scala Developer

15 minutes read

scala

// Collects "groupId:artifactId:version" coordinates of Scala 3 artifacts by
// scraping index.scala-lang.org in three stages, in parallel over the search
// result pages 1 to 64. Any project or artifact whose page cannot be fetched
// or parsed is silently skipped (each request is wrapped in Try).
val elems = {
  // Stage 1: the text of every <h4> element on one search-result page.
  def projectsOnPage(page: Int) =
    Jsoup
      .connect(
        s"https://index.scala-lang.org/search?sort=stars&languages=3.x&q=*&page=$page"
      )
      .get()
      .select("h4")
      .eachText
      .asScala

  // Stage 2: for one project header, the text of every <option> on its
  // artifacts page, each paired with the header and the text found under
  // ".head-last-version". None when the page cannot be loaded.
  def artifactsOf(header: String) =
    Try(
      Jsoup
        .connect(s"https://index.scala-lang.org/$header/artifacts/version")
        .get()
    ).toOption.map { versionPage =>
      val version = versionPage.select(".head-last-version").text.trim
      versionPage
        .select("option")
        .eachText
        .asScala
        .map(name => (name, (header, version)))
    }

  // Stage 3: parses the "#copy-maven" text of an artifact page as XML and,
  // when its artifactId ends with "_3", renders the coordinates.
  def mavenCoordinates(
      name: String,
      header: String,
      version: String
  ): Option[String] =
    for {
      pom <- Try {
        val snippet = Jsoup
          .connect(
            s"https://index.scala-lang.org/$header/artifacts/$name/$version?binary-versions=_3"
          )
          .get()
          .select("#copy-maven")
          .text
        Jsoup.parse(snippet, "", Parser.xmlParser())
      }.toOption
      if pom.select("artifactId").text.endsWith("_3")
    } yield {
      val groupId = pom.select("groupId").text
      val artifactId = pom.select("artifactId").text
      val pomVersion = pom.select("version").text
      s"$groupId:$artifactId:$pomVersion"
    }

  (1 to 64).par
    .flatMap(page => projectsOnPage(page))
    .flatMap(header => artifactsOf(header))
    .flatten
    .flatMap { case (name, (header, version)) =>
      mavenCoordinates(name, header, version)
    }
}

scala

// Configures a fetch with `repositories` and exactly one dependency, built
// from `organization`, `module` and `version`, and then runs it.
// NOTE(review): `Fetch`, `Dependency`, `Module`, `Organization` and
// `ModuleName` look like the coursier API, and `repositories`,
// `organization`, `module` and `version` are defined outside this snippet —
// confirm both, and what `run` returns, against the caller.
Fetch()
 .withRepositories(repositories)
 .withDependencies(
   Seq(
     Dependency(
       Module(Organization(organization), ModuleName(module)),
       version
     )
   )
 )
 .run

scala

/** TASTy inspector that writes one line per documented method definition to a
  * text file.
  *
  * Each output line holds five fields separated by `␟`: the extractor-style
  * AST, the bytecode, the source rendered with full names, the source rendered
  * without full names, and the cleaned doc comment. Only definitions that have
  * a non-blank cleaned doc comment and a right-hand side are written.
  *
  * @param fileOutputName
  *   path of the file the records are written to
  * @param classpath
  *   not read by the members defined in this block; NOTE(review): presumably
  *   consumed by `byteCode` — confirm
  */
class MyInspector(fileOutputName: String, classpath: String) extends Inspector:
  val file = new File(fileOutputName)

  // The field separator `␟` is non-ASCII, so the encoding is pinned to UTF-8
  // instead of relying on the platform default charset.
  val bw = new BufferedWriter(
    new FileWriter(file, java.nio.charset.StandardCharsets.UTF_8)
  )

  /** Collects every `DefDef` in the given TASTy trees, writes one record per
    * qualifying definition, and closes the writer — also when writing or
    * rendering fails, so the file handle is never leaked.
    *
    * @param tastys
    *   the TASTy files handed over by the inspector driver
    */
  def inspect(using Quotes)(tastys: List[Tasty[quotes.type]]): Unit =
    import quotes.reflect.*

    // Accumulates DefDefs in reverse visiting order: prepending keeps each
    // step O(1); callers reverse once to obtain visiting order.
    object Traverser extends TreeAccumulator[List[DefDef]]:
      def foldTree(defdefs: List[DefDef], tree: Tree)(
          owner: Symbol
      ): List[DefDef] =
        val collected = tree match
          case d: DefDef => d :: defdefs
          case _         => defdefs
        foldOverTree(collected, tree)(owner)
    end Traverser

    try
      tastys
        .flatMap { tasty =>
          val tree = tasty.ast
          Traverser.foldTree(Nil, tree)(tree.symbol).reverse
        }
        .flatMap { defdef =>
          // Definitions without a docstring yield nothing.
          defdef.symbol.docstring.flatMap { doc =>
            val comment = Cleaner.clean(doc).mkString(" ")
            Option.when(!comment.isBlank && defdef.rhs.isDefined)(
              s"${astCode(defdef)}␟${byteCode(defdef)}␟${sourceCode(defdef, fullNames = true)}␟${sourceCode(defdef, fullNames = false)}␟${comment}\n"
            )
          }
        }
        .foreach(bw.write)
    finally bw.close()

  extension (s: String)
    /** Replaces with a single space: each character in Unicode category C,
      * each run of whitespace, a trailing carriage return, and each literal
      * two-character sequence `\t`, `\n` or `\r`.
      */
    def removeNewLines: String =
      s.replaceAll("\\p{C}|\\s+|\\r$|\\\\t|\\\\n|\\\\r", " ")

  /** Renders `defdef` with `Extractors.showTree` and passes the result
    * through `removeNewLines`.
    */
  def astCode(using Quotes)(defdef: quotes.reflect.DefDef): String =
    Extractors.showTree(defdef).removeNewLines

scala

// Runs a MyInspector through TastyInspector.inspectAllTastyFiles, passing an
// empty list, a one-element list holding the first `classpath` entry, and the
// remaining `classpath` entries.
// NOTE(review): the three arguments are presumably (tastyFiles, jars,
// dependenciesClasspath) — confirm against the TastyInspector API.
// NOTE(review): `classpath.head` throws when `classpath` is empty. Also,
// `classpath` is used as a collection here while MyInspector declares
// `classpath: String` — confirm the intended type at the call site.
TastyInspector.inspectAllTastyFiles(
 Nil,
 List(classpath.head),
 classpath.tail.toList
)(
 new MyInspector(coordinates, classpath)
)

scala

/** Renders `defdef` as source text with `SourceCode.showTree` (plain syntax
  * highlighting) and passes the result through `removeNewLines`.
  *
  * @param defdef
  *   the method definition to render
  * @param fullNames
  *   forwarded unchanged to `SourceCode.showTree`
  * @return
  *   the rendered text, or `"NO_SOURCECODE"` when rendering throws a
  *   non-fatal exception
  */
def sourceCode(using Quotes)(
    defdef: quotes.reflect.DefDef,
    fullNames: Boolean
): String =
  Try {
    val rendered = SourceCode.showTree(defdef)(SyntaxHighlight.plain, fullNames)
    rendered.removeNewLines
  }.getOrElse("NO_SOURCECODE")

scala

/** Looks up the compiled method that corresponds to `defdef` and renders its
  * bytecode as text passed through `removeNewLines`.
  *
  * The owner's full name, with every `$.` rewritten to `$`, is loaded through
  * a `SyntheticRepository` built on `classpath`; the first method whose name
  * equals the symbol's name is used.
  *
  * @param defdef
  *   the method definition whose bytecode is wanted
  * @return
  *   the rendered bytecode, or `"NO_BYTECODE"` when the class cannot be
  *   loaded, no method with that name exists, or the method has no code
  */
def byteCode(using Quotes)(defdef: quotes.reflect.DefDef): String =
  val rendered =
    for
      methods <- Try {
        SyntheticRepository
          .getInstance(ClassPath(classpath))
          .loadClass(defdef.symbol.owner.fullName.replaceAll("\\$\\.", "\\$"))
          .getMethods()
      }.toOption
      method <- methods.toList.find(_.getName == defdef.symbol.name)
      // getCode is compared against null in the original; Option(...) maps
      // that null to None.
      code <- Option(method.getCode)
    yield Utility
      .codeToString(code.getCode, code.getConstantPool, 0, -1, true)
      .removeNewLines
  rendered.getOrElse("NO_BYTECODE")

Curated by

Sebastian Synowiec

Liked the article?

Share it with others!

explore more on

Take the first step to a sustained competitive edge for your business

Let's connect

VirtusLab's work has met the mark several times over, and their latest project is no exception. The team is efficient, hard-working, and trustworthy. Customers can expect a proactive team that drives results.

Stephen Rooke
Stephen Rooke, Director of Software Development @ Extreme Reach

VirtusLab's engineers are truly Strapi extensions experts. Their knowledge and expertise in the area of Strapi plugins gave us the opportunity to lift our multi-brand CMS implementation to a different level.

facile logo
Leonardo Podda, Engineering Manager @ Facile.it

VirtusLab has been an incredible partner since the early development of Scala 3, essential to a mature and stable Scala 3 ecosystem.

Martin_Odersky
Martin Odersky, Head of Programming Research Group @ EPFL

The VirtusLab team's in-depth knowledge, understanding, and experience of technology have been invaluable to us in developing our product. The team is professional and delivers on time – we greatly appreciated this efficiency when working with them.

Michael_Grant
Michael Grant, Director of Development @ Cyber Sec Company