import React from 'react'

import { Container, Typography, Button, Link, Fade, Slide } from '@material-ui/core'
import GitHubIcon from '@material-ui/icons/GitHub'
import WebIcon from '@material-ui/icons/Web'
import LinkedInIcon from '@material-ui/icons/LinkedIn'

import { makeStyles, useTheme } from '@material-ui/core/styles'
import useMediaQuery from '@material-ui/core/useMediaQuery'

import ScrollToTop from '../Utils/ScrollToTop'

/**
 * Style hook for the About page.
 *
 * Builds three class names from the active theme:
 * - `container`: top margin of 24 spacing units, overridden with 4 units inside the
 *   `breakpoints.down('xs')` media query.
 * - `section`: top margin of 6 spacing units.
 * - `button`: right margin of 2 spacing units, with text transformation disabled.
 */
const useStyles = makeStyles(function createAboutStyles(muiTheme) {
  const xsMediaQuery = muiTheme.breakpoints.down('xs')

  const container = { marginTop: muiTheme.spacing(24) }
  container[xsMediaQuery] = { marginTop: muiTheme.spacing(4) }

  const section = { marginTop: muiTheme.spacing(6) }

  const button = {
    marginRight: muiTheme.spacing(2),
    textTransform: 'none'
  }

  return { container, section, button }
})

function AboutPage() {
  const classes = useStyles()
  const theme = useTheme()
  const matches = useMediaQuery(theme.breakpoints.down('sm'))
  return (
    <>
      <ScrollToTop />
      <Slide in direction="up">
        <Fade in>
          <Container fixed className={classes.container}>
            <Typography variant={matches ? 'h4' : 'h2'} component="h1" gutterBottom color="primary">
              Multi-languange affect analysis using neural networks with zero shot cross lingual
              transfer learning.
            </Typography>
          </Container>
        </Fade>
      </Slide>
      <Container maxWidth="md" className={classes.container}>
        <Typography variant="h4" component="h2" gutterBottom>
          Abstract
        </Typography>
        <Typography paragraph>
          This project demonstrates using AI to determine emotion in text sentences from tweets in
          (close to) any language.
        </Typography>
        <Typography paragraph>
          It has an <b>averaged accuracy of 83%</b> against test data (ranging from 76% to 93%
          depending on the specific emotion) and has only learned from reading the equivalent of 1
          printed newspaper from Monday to Friday on one single week. To give some context, there is{' '}
          <Link
            target="blank"
            rel="noopener"
            href="https://www.informationweek.com/software/information-management/expert-analysis-is-sentiment-analysis-an-80--solution/d/d-id/1087919"
          >
            research
          </Link>{' '}
          <Link
            target="blank"
            rel="noopener"
            href="https://www.lexalytics.com/lexablog/sentiment-accuracy-baseline-testing"
          >
            available
          </Link>{' '}
          showing that the agreement between human analysts when performing sentiment analysis can
          be quantified at the 80% - 90% range.
        </Typography>
        <Typography>
          {`A note of warning: this AI doesn't understand irony or sarcasm, so mind the results if you
          feed that kind of tweets (it can be hilarious!)`}
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          But what is affect analysis?
        </Typography>
        <Typography paragraph>
          {`Let's start with sentiment analysis. Sentiment analysis is a field of study dedicated to
          systematically extracting affective states and subjective information from language.`}
        </Typography>
        <Typography paragraph>
          The most basic analysis possible is polarity: determining if a body of text or speech has
          positive, negative or neutral connotations. Affect analysis is a classification task where{' '}
          <b>text or speech is mapped to concrete emotional states</b>{' '}
          {`like "happy", "sad" or
          "angry". This project does affect analysis to determine 11 emotions in sentences: anger, 
          anticipation, disgust, fear, joy, love, optimism, pessimism, sadness, surprise and trust.`}
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          OK, and neural networks?
        </Typography>
        <Typography paragraph>
          Neural networks, or formally Artificial Neural Networks (ANN), are systems{' '}
          <b>inspired in how the human brain, and neurons in particular, work</b>.
        </Typography>
        <Typography paragraph>
          They are made of <b>layers</b>: one input layer, one output layer and any number of hidden
          layers. Layers are built with a number of nodes (or <b>artificial neurons</b>), and nodes
          are interconnected with nodes from the neighbour layers. At each node, the incoming
          connection have weights and the output ones have thresholds (above a certain level they
          fire signal,below they do not).
        </Typography>
        <Typography paragraph>
          All these parameters are determined by running successive training processes (measuring
          the accuracy of the output for known inputs) and iterating them with mathematical
          optimization functions. This allows a neural network to{' '}
          <b>learn relationships or patterns</b> from the training data that is fed. A trained
          neural network can then be applied to identify those relationships or patterns on new
          data.
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          Tell me about sentence embeddings
        </Typography>
        <Typography paragraph>
          <b>Natural Language Processing</b> (NLP) is the field of AI that deals with enabling
          machine understanding of human language. NLP systems can be used for tasks like document
          summarization, translation, speech recognizion, predictive typing, sentiment analysis,
          classification of texts, etc.
        </Typography>
        <Typography paragraph>
          Text in particular needs to be transformed into <b>vectors of numbers</b> to allow
          computers to run them through algorithms. These vectors are called embeddings. Two
          established ways to do this are processing the words or processing the sentences.
        </Typography>
        <Typography paragraph>
          There are many processes to obtain sentence embeddings. At first, the methods proposed
          were able to produce embeddings on a specific language or for a specific corpus like news
          articles. Recently, multi-language methods have been developed.
        </Typography>
        <Typography paragraph>
          LASER (Language Agnostic SEntence Representations) by facebook Research is one of these
          methods. Their model was trained using a dataset of 223 million parallel sentences in 93
          languages from 34 families and 28 scripts. It can provide sentence embeddings on a
          1024-dimensional space that is <b>language-agnostic</b>.
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          Now explain to me zero shot crosslingual transfer learning
        </Typography>
        <Typography paragraph>
          Because the embeddings from LASER are not tied to a specific language, they can be used to{' '}
          <b>train AI systems</b> for specific NLP tasks (like classification){' '}
          <b>in one language</b> (say English) and then applying the AI <b>on any other language</b>{' '}
          (maybe Spanish). This training technique is what is called zero-shot crosslingual transfer
          learning.
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          So, step by step, what happens here?
        </Typography>
        <Typography paragraph>
          When you select a tweet (either pasting its link or using one of the preloaded examples)
          its full raw <b>text</b> is retrieved.
        </Typography>
        <Typography paragraph>
          This text is then sent to a server that breaks it down into <b>sentences</b>.
        </Typography>
        <Typography paragraph>
          {`The server transforms each of this sentences to vectors of 1024 values. These values
          represent the "meaning" of the sentence independent of the language: "I love you" and "Je
          t'aime" get almost the same 1024 values. These are the sentence embeddings.`}
        </Typography>
        <Typography paragraph>
          Next, the server uses this values as inputs of the 11 neural networks that we have trained
          for each specific task and results in a <b>probability</b> for each one of the 11 emotions
          being present on each sentence.
        </Typography>
        <Typography paragraph>
          The sentences and the probabilites are sent back to your web browser, and displayed on
          screen.
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          The technical explanation
        </Typography>
        <Typography paragraph>
          The development process of this AI method and a playground can be found here:{' '}
          <Link
            target="blank"
            rel="noopener"
            href="https://github.com/pbaitor/i18n-twitter-sentiment"
          >
            i18n-twitter-sentiment
          </Link>
        </Typography>
      </Container>
      <Container maxWidth="md" className={classes.section}>
        <Typography variant="h5" gutterBottom>
          Acknowledgments and inspiration
        </Typography>
        <Typography paragraph>Further reading if you have interest:</Typography>

        <Typography gutterBottom>
          <Link
            target="blank"
            rel="noopener"
            href="https://machinelearningmastery.com/multi-label-classification-with-deep-learning/"
          >
            Multi-Label Classification with Deep Learning
          </Link>
        </Typography>
        <Typography gutterBottom>
          <Link target="blank" rel="noopener" href="https://github.com/facebookresearch/LASER">
            Language-Agnostic SEntence Representations (LASER)
          </Link>
        </Typography>
        <Typography gutterBottom>
          <Link target="blank" rel="noopener" href="https://arxiv.org/pdf/1909.12642.pdf">
            HateMonitors: Language Agnostic Abuse Detection in Social Media
          </Link>
        </Typography>
        <Typography paragraph>
          <Link
            target="blank"
            rel="noopener"
            href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.7318&rep=rep1&type=pdf"
          >
            Multi-Label Neural Networks with Applications to Functional Genomics and Text
            Categorization
          </Link>
        </Typography>

        <Typography paragraph>You may look at the full list on the github repository.</Typography>

        <Typography paragraph variant="caption">
          <Button
            color="primary"
            startIcon={<GitHubIcon />}
            className={classes.button}
            component={Link}
            href="https://github.com/pbaitor"
            target="blank"
            rel="noopener"
          >
            @pbaitor
          </Button>
          <Button
            color="primary"
            startIcon={<LinkedInIcon />}
            className={classes.button}
            component={Link}
            href="https://linkedin.com/in/pbaitor"
            target="blank"
            rel="noopener"
          >
            in/aitorperez
          </Button>
          <Button
            color="primary"
            startIcon={<WebIcon />}
            className={classes.button}
            component={Link}
            href="https://aitorperez.com"
            target="blank"
            rel="noopener"
          >
            aitorperez.com
          </Button>
        </Typography>
      </Container>
    </>
  )
}

export default AboutPage
