import React from "react"
import {Button, Row, Col, Divider, Modal, Radio, Popover, Card, Tabs } from 'antd';
import { Collapse, Icon } from 'antd';
import { graphql } from "gatsby"
import katex from 'katex'
import Img from 'gatsby-image'
import Layout from '../../components/Layouts';
import BlogPostChrome from "../../components/BlogPostChrome"
import CustomAttentionImageGridWithCaption from './CustomAttentionImageGridWithCaption';
import CartoonPlot from './CartoonPlot';
import NMTVanilla from './assets/NMTVanilla.svg';
import WhatNext from './assets/whatnext.svg';
import "./NIC.css"
// import herdGIF from './herd.gif'
// import ridingGIF from './ridingman.gif'
// import animalGIF from './animal.gif'
// import dogbikeGIF from './dogbike.gif'


// Short aliases for the antd sub-components used in the JSX below.
const Meta = Card.Meta;
const Panel = Collapse.Panel;
const TabPane = Tabs.TabPane;

// Post metadata exported next to the page component.
// NOTE(review): presumably this literal is extracted statically into the
// `javascriptFrontmatter` node that `pageQuery` selects further down this file —
// keep it a plain object literal of string values; confirm against the site's
// Gatsby configuration before restructuring it.
export const frontmatter = {
    title: `Using Attention to Improve Neural Image Caption Generator`,
    written: `2020-01-18`,
    updated: `2020-01-18`,
    layoutType: `post`,
    contentType: `blog`,
    path: `/attn-nic/`,
    category: `Deep Learning`,
    image: `./poster3.png`,
    cover: `./poster3.png`,
    coverText: "Animals in the wild at the Ranthambore National Park, Rajasthan",
    description: `We use soft attention based technique to improve vanilla image caption model that we saw in the previous post.`
}

// Inline style shared by every collapsible <Panel> on this page:
// light grey, borderless, rounded card with spacing below it.
const customPanelStyle = {
  background: "#f7f7f7",
  border: 0,
  borderRadius: 4,
  marginBottom: 24,
  overflow: "hidden",
};

const KatexEquation = (props) => {
  var eq = katex.renderToString(props.equation, {
      throwOnError: false
  });
  
  return (
      <span dangerouslySetInnerHTML={{__html: eq}} /> 
  )
} 

// Decoding strategies offered by the radio group.
// Each entry: `value` = radio value kept in component state, `name` = label
// shown to the reader, `code` = identifier handed to the caption grid.
const decoderMap = [
  [1, "Greedy", "greedy"],
  [2, "Beam Search", "beam"],
  [3, "Pure", "pure"],
  [4, "Top-K", "top_k"],
].map(([value, name, code]) => ({ value, name, code }));

const PreviewCard = props => {
  const { cover, title, description } = props;
  return (
    <Card
      hoverable
      style={{ width: 400 }}
      cover={<Img fluid={cover} />}
    >
    <Meta title={title} description={description} />
    </Card>
  )  
}

class NICPage extends React.Component {
    // constructor(props) {
    //     super(props)
    // }

    state = {
      decoder: 4,
      playingCartoon: false
    };


    handleDecoderChanged = (e) => {
      this.setState({decoder: e.target.value});
    }


    decoderIdToString(decId) {
      const result = decoderMap.filter(item => item.value == decId);
      return result.length > 0 ? result[0].code : "greedy";      
    }
    
    render() {    
      const nmt_attn = this.props.data.nmt_attn.childImageSharp;
      const attn_cool = this.props.data.attn_cool.childImageSharp;
      const global_attn = this.props.data.global_attn.childImageSharp;
      const local_attn = this.props.data.local_attn.childImageSharp;

      const { playingCartoon = false} = this.state;
        return (
            <Layout data={this.props.data} location={this.props.location}>
            <BlogPostChrome {...this.props.data.javascriptFrontmatter}>
            <h1 style={{ textAlign: "center"}}>
              Using Attention to Improve Neural Image Caption Generator
            </h1>
          <p
          className="header-subtitle"
          style={{ marginTop: 20, marginBottom: 10 }}
        >          
          18 Jan, 2020
        </p>            
            <h2>Introduction</h2>
            <p>
            In this post, I attempt to explain the attention in Neural networks 
            in the context of image captioning. And, how it improves the model performance and provides some interpretability.
            We will manually inspect the captions generated by the vanilla model against that of the attention powered one.
            We will also glance through the automatic evaluation criteria used for comparing models in the NLP world and 
            review the pros and cons of using such evaluation metric.
            </p>
            <p>
            Recollect that, in the <a href="/nic-p1">previous post</a>, we used vanilla seq2seq architecture to create a model that could caption the images.
            The seq2seq architecture has two parts: encoder and decoder. 
            We use the encoder to convert the input image into a low dimension image vector. 
            The image vector is then used by the decoder to generate the captions. 
            The decoder generates the caption by producing a single word at every time step. This 
            is done using a combination of word generated at the previous step and the image vector. 
            It continues to do so until it reaches the end of sentence marker or a pre-defined 
            max number of timesteps.          
            </p>
            <p>
            We will make many references to the neural machine translation (NMT) because innovations 
            such as seq2seq, attention were introduced there. 
            These were later adopted by other domains in NLP as well as Computer Vision.
            </p>
            <h2>Motivation</h2>
            <p>In NMT, we pass the source sentence through the encoder 
              one word at a time. At each timestep, the encoder updates its hidden state, and we expect
              the final hidden state <span className="encoder-tag"><KatexEquation equation="{h_N^{encoder}}" /></span> to 
              encapsulate enough information to allow the decoder to generate the translation. 
              The decoder makes use of the 
              <span className="encoder-tag"><KatexEquation equation="h_N^{encoder}" /></span> along with 
              its own internal hidden state <span className="decoder-tag"><KatexEquation equation="{h_t^{decoder}}" /></span>, 
              to generate one word at a time.
              We seem to be asking a lot from the final hidden state of encoder, and 
              indeed, the layer highlighted in <span style={{color: "red"}}>red</span> is 
              the <span style={{color: "red"}}>information bottleneck</span>.
              </p>
              <p>
                We visualise the NMT architecture below with input in the Sanskrit language and its English translation.
                <blockquote>
                The input source is taken from the <a href="https://en.wikipedia.org/wiki/Bhagavad_Gita">Bhagavad Gita</a>, the ancient Sanskrit scripture, that talks about focusing on the process 
                rather than the results
                </blockquote>
              </p>
              <NMTVanilla />            
            <p style={{fontSize: 12, textAlign: "center"}}>Example of seq2seq NMT model</p>
            <p>
            If you and I were to caption an image, 
            we would most likely look at specific parts of the image as we come up with the caption. 
            In contrast, our model looked at the entire image (vector) at every timestep. What if we could 
            teach the network to focus on certain parts instead? Similarly, in NMT, what if the decoder 
            could access all the hidden states in encoder and somehow learn to decide how much should it 
            focus on each <span className="encoder-tag"><KatexEquation equation="{h_1^{encoder},h_2^{encoder},...,h_N^{encoder}}" /></span>
            &nbsp; to generate the next word in target language.
            And, this motivates the concept of attention in neural networks.
            </p>            
            <h2>Attention</h2>
            <p>Attention allows the neural network, the decoder in case of image caption, 
              to focus on the specific parts of the image as it generates the caption. Before we see 
              how it is done, let's visualise attention using the cartoon below.
            </p>            
            <CartoonPlot />
            <p style={{fontSize: 12, textAlign: "center"}}>Click on the play button to see the animation</p>
            <p>
              We overlay the attention heatmap to visualise what parts are in focus at each timestep. 
              As you can see, the decoder now focuses on certain parts of the image as it decides the 
              next word. At this point, I must emphasize that without attention, the decoder in previous, 
              used the entire image vector.
            </p>
            <p>
              The hidden state of decoder <KatexEquation equation="h_N^{decoder}" /> is called <span className="decoder-tag"><b>query</b></span>, and the hidden state of 
              encoder <KatexEquation equation="h_N^{encoder}" /> in called <span className="encoder-tag"><b>value</b></span>. In our case, the value is simply the image 
              vector (i.e the output of CNN based encoder). We calculate attention using the hidden state of decoder at a particular 
              timestep and the entire image vector from the CNN based encoder. With that, its time for the definition:
            </p>
            <blockquote> Attention is the weighted sum of values dependent on the query</blockquote>
            <p>
              In the case of NMT seq2seq, usually, both encoder and decoder are some variants of 
              RNN and hence have internal hidden states. The decoder makes use of hidden states 
              from all steps (
                <KatexEquation equation="h_1^{encoder}" />, <KatexEquation equation="h_2^{encoder}" />, ...., 
                <KatexEquation equation="h_N^{encoder}" />) to calculate 
              attention score. This means that decoder is no longer restricted by the limitations of 
              relying on the final hidden state <KatexEquation equation="h_N^{encoder}" />. Thus, attention provides a solution to 
              the information bottleneck problem we saw earlier.
            </p>
            <Collapse 
            bordered={false}
            // defaultActiveKey={[]}
            expandIcon={({ isActive }) => <Icon type="caret-right" rotate={isActive ? 90 : 0} />}
            >
            <Panel header="Click here to see how Attention is calculated in NMT" key="1" style={customPanelStyle}>
              <Img sizes={nmt_attn.fluid} />
              <p style={{fontSize: 12, textAlign: "center"}}>Source: XCS224N Lecture 8 Slide</p>
            </Panel>
            </Collapse>

            <h3>General Framework</h3>
            <p>There are several variants of attention, but the process of generating attention generally follows the following three steps :</p>
            <ol>
              <li>
                <b>Attention Score</b>: 
                Calculate attention score, <KatexEquation equation="e^t \in \R^N" />, using the hidden state of encoder &nbsp; 
                <KatexEquation equation="h_i^{encoder} \in \R^h" /> and the hidden state of decoder &nbsp;
                <KatexEquation equation="s_t \in \R^h" />
                </li>
              <li>
                <b>Attention Distribution</b>: 
                Calculate attention distribution using softmax over all hidden states <KatexEquation equation="\alpha^t = softmax(e^t) \in \R^N" />
                </li>
              <li>
                <b>Attention Output</b>: 
                Calculate attention output, also known as <b>context vector</b>, by taking the weighted sum of the encoder 
                hidden state and attention distribution <KatexEquation equation="a_t=\sum_{i=1}^{N} \alpha_i^t h_i^{encoder} \in \R^h" />
                </li>                
            </ol>
            <p>                
              We then concatenate attention output <KatexEquation equation="a_t" /> and the decoder 
              hidden state <KatexEquation equation="s_t" /> and continue with rest of the forward pass 
              depending on the architecture (e.g., In this case, we kept the architecture same as the vanilla seq2seq model, 
              GRU -> fully connected) &nbsp; <KatexEquation equation="[a_t;s_t] \in \R^{2h}" />
            </p>
            <p>
              These steps are discussed in the context of NMT but are also applicable to the image caption model. 
              Instead of encoder hidden state, we just make use of image vector. If it helps, imagine 
              setting <KatexEquation equation="h^{encoder} = \text{image vector}" /> and re-reading the steps above.
            </p>


            <h3>Types of Attention</h3>
            <p>
              There are several ways to compute the attention score. In this post, we cover three 
              common ones listed below. If you're interested in learning more, 
              I recommend <a href="https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html" target="_blank" rel="noopener noreferrer">this</a> detailed 
              post on attention mechanisms 
              (e.g <a href="https://arxiv.org/abs/1508.04025" target="_blank" rel="noopener noreferrer">Hard</a> vs <a href="https://arxiv.org/abs/1409.0473" target="_blank" rel="noopener noreferrer">Soft</a> attention, 
              <Popover key={`global-attn-previewer`} 
                content={
                  <PreviewCard cover={global_attn.fluid} description="source: https://arxiv.org/abs/1508.04025" />
                } 
              >
              <Button type="link" size="large" >Global</Button>
              </Popover> vs 
              <Popover key={`global-attn-previewer`} 
                content={
                  <PreviewCard cover={local_attn.fluid} description="source: https://arxiv.org/abs/1508.04025" />
                } 
              >
              <Button type="link" size="large">Local</Button>
              </Popover> attention).
              </p>
            <h4>Dot Product Attention</h4>
            <div>
            <p>
              The most basic but fastest form of attention in terms of compute.
              We calculate the attention score using decoder hidden state <KatexEquation equation="s \in \R^h" /> and the hidden 
              state <KatexEquation equation="h_i^{encoder} \in \R^h" /> for every step <KatexEquation equation="{1..N}" /> 
              and summing them together. This requires the encoder and decoder hidden state dimensions to be the same i.e. <KatexEquation equation="d_1=d_2" />:
            </p>
              <KatexEquation equation="e^t = [s_t^T h_1,s_t^T h_2...,s_t^T h_N]  \in \R^h" /> 
            </div>

        <h4 style={{marginTop: "8px"}}>Multiplicative Attention</h4>
        <p>
          In this case, we make use of a weight matrix <KatexEquation equation="W" /> to compute the attention score. 
          This allows the dimensions d1 and d2 of encoder and decoder hidden states to be different. 
          The weight matrix <KatexEquation equation="W" /> is learned during the training phase:
          </p>
          <KatexEquation equation="e_i = s^TWh_i \in \R" /> <br/>
          <KatexEquation equation="W \in \R^{d2\, *\, d1}" />
        <h4 style={{marginTop: "8px"}}>Additive Attention</h4>
        <p>
          This form makes use of two weight matrices <KatexEquation equation="W_1" /> and <KatexEquation equation="W_2" /> along 
          with the weight vector <KatexEquation equation="v" />. The weight matrices are learned during the training phase:
        </p>        
        <KatexEquation equation="e_i = v^Ttanh(W_1h_i + W_2s) \in \R" /> <br/>
        <KatexEquation equation="v \in \R^{d_3} \quad W_1 \in \R^{d_3 * d_1} \quad W_2 \in \R^{d3*d2}" />
      <Divider />
      <h2>Examples</h2>
      <p>
        In this section, we  compare the captions generated by vanilla seq2seq from the previous post 
        against that of attention powered model and the human baseline, that was created by an 8-year-old. 
        Choose the <a href="/nlg-decoders">decoding algorithm</a> from below and hover over the image to see the captions.
      </p>
      <Radio.Group name="radiogroup" defaultValue={this.state.decoder} onChange={this.handleDecoderChanged} style={{marginBottom: "8px"}}>
        {
          decoderMap.map(item => <Radio value={item.value}>{item.name}</Radio>)
        }
      </Radio.Group>

      <CustomAttentionImageGridWithCaption decoder={this.decoderIdToString(this.state.decoder)} />
      <p style={{fontSize: 12, textAlign: "center"}}>Hover over the image to view the captions</p>
      <p>
        In my experiments with decoding algorithms, Pure Sampling Decoder and Top-K Sampling Decoder tend to generate better captions.
      </p>
      <h2>Benefits</h2>
      <p>
        Attention greatly improves the model performance by allowing the decoder to focus on certain 
        parts of the encoder. In the NMT vanilla seq2seq model, only the last hidden state of the encoder 
        is used to generate the translation, which causes the information bottleneck situation. Attention 
        solves this issue by providing direct access to all states, which in turn also solves the vanishing 
        gradient problem. This is similar to how resnets solve it by providing skip-connections. We also get some 
        interpretability because we can now see what parts of the input were responsible for a particular 
        word in the caption. In the example below, we overlay attention output over the input image.
      </p>
      <Tabs defaultActiveKey="1">
      <TabPane tab="Example 1" key="1">
        <blockquote>
            Visualising attention as the model generates the caption: <b>Herd of cattle are walking in a field with lots of grass</b>
          </blockquote>
        {/* <img src={herdGIF} alt="Visualising attention over the image" />  */}
        <iframe style={{marginBottom: 20, width: 822, height: 612}} src="https://www.youtube.com/embed/Z045JKF7uGs" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>        
      </TabPane>
      <TabPane tab="Example 2" key="2">
      <blockquote>
            <b>Man riding a bike on a long street</b>
          </blockquote>
      {/* <img src={ridingGIF} alt="Visualising attention over the image" />  */}
      <iframe style={{marginBottom: 20, width: 1068, height: 618}} src="https://www.youtube.com/embed/zqgQpgJ1cJE" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>      
      </TabPane>
      <TabPane tab="Example 3" key="3">
        <blockquote>
            <b>Large horned animal standing up on a lush green field</b>
          </blockquote>
      {/* <img src={animalGIF} alt="Visualising attention over the image" />  */}
      <iframe style={{marginBottom: 20, width: 808, height: 610}} src="https://www.youtube.com/embed/c5GF6OG724E" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
      </TabPane>
      <TabPane tab="Fail" key="4">
        <blockquote>
            <b>People riding a dog on a motorcycle behind it 🤣</b>
        </blockquote>
      {/* <img src={dogbikeGIF} alt="Visualising attention over the image" />  */}
      <iframe style={{marginBottom: 20, width: 808, height: 610}} src="https://www.youtube.com/embed/a5PvsqCRHnA" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>      
      </TabPane>
      </Tabs>

    

      <p>
        <Collapse 
            style={{borderTop: "8px"}}
            bordered={false}
            // defaultActiveKey={[]}
            expandIcon={({ isActive }) => <Icon type="caret-right" rotate={isActive ? 90 : 0} />}
            >
            <Panel header="Doodle: Why attention is cool" key="1" style={customPanelStyle}>
              <Img sizes={attn_cool.fluid} />
              <p style={{fontSize: 12, textAlign: "center"}}>From my lecture notes</p>
            </Panel>
            </Collapse>
          </p>

      <h2>Evaluation</h2>
      <p>
        We did not cover any evaluation metrics while comparing the performance of vanilla and attention-based model. 
        There are a few evaluation metrics used in the Machine Translation world that are useful;  
        one of them is Bilingual Language Evaluation Understudy 
        (<a href="https://en.wikipedia.org/wiki/BLEU" target="_blank" rel="noopener noreferrer">BLEU</a>). 
        BLEU generates a score between 0 and 1 by comparing the generated caption against the gold standard reference sentences (i.e human-generated caption). 
        For simplicity, let us assume we have a single <span className="yellow-tag">generated sentence G</span> and 
        the <span className="green-tag">reference sentence R</span>. 
        We can then calculate BLEU score using below:
      </p>

      <ul>
        <li>
          Calculate n-gram precision score where <KatexEquation equation="n = \{1,2,3,4\}, \," /> <br/>
          Calculate the precision score <KatexEquation equation="p_1"/> for unigrams: <br/>
          let <span className="yellow-tag"><KatexEquation equation="{n}" /></span> be the count of 
          unigrams in G that exist 
          in R and <span className="green-tag"><KatexEquation equation="{d_R}" /></span> be the count of 
          all unigrams in R <br/>
          we then calculate precision score using <KatexEquation equation="p_1 = \frac {n} {d_R}" />
        </li>
        <li>
        Calculate the precision score for all n-grams yielding <KatexEquation equation="p_1, p_2, p_3 \, and" /> and <KatexEquation equation="p_4" />
        </li>
        <li>
        Calculate the geometric mean of n-gram precision score <br/>
          <KatexEquation equation="GM = exp (\,\sum_{n=1}^4w_nlogp_n)" />
        </li>
        <li>
        Apply brevity factor to penalise short sentences <KatexEquation equation="BP = \begin {cases} 1 \quad if \; c > r\\ e^{(1-\frac{r}{c})} \quad if \; c \leqslant r\end{cases}" /> where <span className="yellow-tag">c</span> is the length of model generated
        caption and <span className="green-tag">r</span> is the length of reference sentence. 
        As you can see in the image samples above, I was unable to apply brevity factor on the baseline human captions 😜.
        </li>
        <li>
        The BLUE-4 score then is <KatexEquation equation="BP * GM" />.
        </li>
      </ul>
      <p>
      Below is the non-exhaustive list of pros and cons of using BLEU as an evaluation metric.
      </p>
      <table class="table">
        <thead>
          <tr>
            <th>PROS</th>
            <th>CONS</th>
          </tr>
        </thead>
        <tbody>
          <tr>
            <td>
              <ul>
                <li>An automated approach to evaluate model performance</li>
                <li>Fast, easy and computationally inexpensive</li>
                <li>Helped progress the field because researchers can quickly iterate over models and evaluate their performance</li>
              </ul>
            </td>
            <td>
              <ul>
                <li>Lacks correlation with human translation</li>
                <li>Fails to capture the meaning of the sentence</li>
                <li>Improved BLEU score does not necessarily improve the translation quality</li>
              </ul>
            </td>
          </tr>
        </tbody>
      </table>
      {/* <p>
        BLEU provides a fast, easy, and automatic way to compare model performance. It probably has 
        enabled the field along with other automatic evaluation metrics to make progress by allowing 
        researchers to try out ideas quickly, compare performance, and adjust the model.
      </p>
      <p>
        However, it is far from perfect. Among its main criticisms is the lack of correlation with 
        human performance and the lack of ability to capture the meaning of the sentence.
      </p> */}
      <p>
        The model with a high BLEU score is considered good on the leaderboards. And naturally, people started to optimize their models to achieve high BLEU scores. 
        As a result, the models started to attain high BLEU scores, but their correlation with human performance diverged. In other words, 
        the improvements in BLEU score did not translate in improvements against human performance.
        Rachael Tatman covers BLEU and its pitfall in detail <a href="https://towardsdatascience.com/evaluating-text-output-in-nlp-bleu-at-your-own-risk-e8609665a213" target="_blank" rel="noopener noreferrer">here</a>.
      </p>
      <Divider />
      <div style={{marginTop: 10}}>
        <h4>Conclusion</h4>
        <p>
          We introduced the concept of attention in neural networks by choosing an existing baseline model
          and improving it. We observed that attention powered model generated better captions, as shown in 
          the image samples above. We then reviewed a few basic kinds of attention. We found that we could 
          reuse the baseline model architecture and retrofit attention specific logic to it.
          We also found that attention provided added benefit of interpretability by overlaying the attention 
          output over the input image.
        </p>
        <p>
          Overall I found that model generated captions are still not as good as 
          human counterparts and still require quite a bit of work. 
          Some of the ideas to improve the performance include improving the data domain, 
          tuning hyperparams and experimenting with different architectures.
        </p>
        {/* <Img sizes={what_next.fluid} /> */}
        <WhatNext />
        <p>
        Judging by the recent trend in the NLP world, it turns out that Attention is all you need 😉.          
        </p>
        <p>
          <b>TODO</b> Share the code and notebook 👨‍💻
        </p>
        </div>
      <Divider />
        <Row>
          <Col span={4} className="references-header">References & Links:</Col>
          <Col span={20} className="references-text">
              <ol>
              <li>
              Generating Captions using Neural Networks
                  &nbsp;[<a href="/nic-p1" target="_blank" rel="noopener noreferrer">LINK</a>]
              </li>
                <li>
                Stanford Advanced AI Course: XCS224N - Natural Language Processing with Deep Learning
                </li>
                <li>
                BLEU Paper
                  &nbsp;[<a href="https://www.aclweb.org/anthology/P02-1040.pdf" target="_blank" rel="noopener noreferrer">PDF</a>]                                
              </li>
              <li>
              Attention blog  
                  &nbsp;[<a href="https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html" target="_blank" rel="noopener noreferrer">LINK</a>]                                
              </li>
              <li>
              Re-evaluating the Role of BLEU in Machine Translation Research              
                  &nbsp;[<a href="https://www.aclweb.org/anthology/E06-1032.pdf" target="_blank" rel="noopener noreferrer">LINK</a>]
              </li>
              <li>
              Evaluating Text Output in NLP: BLEU at your own risk
                  &nbsp;[<a href="https://towardsdatascience.com/evaluating-text-output-in-nlp-bleu-at-your-own-risk-e8609665a213" target="_blank" rel="noopener noreferrer">LINK</a>]
              </li>
              <li>
                Tensorflow NMT Notebook
                [<a href="https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb" rel="noopener noreferrer">LINK</a>]
              </li>
              <li>
              Effective Approaches to Attention-based Neural Machine Translation
                [<a href="https://arxiv.org/abs/1508.04025" rel="noopener noreferrer">LINK</a>]
              </li>
              <li>
              Neural Machine Translation by Jointly Learning to Align and Translate
                [<a href="https://arxiv.org/abs/1409.0473" rel="noopener noreferrer">LINK</a>]
              </li>
              <li>
              Beyond Narrative Description: Generating Poetry from Images by Multi-Adversarial Training
                [<a href="https://www.microsoft.com/en-us/research/uploads/prod/2018/10/img2poem_final_camera_ready.pdf" rel="noopener noreferrer">LINK</a>]
              </li>
              </ol>
          </Col>
        </Row> 
      </BlogPostChrome>
      </Layout>    
        )

    }
}    

export default NICPage


// Page-level GraphQL query whose result NICPage reads from `this.props.data`.
// It selects the `javascriptFrontmatter` node whose slug equals `$slug`, plus
// five image files matched by a `relativePath` regex, each as fluid
// `childImageSharp` data under the aliases attn_cool, nmt_attn, what_next,
// global_attn and local_attn.
// NOTE(review): `what_next` is queried but NICPage.render() never reads it (the
// page renders the imported WhatNext SVG instead) — confirm whether it can go.
// NOTE(review): the dots in the regex patterns are unescaped, so they match any
// character rather than a literal "." — confirm that is acceptable.
export const pageQuery = graphql`
query ($slug: String!) {
  javascriptFrontmatter(fields: { slug: { eq: $slug } }) {
    ...JSBlogPost_data
  }
  attn_cool: file(
    relativePath: {regex: "/attn-is-cool.png/"}) {
      childImageSharp {
        fluid(maxWidth: 3000, quality: 100) {
          ...GatsbyImageSharpFluid
          presentationWidth
        }
      }              
}    
nmt_attn: file(
    relativePath: {regex: "/cs224n-slide.png/"}) {
    childImageSharp {
      fluid(maxWidth: 1200, quality: 100) {
        ...GatsbyImageSharpFluid
        presentationWidth
      }
    }
  }    
  what_next: file(
    relativePath: {regex: "/what-next.png/"}) {
    childImageSharp {
      fluid(maxWidth: 1200, quality: 100) {
        ...GatsbyImageSharpFluid
        presentationWidth
      }
    }
  }       
  global_attn: file(
    relativePath: {regex: "/global-attn.png/"}) {
    childImageSharp {
      fluid(maxWidth: 600, quality: 100) {
        ...GatsbyImageSharpFluid
        presentationWidth
      }
    }
  }       
  local_attn: file(
    relativePath: {regex: "/local-attn.png/"}) {
    childImageSharp {
      fluid(maxWidth: 600, quality: 100) {
        ...GatsbyImageSharpFluid
        presentationWidth
      }
    }
  }       
}
`