import { MDBRow, MDBCol } from 'mdbreact';
import React from 'react';
import { BarChart, CartesianGrid, XAxis, YAxis, Tooltip, Legend, Bar, ResponsiveContainer } from 'recharts'

// Rows for the "ALT system performance on DSing" bar chart rendered by Result.
// Each tuple is [method label, validation WER, test WER]; the labels' bracketed
// numbers match the reference list printed above the chart.
// The 'NA' entry is a string, not a number, and is kept verbatim.
// NOTE(review): presumably this leaves the validation bar empty while the
// tooltip shows "NA" — confirm against the recharts version in use.
const data1 = [
  ['Dabike et al. [1]', 23.33, 19.60],
  ['Demirel et al. [2]', 17.70, 14.96],
  ['Demirel et al. [3]', 'NA', 15.38],
  ['Ours*', 13.26, 14.56],
].map(([name, validation_WER, test_WER]) => ({ name, validation_WER, test_WER }));

// Rows for the "Performance comparison using different modality combinations"
// bar chart rendered by Result.
// Each tuple is [modality combination label, validation WER, test WER].
const data2 = [
  ['Audio', 9.65, 13.00],
  ['Audio+Video', 9.56, 12.81],
  ['Audio+Video+IMU*', 9.45, 12.71],
].map(([name, validation_WER, test_WER]) => ({ name, validation_WER, test_WER }));

// Rows for the feature-fusion ablation bar chart rendered by Result.
// `name` is the label shown on the x-axis; `valid_WER` / `test_WER` are the
// two bar series. The key is `valid_WER` here (not `validation_WER` as in
// data1/data2) because the chart's <Bar dataKey="valid_WER"> reads it by that
// exact name.
// NOTE(review): the description text in Result quotes a 0.55 test-split gap
// for "w/o Self Attention", while these values give 64.67 - 63.92 = 0.75 —
// confirm which of the two is correct.
const data3 = [
  {
    name: 'w/o Cross Attention',
    valid_WER: 42.47,
    test_WER: 66.93,
  },
  {
    name: 'w/o Self Attention',
    valid_WER: 42.67,
    test_WER: 64.67,
  },
  {
    // Label spelling corrected ("Residul" -> "Residual"); it is displayed as-is.
    name: 'Residual Cross Attention*',
    valid_WER: 41.87,
    test_WER: 63.92,
  },
];

// Rows for the "Validation WER" robustness chart rendered by Result (x-axis
// captioned "SNR (dB)").
// Each tuple is [x-axis label, A_WER, AV_WER, AVI_WER]; the three value
// columns feed the bars labelled "A", "A-V" and "A-V-I". The label is a
// number for the SNR levels and a string for the 'Clean' and 'Average' rows.
const data4_valid = [
  [-10, 74.33, 42.50, 41.87],
  [-5, 45.45, 36.91, 36.91],
  [0, 25.28, 22.70, 22.81],
  [5, 16.06, 15.57, 15.06],
  [10, 12.37, 12.96, 12.51],
  ['Clean', 9.65, 9.56, 9.45],
  ['Average', 30.53, 23.37, 23.10],
].map(([name, A_WER, AV_WER, AVI_WER]) => ({ name, A_WER, AV_WER, AVI_WER }));

// Rows for the "Test WER" robustness chart rendered by Result (x-axis
// captioned "SNR (dB)").
// Each tuple is [x-axis label, A_WER, AV_WER, AVI_WER]; the three value
// columns feed the bars labelled "A", "A-V" and "A-V-I". The label is a
// number for the SNR levels and a string for the 'Clean' and 'Average' rows.
const data4_test = [
  [-10, 90.96, 64.83, 63.92],
  [-5, 63.02, 54.68, 54.68],
  [0, 35.70, 34.42, 34.44],
  [5, 22.19, 22.38, 22.89],
  [10, 17.41, 17.41, 17.03],
  ['Clean', 13.00, 12.81, 12.71],
  ['Average', 40.38, 34.42, 34.27],
].map(([name, A_WER, AV_WER, AVI_WER]) => ({ name, A_WER, AV_WER, AVI_WER }));

class Result extends React.Component {

  componentDidMount = () => {
  }

  render() {

    const styles = {
      landing: {
        padding: '10vh 15vw 0vh 15vw'
      },
      imageContainer: {
        display: 'flex',
        justifyContent: 'center',
        padding: '4vh 0vh'
      },
      image: {
        height: '45vh'
      }
    };

    return (
      <>
        <div style={styles.landing}>
          <MDBRow>

            <MDBCol md="12">
              <div className="header">
                {"Results"}
              </div>
            </MDBCol>

          </MDBRow>

          <MDBRow style={{ paddingTop: '2vh', paddingBottom: '2vh' }}>

            <MDBCol md="12">
              <div className="textHeader font-weight-bold">
                {"System Perfomance"}
              </div>
            </MDBCol>

            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="textDescription">
                {"In the first figure, we show the performance of the proposed audio-only ALT system on DSing[1] dataset. Experiments show that our audio encoder based on wav2vec 2.0 has stronger performance than TDNN and its variants."}
              </div>
            </MDBCol>

            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="textDescription">
                {"In the second figure, we show the performance of the proposed ALT system when consuming information from different combinations of modalities on the proposed N20EM dataset. The result shows that the more modalities the input contains, the higher performance it achieves."}
              </div>
            </MDBCol>

            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="resultsText">
                {"[1]: Dabike, Gerardo Roa, and Jon Barker. \"Automatic Lyric Transcription from Karaoke Vocal Tracks: Resources and a Baseline System.\" Interspeech. 2019."}
              </div>
              <div className="resultsText">
                {"[2]: Demirel Emir, Sven Ahlbäck, and Simon Dixon. \"Automatic lyrics transcription using dilated convolutional neural networks with self-attention.\" 2020 International Joint Conference on Neural Networks (IJCNN). IEEE, 2020."}
              </div>
              <div className="resultsText">
                {"[3]: Demirel Emir, Sven Ahlbäck, and Simon Dixon. \"MSTRE-Net: Multistreaming Acoustic Modeling for Automatic Lyrics Transcription.\" ISMIR2021, International Society for Music Information Retrieval. 2021."}
              </div>
            </MDBCol>
          </MDBRow>

          <MDBRow style={{ position: 'relative', right: '50px', paddingTop: '2vh', paddingBottom: '2vh', height: '400px' }}>
            <MDBCol sm="12" md="6">
              <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                {"ALT system performance on DSing"}
              </div>
              <ResponsiveContainer width="110%" height="80%">
                <BarChart data={data1}>
                  <CartesianGrid strokeDasharray="3 3" />
                  <XAxis dataKey="name" />
                  <YAxis
                    allowDecimals={false}
                    type="number"
                    domain={[10, 25]}
                    tickCount={6}
                  />
                  <Tooltip />
                  <Legend verticalAlign="top" />
                  <Bar dataKey="validation_WER" name="Validation WER" fill="#8884d8" />
                  <Bar dataKey="test_WER" name="Test WER" fill="#82ca9d" />
                </BarChart>
              </ResponsiveContainer>
              <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                {"Method"}
              </div>
            </MDBCol>


            <MDBCol sm="12" md="6">
              <div style={{ paddingLeft: '3vw', display: 'flex', justifyContent: 'center' }}>
                {"Performance comparison using different modality combinations"}
              </div>
              <ResponsiveContainer width="110%" height="80%">
                <BarChart data={data2}>
                  <CartesianGrid strokeDasharray="3 3" />
                  <XAxis dataKey="name" />
                  <YAxis
                    allowDecimals={false}
                    type="number"
                    domain={[9, 14]}
                    tickCount={6}
                  />
                  <Tooltip />
                  <Legend verticalAlign="top" />
                  <Bar dataKey="validation_WER" name="Validation WER" fill="#8884d8" />
                  <Bar dataKey="test_WER" name="Test WER" fill="#82ca9d" />
                </BarChart>
              </ResponsiveContainer>

              <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                {"Modality Combination"}
              </div>
            </MDBCol>
          </MDBRow>



          <MDBRow style={{ paddingTop: '2vh', paddingBottom: '2vh' }}>

            <MDBCol md="12">
              <div className="textHeader font-weight-bold">
                {"Comparison of Feature Fusion"}
              </div>
            </MDBCol>

            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="textDescription">
                {"To validate the effectiveness of our proposed RCA mechanism, we conduct an ablation study for the feature fusion module in our MM-ALT system. The results are summarized in the figure below. To magnify the differences, we evaluate the ALT performance in -10 dB SNR scenario. We find that without cross attention shortcuts, the ALT performance will drop 0.6\% and 3.01\% WER on the validation and test splits. Without a self-attention mechanism, the ALT performance will decrease by 0.8\% and 0.55\% WER respectively. These results suggest that RCA contributes to better feature fusion."}
              </div>
            </MDBCol>

          </MDBRow>

          <MDBRow style={{ paddingTop: '2vh', paddingBottom: '2vh', height: '400px' }}>
            <MDBCol sm="12" md="6" className="offset-md-2">
              <ResponsiveContainer width="110%" height="80%">
                <BarChart data={data3}>
                  <CartesianGrid strokeDasharray="3 3" />
                  <XAxis dataKey="name" />
                  <YAxis
                    allowDecimals={false}
                    type="number"
                    domain={[35, 70]}
                    tickCount={6}
                  />
                  <Tooltip />
                  <Legend verticalAlign="top" />
                  <Bar dataKey="valid_WER" name="Validation WER" fill="#8884d8" />
                  <Bar dataKey="test_WER" name="Test WER" fill="#82ca9d" />
                </BarChart>
              </ResponsiveContainer>
              <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                {"Feature Fusion"}
              </div>
            </MDBCol>
          </MDBRow>

          <MDBRow style={{ paddingTop: '2vh', paddingBottom: '2vh' }}>

            <MDBCol md="12">
              <div className="textHeader font-weight-bold">
                {"Robustness"}
              </div>
            </MDBCol>

            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="textDescription">
                {"When comparing the audio-only system with the audio-visual system, we noticed that the audio-visual system performs much better than the audio-only system in low SNR scenarios. For example, in -10 dB SNR, the audio-visual system significantly improves the audio-only system by about 30% WER. As the SNR level increases, the performance of the two systems gradually approaches. "}
              </div>
            </MDBCol>

            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="textDescription">
                {"After adding the IMU modality, we noticed that our 3-modal MM-ALT system achieves the best results in the majority of scenarios. The MM-ALT system achieves 23.10% and 34.27% WER on the valid and the test splits on average, surpassing both audio-only and audio-visual configurations. Without accompaniments, our multimodal system receives 9.45% WER on the valid split and 12.71% WER on the test split, which are the best results obtained for the N20EM dataset."}
              </div>
            </MDBCol>

            <MDBRow style={{ paddingTop: '2vh', paddingBottom: '2vh', height: '400px' }}>
              <MDBCol sm="12" md="6">
                <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                  {"Validation WER"}
                </div>
                <ResponsiveContainer width="105%" height="80%">
                  <BarChart data={data4_valid}>
                    <CartesianGrid strokeDasharray="3 3" />
                    <XAxis dataKey="name" />
                    <YAxis
                      allowDecimals={false}
                      type="number"
                      tickCount={6}
                    />
                    <Tooltip />
                    <Legend verticalAlign="top" />
                    <Bar dataKey="A_WER" name="A" fill="#8884d8" />
                    <Bar dataKey="AV_WER" name="A-V" fill="#82ca9d" />
                    <Bar dataKey="AVI_WER" name="A-V-I" fill="#ffc658" />
                  </BarChart>
                </ResponsiveContainer>
                <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                  {"SNR (dB)"}
                </div>
              </MDBCol>

              <MDBCol sm="12" md="6">
                <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                  {"Test WER"}
                </div>
                <ResponsiveContainer width="105%" height="80%">
                  <BarChart data={data4_test}>
                    <CartesianGrid strokeDasharray="3 3" />
                    <XAxis dataKey="name" />
                    <YAxis
                      allowDecimals={false}
                      type="number"
                      tickCount={6}
                    />
                    <Tooltip />
                    <Legend verticalAlign="top" />
                    <Bar dataKey="A_WER" name="A" fill="#8884d8" />
                    <Bar dataKey="AV_WER" name="A-V" fill="#82ca9d" />
                    <Bar dataKey="AVI_WER" name="A-V-I" fill="#ffc658" />
                  </BarChart>
                </ResponsiveContainer>
                <div style={{ paddingLeft: '7vw', display: 'flex', justifyContent: 'center' }}>
                  {"SNR (dB)"}
                </div>
              </MDBCol>
            </MDBRow>
          </MDBRow>

          <MDBRow style={{ paddingBottom: '5vh' }}>

            <MDBCol md="12">
              <div className="textHeader font-weight-bold">
                {"Case Study"}
              </div>
            </MDBCol>
            <MDBCol md="6">
              <div style={{ textAlign: 'center' }} className="textHeader">
                {"Clean"}
              </div>
              <MDBRow>
                <MDBCol style={{ display: 'flex', justifyContent: 'center' }} md="12">
                  <table>
                    <tr style={{textAlign: 'center'}}>
                      <th><span style={{fontWeight: 'bold'}}>Modality</span></th>
                      <th><span style={{fontWeight: 'bold'}}>Text</span></th>
                    </tr>
                    <tr>
                      <td><span>Reference</span></td>
                      <td><span>Son of god love's pure light</span></td>
                    </tr>
                    <tr>
                      <td><span>A</span></td>
                      <td><span><u>The</u> son of god <u>loves</u> pure <u>life</u></span></td>
                    </tr>
                    <tr>
                      <td><span>A-V</span></td>
                      <td><span>Son of god <u>loves</u> pure <u>life</u></span></td>
                    </tr>
                    <tr>
                      <td><span>A-V-I</span></td>
                      <td><span>Son of god <u>loves</u> pure <u>ligh</u></span></td>
                    </tr>
                  </table>
                </MDBCol>
              </MDBRow>
            </MDBCol>

            <MDBCol md="6">
              <div style={{ textAlign: 'center' }} className="textHeader">
                {"Noisy"}
              </div>
              <MDBRow>
                <MDBCol style={{ display: 'flex', justifyContent: 'center' }} md="12">
                  <table>
                    <tr style={{textAlign: 'center'}}>
                      <th><span style={{fontWeight: 'bold'}}>Modality</span></th>
                      <th><span style={{fontWeight: 'bold'}}>Text</span></th>
                    </tr>
                    <tr>
                      <td><span>Reference</span></td>
                      <td><span>Wonder how I got along</span></td>
                    </tr>
                    <tr>
                      <td><span>A</span></td>
                      <td><span><u>What</u> <u>is</u> how I got <u>a</u> <u>lot</u></span></td>
                    </tr>
                    <tr>
                      <td><span>A-V</span></td>
                      <td><span><u>Wander</u> how I got <u>a</u> <u>long</u></span></td>
                    </tr>
                    <tr>
                      <td><span>A-V-I</span></td>
                      <td><span>Wonder how I got <u>a</u> <u>long</u></span></td>
                    </tr>
                  </table>
                </MDBCol>
                
              </MDBRow>
            </MDBCol>
            <MDBCol style={{ paddingTop: '2vh' }} md="12">
              <div className="textDescription">
                {"We show one case from the clean scenario and one from a noisy (mixed with accompaniments) scenario. More quantitative results are displayed in paper's Appendix C. In the clean case, the audio-only system has three word errors, including one insertion and two substitutions. Both audio-visual and audio-visual-IMU models misspell \"love's\" and \"light\", but for the word \"light\" the three-modal system's output has fewer character-level errors (only one missing character). Likewise, in the noisy case, the MM-ALT system also performs better than its audio-only and audio-visual counterparts by correcting the substitutions of \"What\" and \"Wander\" as well as the insertion of \"is\". Although the word \"along\" in reference is not fully recovered, the transcription from the MM-ALT system is closer to the ground truth than the transcription from the audio-only system."}
              </div>
            </MDBCol>
          </MDBRow>

        </div>
      </>
    );
  }
}

export default Result;
